Skip to content

Commit

Permalink
Add support for annotation authors
Browse files Browse the repository at this point in the history
  • Loading branch information
semvis123 committed Aug 8, 2023
1 parent d78e82a commit 3a761d7
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 3 deletions.
8 changes: 8 additions & 0 deletions catdoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ func GetCommentsFromFile(file io.ReadSeeker) (string, error) {
return callWASMFuncWithFile("get_comments", file)
}

// GetAnnotationAuthorsFromFile runs the "get_annotation_authors" WASM
// function on file and returns its output split into one entry per line.
//
// A single trailing newline is removed before splitting, so a result such as
// "H. Potter\n" yields []string{"H. Potter"} rather than a slice with an
// extra empty element. When the output is empty, a nil slice is returned.
// Any error from the underlying WASM call is returned unchanged.
func GetAnnotationAuthorsFromFile(file io.ReadSeeker) ([]string, error) {
	out, err := callWASMFuncWithFile("get_annotation_authors", file)
	if err != nil {
		return nil, err
	}
	// Only the final line terminator is dropped; interior empty lines are
	// kept so entries keep their position in the list.
	out = strings.TrimSuffix(out, "\n")
	if out == "" {
		return nil, nil
	}
	return strings.Split(out, "\n"), nil
}

// GetVersion returns whatever the "get_version" WASM function produces,
// together with any error reported by the call. No input file is passed.
func GetVersion() (string, error) {
	version, err := callWASMFunc("get_version", nil)
	return version, err
}
Expand Down
Binary file modified catdoc.wasm
Binary file not shown.
2 changes: 1 addition & 1 deletion catdoc/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ catdoc-wasm: emsdk.uptodate
-I. -O3 -DHAVE_CONFIG_H \
-DCATDOC_VERSION=\"0.95\" \
-DCHARSETPATH=\"charsets\" \
-sEXPORTED_FUNCTIONS=_get_text,_get_author,_get_last_author,_get_version,_get_title,_get_subject,_get_keywords,_get_comments\
-sEXPORTED_FUNCTIONS=_get_text,_get_author,_get_last_author,_get_version,_get_title,_get_subject,_get_keywords,_get_comments,_get_annotation_authors\
-sSTANDALONE_WASM -sWARN_ON_UNDEFINED_SYMBOLS=0 \
--no-entry -sFILESYSTEM=1 -sALLOW_MEMORY_GROWTH -sMAXIMUM_MEMORY=1GB

Expand Down
65 changes: 65 additions & 0 deletions catdoc/src/analyze.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ extern char ole_sign[], zip_sign[]; /* from ole.c */
/* Signature string "{\rtf" (NOTE(review): presumably matched against the
 * start of the input to recognise RTF files; the comparison is not visible
 * in this part of the file). */
char rtf_sign[] = "{\\rtf";
/* Two signature bytes 0xdb 0xa5 followed by a terminating zero
 * (NOTE(review): by its name, the magic of old Word files; confirm). */
char old_word_sign[] = {0xdb, 0xa5, 0};
/* Two signature bytes 0x31 0xBE followed by a terminating zero
 * (NOTE(review): by its name, the magic of Write files; confirm). */
char write_sign[] = {0x31, 0xBE, 0};
/* 18-byte search pattern used by read_annotation_authors(): two zero bytes,
 * the 16-bit little-endian value 7, then the seven characters "Unknown"
 * encoded as UTF-16LE.
 * NOTE(review): presumably the cbExtra field plus the first ("Unknown")
 * entry of Word's SttbfRMark author table; confirm against [MS-DOC]. */
unsigned char sttbfRMark[] = {0x00, 0x00, 0x07, 0x00, 0x55, 0x00,
0x6E, 0x00, 0x6B, 0x00, 0x6E, 0x00,
0x6F, 0x00, 0x77, 0x00, 0x6E, 0x00};
/* Verbosity flag, off by default. */
int verbose = 0;

unsigned char *read_metadata(unsigned char *buffer, metadata metadata_type) {
Expand All @@ -38,13 +41,75 @@ unsigned char *read_metadata(unsigned char *buffer, metadata metadata_type) {
return NULL;
}

/*
 * Searches f, starting at its current read position, for the first
 * occurrence of the byte sequence mark[0 .. mark_length-1].
 *
 * Returns the offset of the first byte of the match, counted from the
 * position at which reading started.
 *
 * If the sequence is not found (or mark_length is not positive, or memory
 * cannot be allocated) an error is written to stderr and the process exits
 * with status 1, so the function only returns on success.
 *
 * The file is read in blocks. The last mark_length-1 bytes of each block
 * are carried over to the front of the buffer before the next read, so a
 * match that straddles a block boundary is found without ever indexing
 * outside the buffer.
 */
int find_offset(FILE *f, unsigned char *mark, int mark_length) {
  const size_t block_size = 1024 * 25;

  if (mark_length > 0) {
    const size_t need = (size_t)mark_length;
    const size_t keep = need - 1; /* tail carried into the next round */
    unsigned char *buf = malloc(keep + block_size);
    size_t have = 0; /* number of valid bytes currently in buf */
    long base = 0;   /* stream offset corresponding to buf[0] */
    size_t n;

    if (buf == NULL) {
      fprintf(stderr, "out of memory while searching for mark\n");
      exit(1);
    }

    /* Loop on the fread result rather than feof(): a read error returns 0
     * without setting EOF and must not spin forever. */
    while ((n = fread(buf + have, 1, block_size, f)) > 0) {
      have += n;

      for (size_t i = 0; i + need <= have; i++) {
        size_t j = 0;
        while (j < need && buf[i + j] == mark[j]) {
          j++;
        }
        if (j == need) {
          free(buf);
          return (int)(base + (long)i);
        }
      }

      /* Every window starting before the last `keep` bytes has been
       * checked; keep only that tail. Source lies after destination, so a
       * forward copy is safe. */
      if (have > keep) {
        size_t drop = have - keep;
        for (size_t j = 0; j < keep; j++) {
          buf[j] = buf[drop + j];
        }
        base += (long)drop;
        have = keep;
      }
    }
    free(buf);
  }

  fprintf(stderr, "stttbfRMark offset is not found");
  exit(1);
}

/* Reports a malformed author table on stderr and terminates with status 1. */
static void annotation_authors_fail(const char *message) {
  fprintf(stderr, "%s", message);
  exit(1);
}

/*
 * Prints the annotation authors stored in f to stdout, one per line.
 *
 * The table is located by searching for sttbfRMark. The four bytes in front
 * of that pattern must be 0xff 0xff followed by a 16-bit entry count. The
 * pattern itself is skipped; it contains the first entry ("Unknown"), which
 * is why one is subtracted from the count. Each remaining entry is a 16-bit
 * length followed by that many 16-bit characters, printed with "%lc".
 *
 * NOTE(review): the 16-bit characters are read straight into unsigned short,
 * so a little-endian host is assumed; "%lc" output also depends on the
 * active locale.
 *
 * Any structural problem (pattern too close to the start of the file, wrong
 * 0xffff prefix, truncated data, allocation failure) is reported on stderr
 * and ends the process with status 1.
 */
void read_annotation_authors(FILE *f) {
  const int mark_length = (int)sizeof(sttbfRMark);
  unsigned char buf[2];
  int mark_offset = find_offset(f, sttbfRMark, mark_length);

  /* The 0xffff marker and the count sit in the 4 bytes before the pattern. */
  if (mark_offset < 4 || fseek(f, mark_offset - 4, SEEK_SET) != 0 ||
      fread(buf, 1, 2, f) != 2 || buf[0] != 0xff || buf[1] != 0xff) {
    annotation_authors_fail("stttbfRMark offset is invalid");
  }

  if (fread(buf, 1, 2, f) != 2) {
    annotation_authors_fail("annotation author table is truncated\n");
  }
  unsigned int total = (unsigned short)getshort(buf, 0);
  if (total == 0) {
    /* Nothing stored; avoid wrapping the unsigned count below. */
    return;
  }
  unsigned int count = total - 1;

  /* Skip the search pattern, i.e. everything up to the second entry. */
  if (fseek(f, mark_length, SEEK_CUR) != 0) {
    annotation_authors_fail("annotation author table is truncated\n");
  }

  for (unsigned int i = 0; i < count; i++) {
    if (fread(buf, 1, 2, f) != 2) {
      annotation_authors_fail("annotation author table is truncated\n");
    }
    size_t str_len = (unsigned short)getshort(buf, 0);

    if (str_len > 0) {
      unsigned short *str = calloc(str_len, 2);
      if (str == NULL) {
        annotation_authors_fail("out of memory while reading annotation authors\n");
      }
      if (fread(str, 2, str_len, f) != str_len) {
        free(str);
        annotation_authors_fail("annotation author table is truncated\n");
      }
      for (size_t j = 0; j < str_len; j++) {
        printf("%lc", str[j]);
      }
      free(str);
    }
    printf("\n");
  }
}

/*********************************************************************
* Determines format of input file and calls parse_word_header or
* process_file if
* it is word processor file or copy_out if it is plain text file
* return not 0 when error
********************************************************************/
int analyze_format(FILE *f, metadata metadata_type) {
if (metadata_type == annotation_authors) {
read_annotation_authors(f);
return 0;
}
unsigned char buffer[129];
long offset = 0;
FILE *new_file, *ole_file;
Expand Down
10 changes: 9 additions & 1 deletion catdoc/src/catdoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int main(int argc, char **argv) {
get_locale_charset();
#endif
metadata metadata_type = none;
while ((c = getopt(argc, argv, "Vls:d:f:taubxv8wALTSKCm:")) != -1) {
while ((c = getopt(argc, argv, "Vls:d:f:taubxv8wALTSKCUm:")) != -1) {
switch (c) {
case 's':
check_charset(&source_csname, optarg);
Expand Down Expand Up @@ -104,6 +104,9 @@ int main(int argc, char **argv) {
case 'C':
metadata_type = comments;
break;
case 'U':
metadata_type = annotation_authors;
break;
case 'm': {
char *endptr;
wrap_margin = (int)strtol(optarg, &endptr, 0);
Expand Down Expand Up @@ -246,3 +249,8 @@ void get_comments() {
char *args[] = {"", "-C", "/input_file/file.doc"};
main(3, args);
}

/*
 * Exported entry point: invokes main() as if the program had been started
 * with the -U option on the fixed path /input_file/file.doc.
 */
void get_annotation_authors() {
  char *argv[3];

  argv[0] = "";
  argv[1] = "-U";
  argv[2] = "/input_file/file.doc";
  main(3, argv);
}
4 changes: 3 additions & 1 deletion catdoc/src/catdoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ void get_title();
void get_subject();
void get_keywords();
void get_comments();
void get_annotation_authors();

char *find_file(char *name, const char *path);
char *stradd(const char *s1, const char *s2);
Expand Down Expand Up @@ -255,7 +256,8 @@ typedef enum {
title,
subject,
keywords,
comments
comments,
annotation_authors
} metadata;
int analyze_format(FILE *f, metadata metadata_type);
void list_charsets(void);
Expand Down
20 changes: 20 additions & 0 deletions catdoc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ func testFileFunc(title, expected string, fun func(io.ReadSeeker) (string, error
}
}

// testFileFuncArr opens "test.doc", passes it to fun and checks that the
// returned slice starts with the elements of expected, in order.
//
// title is only used to label the failure message. Elements returned beyond
// len(expected) are not inspected. The test fails (instead of panicking)
// when fun returns fewer elements than expected, and the file is closed
// before the helper returns.
func testFileFuncArr(title string, expected []string, fun func(io.ReadSeeker) ([]string, error), t *testing.T) {
	t.Helper()

	f, err := os.Open("test.doc")
	if err != nil {
		t.Fatalf("could not open test document, %v", err)
	}
	// t.Fatalf exits via runtime.Goexit, which still runs deferred calls.
	defer f.Close()

	arr, err := fun(f)
	if err != nil {
		t.Fatalf("expected error to be nil, got %v", err)
	}
	// Guard the indexing below: a short result is a failure, not a panic.
	if len(arr) < len(expected) {
		t.Fatalf("expected %s to be \"%v\", got %v", title, expected, arr)
	}
	for i := range expected {
		if arr[i] != expected[i] {
			t.Fatalf("expected %s to be \"%v\", got %v", title, expected, arr)
		}
	}
}

// TestGetTextFromFile runs the shared testFileFunc helper against
// gocatdoc.GetTextFromFile, expecting the value "text-inside-doc".
func TestGetTextFromFile(t *testing.T) {
	const want = "text-inside-doc"
	testFileFunc("text", want, gocatdoc.GetTextFromFile, t)
}
Expand Down Expand Up @@ -58,3 +74,7 @@ func TestGetKeywordsFromFile(t *testing.T) {
// TestGetCommentsFromFile runs the shared testFileFunc helper against
// gocatdoc.GetCommentsFromFile, expecting the value "Comments".
func TestGetCommentsFromFile(t *testing.T) {
	const want = "Comments"
	testFileFunc("comments", want, gocatdoc.GetCommentsFromFile, t)
}

// TestGetAnnotationAuthorsFromFile runs the shared testFileFuncArr helper
// against gocatdoc.GetAnnotationAuthorsFromFile, expecting the first
// returned author to be "H. Potter".
func TestGetAnnotationAuthorsFromFile(t *testing.T) {
	// The title is interpolated into the failure message, so spell it correctly.
	testFileFuncArr("annotation_authors", []string{"H. Potter"}, gocatdoc.GetAnnotationAuthorsFromFile, t)
}
Binary file modified test.doc
Binary file not shown.

0 comments on commit 3a761d7

Please sign in to comment.