Skip to content

Commit

Permalink
[Feature:Plagiarism] Add flag to ignore C++ comments (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
williamjallen authored Nov 23, 2021
1 parent fd22c02 commit 7ba0d8d
Show file tree
Hide file tree
Showing 7 changed files with 560 additions and 68 deletions.
3 changes: 2 additions & 1 deletion bin/tokenize_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file):

tokenizer = f"./{language_token_data['tokenizer']}"

if not language_token_data.get("input_as_argument"):
if language_token_data.get('input_as_argument') is not None and \
language_token_data['input_as_argument'] is not False:
my_concatenated_file = f'< {my_concatenated_file}'

if "command_args" in language_token_data:
Expand Down
102 changes: 69 additions & 33 deletions tests/data/tokenizer/c/expected_output/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 21,
"line": 6,
"type": "COMMENT",
"value": "// define a variable"
},
{
"char": 5,
"line": 7,
Expand Down Expand Up @@ -149,6 +155,12 @@
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 39,
"line": 7,
"type": "COMMENT",
"value": "// define a variable and set it equal to 1"
},
{
"char": 5,
"line": 9,
Expand All @@ -173,6 +185,12 @@
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 43,
"line": 9,
"type": "COMMENT",
"value": "// print something"
},
{
"char": 5,
"line": 10,
Expand Down Expand Up @@ -200,204 +218,222 @@
{
"char": 5,
"line": 12,
"type": "COMMENT",
"value": "// loop from 1 to n and multiply the previous result by i"
},
{
"char": 5,
"line": 13,
"type": "KEYWORD",
"value": "for"
},
{
"char": 8,
"line": 12,
"line": 13,
"type": "PUNCTUATION-(",
"value": "("
},
{
"char": 9,
"line": 12,
"line": 13,
"type": "KEYWORD",
"value": "int"
},
{
"char": 13,
"line": 12,
"line": 13,
"type": "IDENTIFIER",
"value": "i"
},
{
"char": 15,
"line": 12,
"line": 13,
"type": "PUNCTUATION-=",
"value": "="
},
{
"char": 17,
"line": 12,
"line": 13,
"type": "LITERAL",
"value": "1"
},
{
"char": 18,
"line": 12,
"line": 13,
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 20,
"line": 12,
"line": 13,
"type": "IDENTIFIER",
"value": "i"
},
{
"char": 22,
"line": 12,
"line": 13,
"type": "PUNCTUATION-<=",
"value": "<="
},
{
"char": 24,
"line": 12,
"line": 13,
"type": "IDENTIFIER",
"value": "n"
},
{
"char": 25,
"line": 12,
"line": 13,
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 27,
"line": 12,
"line": 13,
"type": "PUNCTUATION-++",
"value": "++"
},
{
"char": 29,
"line": 12,
"line": 13,
"type": "IDENTIFIER",
"value": "i"
},
{
"char": 30,
"line": 12,
"line": 13,
"type": "PUNCTUATION-)",
"value": ")"
},
{
"char": 5,
"line": 13,
"line": 14,
"type": "PUNCTUATION-{",
"value": "{"
},
{
"char": 9,
"line": 14,
"line": 15,
"type": "IDENTIFIER",
"value": "factorial"
},
{
"char": 19,
"line": 14,
"line": 15,
"type": "PUNCTUATION-*=",
"value": "*="
},
{
"char": 22,
"line": 14,
"line": 15,
"type": "IDENTIFIER",
"value": "i"
},
{
"char": 23,
"line": 14,
"line": 15,
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 9,
"line": 16,
"type": "COMMENT",
"value": "/*\n factorial += i; // this doesn't work\n factorial -= i; // this doesn't work either\n */"
},
{
"char": 5,
"line": 15,
"line": 20,
"type": "PUNCTUATION-}",
"value": "}"
},
{
"char": 5,
"line": 17,
"line": 22,
"type": "IDENTIFIER",
"value": "cout"
},
{
"char": 10,
"line": 17,
"line": 22,
"type": "PUNCTUATION-<<",
"value": "<<"
},
{
"char": 13,
"line": 17,
"line": 22,
"type": "LITERAL",
"value": "\"Factorial of \""
},
{
"char": 29,
"line": 17,
"line": 22,
"type": "PUNCTUATION-<<",
"value": "<<"
},
{
"char": 32,
"line": 17,
"line": 22,
"type": "IDENTIFIER",
"value": "n"
},
{
"char": 34,
"line": 17,
"line": 22,
"type": "PUNCTUATION-<<",
"value": "<<"
},
{
"char": 37,
"line": 17,
"line": 22,
"type": "LITERAL",
"value": "\" = \""
},
{
"char": 43,
"line": 17,
"line": 22,
"type": "PUNCTUATION-<<",
"value": "<<"
},
{
"char": 46,
"line": 17,
"line": 22,
"type": "IDENTIFIER",
"value": "factorial"
},
{
"char": 55,
"line": 17,
"line": 22,
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 57,
"line": 22,
"type": "COMMENT",
"value": "// print the result"
},
{
"char": 5,
"line": 18,
"line": 23,
"type": "KEYWORD",
"value": "return"
},
{
"char": 12,
"line": 18,
"line": 23,
"type": "LITERAL",
"value": "0"
},
{
"char": 13,
"line": 18,
"line": 23,
"type": "PUNCTUATION-;",
"value": ";"
},
{
"char": 1,
"line": 19,
"line": 24,
"type": "PUNCTUATION-}",
"value": "}"
}
Expand Down
Loading

0 comments on commit 7ba0d8d

Please sign in to comment.