From 895dcf8a094ba35441377ce00fc27a8b1d01eaa5 Mon Sep 17 00:00:00 2001 From: Mark Fullmer Date: Sun, 1 Aug 2021 11:18:42 -0700 Subject: [PATCH] Support pipe separators and multiple instances --- index.php | 51 ++++++++++++++++++ src/TagConverter.php | 36 +++++++++++-- test/StructureTest.php | 43 +++++++++++++++ test/data/file-with-multiple-students.txt | 54 +++++++++++++++++++ test/data/file-with-single-student.txt | 37 +++++++++++++ test/data/file-with-traditional-structure.txt | 28 ++++++++++ test/rendered/test-multiple.php | 18 +++++++ test/rendered/test-new.php | 18 +++++++ test/rendered/test-original.php | 18 +++++++ 9 files changed, 298 insertions(+), 5 deletions(-) create mode 100644 test/StructureTest.php create mode 100644 test/data/file-with-multiple-students.txt create mode 100644 test/data/file-with-single-student.txt create mode 100644 test/data/file-with-traditional-structure.txt create mode 100644 test/rendered/test-multiple.php create mode 100644 test/rendered/test-new.php create mode 100644 test/rendered/test-original.php diff --git a/index.php b/index.php index ffe0201..544fc63 100644 --- a/index.php +++ b/index.php @@ -61,5 +61,56 @@ +

Design & behavior

+
    +
  1. Tags must be wrapped in < and >
  2. +
  3. Tag names and tag values may only contain alphanumeric characters, spaces, underscores, and hyphens.
  4. +
  5. Tag names must be separated from tag values by a :
  6. +
  7. Spaces at the beginning or end of tag names or tag values are ignored; spaces within tag values will be preserved
  8. +
  9. Items with multiple values may be indicated by a pipe (|) character or semicolon (;)
  10. +
  11. Everything not wrapped in < and > will be considered "text"
  12. +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
StatusTag ExampleExplanation
Good<MyTag:SomeText>
Good<My Tag:Some Text>Spaces in tag names & values OK
Good< My Tag : Some Text >Spaces padding tag names & values OK
Good< My-Tag : Some_Text >Underscores & hyphens OK
Good< My-Tag : First value | Second value>Pipe or semicolon used to indicate multiple values
Bad< My/Tag : Some:Text >Other characters not OK
'; diff --git a/src/TagConverter.php b/src/TagConverter.php index 3a2d991..0375410 100644 --- a/src/TagConverter.php +++ b/src/TagConverter.php @@ -85,13 +85,14 @@ protected static function convert($original = '') { $array = []; // Only search for tags within header section, if demarcated. $header_split = preg_split('//', $original); - preg_match_all("/<([a-zA-Z0-9_ -\(\)\/]*):([a-zA-Z0-9._&;, \-\(\)\/]*)>/", $original, $matches, PREG_SET_ORDER); + preg_match_all("/<([a-zA-Z0-9_ -\(\)\/]*):([a-zA-Z0-9._&;|, \-\(\)\/]*)>/", $original, $matches, PREG_SET_ORDER); if (isset($matches[0])) { // Store strings. foreach ($matches as $key => $values) { $values[2] = trim($values[2]); - $multiple_terms = preg_grep('/;/', explode("\n", $values[2])); - if (!empty($multiple_terms)) { + $semicolon_separated = preg_grep('/;/', explode("\n", $values[2])); + $pipe_separated = preg_grep('/\|/', explode("\n", $values[2])); + if (!empty($semicolon_separated)) { $terms = preg_split('/;/', $values[2]); foreach ($terms as $i => &$term) { if (empty($term)) { @@ -100,13 +101,31 @@ protected static function convert($original = '') { $term = (string) trim($term); } } + elseif (!empty($pipe_separated)) { + $terms = preg_split('/\|/', $values[2]); + foreach ($terms as $i => &$term) { + if (empty($term)) { + unset($terms[$i]); + } + $term = (string) trim($term); + } + } else { - $terms = (string) trim($values[2]); + $terms = [trim($values[2])]; + } + + // Handle scenario where there are multiple instances of the same key. + $category = trim($values[1]); + if (isset($array[$category])) { + $array[$category] = array_merge($array[$category], $terms); + } + else { + $array[$category] = $terms; } - $array[trim($values[1])] = $terms; } } + // Retrieve & store text (everything other than the headers). // Remove tags and parse each line into an array element. 
if (isset($header_split[1])) { $untagged = $header_split[1]; @@ -129,6 +148,13 @@ protected static function convert($original = '') { // Add a new array element, 'text', to the array. If nothing else, the // $array array will now contain the 'text' element with an empty string. $array['text'] = $clean; + + // Reduce items that have a single array value into a string. + foreach ($array as $key => $value) { + if (is_array($value) && count($value) == 1) { + $array[$key] = $value[0]; + } + } return $array; } diff --git a/test/StructureTest.php b/test/StructureTest.php new file mode 100644 index 0000000..06780d8 --- /dev/null +++ b/test/StructureTest.php @@ -0,0 +1,43 @@ + [ + 'filename' => 'file-with-traditional-structure.txt', + 'expected' => '{"Student ID":"10410","Country":"China","Institution":"University of Arizona","Course":"ENGL 106","Mode":"Face to Face","Length":"16 weeks","Assignment":"DE","Draft":"F","Year in School":"1","Gender":"M","Course Year":"2018","Course Semester":"Spring","College":"Colleges Letters Arts Science","Program":"No Major Selected Ltr Art Sci","Proficiency Exam":"TOEFL","Exam total":"73.0","Exam reading":"17.0","Exam listening":"18.0","Exam speaking":"17.0","Exam writing":"21.0","Instructor":"1018","Section":"1039","text":"English:106-005\nUnit1 Project: Description and Explanation\nLanguage Change in my Blog Writing\nWhen the author write about Sociolinguistics, he always write about the variation about language.\n"}', + ], + 'Single' => [ + 'filename' => 'file-with-single-student.txt', + 'expected' => '{"Student IDs":"10527","Group ID":"NA","Institution":"University of Arizona","Course":"ENGL 106","Mode":"Face to Face","Length":"16 weeks","Assignment":"DE","Draft":"F","Course Year":"2019","Course Semester":"Spring","Instructor":"1019","Section":"1057","Student ID":"10527","Country":"NA","L1":"NA","Heritage Spanish Speaker":"NA","Year in School":"1","Gender":"F","College":"Eller College of 
Management","Program":"Pre-Economics","Proficiency Exam":"TOEFL","Exam total":"85.0","Exam reading":"21.0","Exam listening":"24.0","Exam speaking":"20.0","Exam writing":"20.0","text":"2019-2-19. 10:00am\nAuthor-Spolsky. \nClass section-Eng106\nIn my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article.\n"}', + ], + 'Multiple' => [ + 'filename' => 'file-with-multiple-students.txt', + 'expected' => '{"Student IDs":["10527","10528"],"Group ID":"NA","Institution":"University of Arizona","Course":"ENGL 106","Mode":"Face to Face","Length":"16 weeks","Assignment":"DE","Draft":"F","Course Year":"2019","Course Semester":"Spring","Instructor":"1019","Section":"1057","Student ID":["10527","10528"],"Country":["NA","CHN"],"L1":["NA","Chinese"],"Heritage Spanish Speaker":["NA","NA"],"Year in School":["1","2"],"Gender":["F","M"],"College":["Eller College of Management","School B"],"Program":["Pre-Economics","English"],"Proficiency Exam":["TOEFL","TOEFL"],"Exam total":["85.0","89.0"],"Exam reading":["21.0","22.0"],"Exam listening":["24.0","25.0"],"Exam speaking":["20.0","21.0"],"Exam writing":["20.0","21.0"],"text":"2019-2-19. 10:00am\nAuthor-Spolsky. \nClass section-Eng106\nIn my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article.\n"}', + ], + ]; + } + + /** + * Test assertions. + * + * @dataProvider dataProvider + */ + public function testStructure($filename, $expected) { + $input = file_get_contents('test/data/' . 
$filename, FILE_USE_INCLUDE_PATH); + $actual = TagConverter::json($input); + $this->assertEquals($expected, html_entity_decode($actual)); + } + +} diff --git a/test/data/file-with-multiple-students.txt b/test/data/file-with-multiple-students.txt new file mode 100644 index 0000000..f3efe9f --- /dev/null +++ b/test/data/file-with-multiple-students.txt @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +2019-2-19. 10:00am +Author-Spolsky. +Class section-Eng106 +In my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article. diff --git a/test/data/file-with-single-student.txt b/test/data/file-with-single-student.txt new file mode 100644 index 0000000..f843691 --- /dev/null +++ b/test/data/file-with-single-student.txt @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +2019-2-19. 10:00am +Author-Spolsky. +Class section-Eng106 +In my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article. diff --git a/test/data/file-with-traditional-structure.txt b/test/data/file-with-traditional-structure.txt new file mode 100644 index 0000000..09c5ab2 --- /dev/null +++ b/test/data/file-with-traditional-structure.txt @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + +English:106-005 +Unit1 Project: Description and Explanation +Language Change in my Blog Writing +When the author write about Sociolinguistics, he always write about the variation about language. diff --git a/test/rendered/test-multiple.php b/test/rendered/test-multiple.php new file mode 100644 index 0000000..62a5d05 --- /dev/null +++ b/test/rendered/test-multiple.php @@ -0,0 +1,18 @@ +
';
+  print_r($text);
+  echo '
'; diff --git a/test/rendered/test-new.php b/test/rendered/test-new.php new file mode 100644 index 0000000..48e2a1d --- /dev/null +++ b/test/rendered/test-new.php @@ -0,0 +1,18 @@ +
';
+  print_r($text);
+  echo '
'; diff --git a/test/rendered/test-original.php b/test/rendered/test-original.php new file mode 100644 index 0000000..138105f --- /dev/null +++ b/test/rendered/test-original.php @@ -0,0 +1,18 @@ +
';
+  print_r($text);
+  echo '
';