Skip to content

Commit

Permalink
Support pipe separators and multiple instances
Browse files Browse the repository at this point in the history
  • Loading branch information
markfullmer committed Aug 1, 2021
1 parent e5f98dd commit 895dcf8
Show file tree
Hide file tree
Showing 9 changed files with 298 additions and 5 deletions.
51 changes: 51 additions & 0 deletions index.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,56 @@
</div>
</form>
</div>
<h3>Design & behavior</h3>
<ol>
<li>Tags must be wrapped in <code>&lt;</code> and <code>&gt;</code></li>
<li>Tag names and tag values may only contain alphanumeric characters, spaces, underscores, and hyphens.</li>
<li>Tag names must be separated from tag values by a <code>:</code></li>
<li>Spaces at the beginning or end of tag names or tag values are ignored; spaces within tag values will be preserved</li>
<li>Items with multiple values may be indicated by a pipe (|) character or semicolon (;)</li>
<li>Everything not wrapped in <code>&lt;</code> and <code>&gt;</code> will be considered "text"</li>
</ol>
<table>
<thead>
<tr>
<th>Status</th>
<th>Tag Example</th>
<th>Explanation</th>
</tr>
</thead>
<tbody>
<tr>
<td>Good</td>
<td><code>&lt;MyTag:SomeText&gt;</code></td>
<td></td>
</tr>
<tr>
<td>Good</td>
<td><code>&lt;My Tag:Some Text&gt;</code></td>
<td>Spaces in tag names &amp; values OK</td>
</tr>
<tr>
<td>Good</td>
<td><code>&lt; My Tag : Some Text &gt;</code></td>
<td>Spaces padding tag names &amp; values OK</td>
</tr>
<tr>
<td>Good</td>
<td><code>&lt; My-Tag : Some_Text &gt;</code></td>
<td>Underscores &amp; hyphens OK</td>
</tr>
<tr>
<td>Good</td>
<td><code>&lt; My-Tag : First value | Second value&gt;</code></td>
<td>Pipe or semicolon used to indicate multiple values</td>
</tr>
<tr>
<td>Bad</td>
<td><code>&lt; My/Tag : Some:Text &gt;</code></td>
<td>Other characters not OK</td>
</tr>
</tbody>
</table>
</body>
</html>';
36 changes: 31 additions & 5 deletions src/TagConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,14 @@ protected static function convert($original = '') {
$array = [];
// Only search for tags within header section, if demarcated.
$header_split = preg_split('/<End Header>/', $original);
preg_match_all("/<([a-zA-Z0-9_ -\(\)\/]*):([a-zA-Z0-9._&;, \-\(\)\/]*)>/", $original, $matches, PREG_SET_ORDER);
preg_match_all("/<([a-zA-Z0-9_ -\(\)\/]*):([a-zA-Z0-9._&;|, \-\(\)\/]*)>/", $original, $matches, PREG_SET_ORDER);
if (isset($matches[0])) {
// Store <TAGNAME: VALUE> strings.
foreach ($matches as $key => $values) {
$values[2] = trim($values[2]);
$multiple_terms = preg_grep('/;/', explode("\n", $values[2]));
if (!empty($multiple_terms)) {
$semicolon_separated = preg_grep('/;/', explode("\n", $values[2]));
$pipe_separated = preg_grep('/\|/', explode("\n", $values[2]));
if (!empty($semicolon_separated)) {
$terms = preg_split('/;/', $values[2]);
foreach ($terms as $i => &$term) {
if (empty($term)) {
Expand All @@ -100,13 +101,31 @@ protected static function convert($original = '') {
$term = (string) trim($term);
}
}
elseif (!empty($pipe_separated)) {
$terms = preg_split('/\|/', $values[2]);
foreach ($terms as $i => &$term) {
if (empty($term)) {
unset($terms[$i]);
}
$term = (string) trim($term);
}
}
else {
$terms = (string) trim($values[2]);
$terms = [trim($values[2])];
}

// Handle scenario where there are multiple instances of the same key.
$category = trim($values[1]);
if (isset($array[$category])) {
$array[$category] = array_merge($array[$category], $terms);
}
else {
$array[$category] = $terms;
}
$array[trim($values[1])] = $terms;
}
}

// Retrieve & store text (everything other than the headers).
// Remove tags and parse each line into an array element.
if (isset($header_split[1])) {
$untagged = $header_split[1];
Expand All @@ -129,6 +148,13 @@ protected static function convert($original = '') {
// Add a new array element, 'text', to the array. If nothing else, the
// $array array will now contain the 'text' element with an empty string.
$array['text'] = $clean;

// Reduce items that have a single array value into a string.
foreach ($array as $key => $value) {
if (is_array($value) && count($value) == 1) {
$array[$key] = $value[0];
}
}
return $array;
}

Expand Down
43 changes: 43 additions & 0 deletions test/StructureTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

namespace writecrow\TagConverter;

use PHPUnit\Framework\TestCase;

/**
 * Verifies that tagged fixture files are converted to the expected JSON.
 *
 * Each fixture under test/data is passed through TagConverter::json() and the
 * result is compared against a hand-written JSON string.
 */
class StructureTest extends TestCase {

  /**
   * Provides fixture filenames together with the JSON each should produce.
   *
   * Declared static: PHPUnit 10 deprecates non-static data providers and later
   * versions reject them, while older versions accept static providers too.
   *
   * @return array
   *   Data sets keyed by a descriptive label. Each set holds:
   *   - filename: fixture name relative to test/data/.
   *   - expected: the JSON string the conversion is expected to yield.
   */
  public static function dataProvider() {
    return [
      'Traditional' => [
        'filename' => 'file-with-traditional-structure.txt',
        'expected' => '{"Student ID":"10410","Country":"China","Institution":"University of Arizona","Course":"ENGL 106","Mode":"Face to Face","Length":"16 weeks","Assignment":"DE","Draft":"F","Year in School":"1","Gender":"M","Course Year":"2018","Course Semester":"Spring","College":"Colleges Letters Arts Science","Program":"No Major Selected Ltr Art Sci","Proficiency Exam":"TOEFL","Exam total":"73.0","Exam reading":"17.0","Exam listening":"18.0","Exam speaking":"17.0","Exam writing":"21.0","Instructor":"1018","Section":"1039","text":"English:106-005\nUnit1 Project: Description and Explanation\nLanguage Change in my Blog Writing\nWhen the author write about Sociolinguistics, he always write about the variation about language.\n"}',
      ],
      'Single' => [
        'filename' => 'file-with-single-student.txt',
        'expected' => '{"Student IDs":"10527","Group ID":"NA","Institution":"University of Arizona","Course":"ENGL 106","Mode":"Face to Face","Length":"16 weeks","Assignment":"DE","Draft":"F","Course Year":"2019","Course Semester":"Spring","Instructor":"1019","Section":"1057","Student ID":"10527","Country":"NA","L1":"NA","Heritage Spanish Speaker":"NA","Year in School":"1","Gender":"F","College":"Eller College of Management","Program":"Pre-Economics","Proficiency Exam":"TOEFL","Exam total":"85.0","Exam reading":"21.0","Exam listening":"24.0","Exam speaking":"20.0","Exam writing":"20.0","text":"2019-2-19. 10:00am\nAuthor-Spolsky. <name> <name>\nClass section-Eng106\nIn my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article.\n"}',
      ],
      'Multiple' => [
        'filename' => 'file-with-multiple-students.txt',
        'expected' => '{"Student IDs":["10527","10528"],"Group ID":"NA","Institution":"University of Arizona","Course":"ENGL 106","Mode":"Face to Face","Length":"16 weeks","Assignment":"DE","Draft":"F","Course Year":"2019","Course Semester":"Spring","Instructor":"1019","Section":"1057","Student ID":["10527","10528"],"Country":["NA","CHN"],"L1":["NA","Chinese"],"Heritage Spanish Speaker":["NA","NA"],"Year in School":["1","2"],"Gender":["F","M"],"College":["Eller College of Management","School B"],"Program":["Pre-Economics","English"],"Proficiency Exam":["TOEFL","TOEFL"],"Exam total":["85.0","89.0"],"Exam reading":["21.0","22.0"],"Exam listening":["24.0","25.0"],"Exam speaking":["20.0","21.0"],"Exam writing":["20.0","21.0"],"text":"2019-2-19. 10:00am\nAuthor-Spolsky. <name> <name>\nClass section-Eng106\nIn my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article.\n"}',
      ],
    ];
  }

  /**
   * Converts one fixture and compares the JSON with the expected string.
   *
   * @param string $filename
   *   Fixture name relative to test/data/.
   * @param string $expected
   *   JSON string the converted fixture must equal.
   *
   * @dataProvider dataProvider
   */
  public function testStructure($filename, $expected) {
    $input = file_get_contents('test/data/' . $filename, FILE_USE_INCLUDE_PATH);
    // Fail with a clear message when the fixture is missing, instead of
    // passing FALSE on to the converter and reporting a JSON mismatch.
    $this->assertNotFalse($input, 'Unable to read fixture test/data/' . $filename);
    $actual = TagConverter::json($input);
    // The converted output is entity-decoded before comparison because the
    // expected strings contain literal angle brackets (e.g. "<name>").
    $this->assertSame($expected, html_entity_decode($actual));
  }

}
54 changes: 54 additions & 0 deletions test/data/file-with-multiple-students.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<Text>
<Student IDs: 10527|10528>
<Group ID: NA>
<Institution: University of Arizona>
<Course: ENGL 106>
<Mode: Face to Face>
<Length: 16 weeks>
<Assignment: DE>
<Draft: F>
<Course Year: 2019>
<Course Semester: Spring>
<Instructor: 1019>
<Section: 1057>
</Text>

<Student 1>
<Student ID: 10527>
<Country: NA>
<L1: NA>
<Heritage Spanish Speaker: NA>
<Year in School: 1>
<Gender: F>
<College: Eller College of Management>
<Program: Pre-Economics>
<Proficiency Exam: TOEFL>
<Exam total: 85.0>
<Exam reading: 21.0>
<Exam listening: 24.0>
<Exam speaking: 20.0>
<Exam writing: 20.0>
</Student 1>

<Student 2>
<Student ID: 10528>
<Country: CHN>
<L1: Chinese>
<Heritage Spanish Speaker: NA>
<Year in School: 2>
<Gender: M>
<College: School B>
<Program: English>
<Proficiency Exam: TOEFL>
<Exam total: 89.0>
<Exam reading: 22.0>
<Exam listening: 25.0>
<Exam speaking: 21.0>
<Exam writing: 21.0>
</Student 2>
<End Header>

2019-2-19. 10:00am
Author-Spolsky. <name> <name>
Class section-Eng106
In my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article.
37 changes: 37 additions & 0 deletions test/data/file-with-single-student.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<Text>
<Student IDs: 10527>
<Group ID: NA>
<Institution: University of Arizona>
<Course: ENGL 106>
<Mode: Face to Face>
<Length: 16 weeks>
<Assignment: DE>
<Draft: F>
<Course Year: 2019>
<Course Semester: Spring>
<Instructor: 1019>
<Section: 1057>
</Text>

<Student 1>
<Student ID: 10527>
<Country: NA>
<L1: NA>
<Heritage Spanish Speaker: NA>
<Year in School: 1>
<Gender: F>
<College: Eller College of Management>
<Program: Pre-Economics>
<Proficiency Exam: TOEFL>
<Exam total: 85.0>
<Exam reading: 21.0>
<Exam listening: 24.0>
<Exam speaking: 20.0>
<Exam writing: 20.0>
</Student 1>
<End Header>

2019-2-19. 10:00am
Author-Spolsky. <name> <name>
Class section-Eng106
In my re-write passage, I followed some rules of the blog and informal writing. Generally, this article is written for myself, so the goal of the rewriting article is to help me better understand this article.
28 changes: 28 additions & 0 deletions test/data/file-with-traditional-structure.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<Student ID: 10410>
<Country: China>
<Institution: University of Arizona>
<Course: ENGL 106>
<Mode: Face to Face>
<Length: 16 weeks>
<Assignment: DE>
<Draft: F>
<Year in School: 1>
<Gender: M>
<Course Year: 2018>
<Course Semester: Spring>
<College: Colleges Letters Arts Science>
<Program: No Major Selected Ltr Art Sci>
<Proficiency Exam: TOEFL>
<Exam total: 73.0>
<Exam reading: 17.0>
<Exam listening: 18.0>
<Exam speaking: 17.0>
<Exam writing: 21.0>
<Instructor: 1018>
<Section: 1039>
<End Header>

English:106-005
Unit1 Project: Description and Explanation
Language Change in my Blog Writing
When the author write about Sociolinguistics, he always write about the variation about language.
18 changes: 18 additions & 0 deletions test/rendered/test-multiple.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

/**
* @file
* Demonstration file of using TagConverter library.
*/

require 'vendor/autoload.php';

use writecrow\TagConverter\TagConverter;

// Read the multiple-students fixture (resolved via the include path).
$fixture_path = 'test/data/file-with-multiple-students.txt';
$raw_contents = file_get_contents($fixture_path, FILE_USE_INCLUDE_PATH);

// Run the fixture through TagConverter::json() and print the result inside a
// preformatted code block.
$converted = TagConverter::json($raw_contents);
echo '<div><pre><code>' . print_r($converted, TRUE) . '</code></pre></div>';
18 changes: 18 additions & 0 deletions test/rendered/test-new.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

/**
* @file
* Demonstration file of using TagConverter library.
*/

require 'vendor/autoload.php';

use writecrow\TagConverter\TagConverter;

// Read the single-student fixture (resolved via the include path).
$fixture_path = 'test/data/file-with-single-student.txt';
$raw_contents = file_get_contents($fixture_path, FILE_USE_INCLUDE_PATH);

// Run the fixture through TagConverter::php() and print the result inside a
// preformatted code block.
$converted = TagConverter::php($raw_contents);
echo '<div><pre><code>' . print_r($converted, TRUE) . '</code></pre></div>';
18 changes: 18 additions & 0 deletions test/rendered/test-original.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

/**
* @file
* Demonstration file of using TagConverter library.
*/

require 'vendor/autoload.php';

use writecrow\TagConverter\TagConverter;

// Read the traditional-structure fixture (resolved via the include path).
$fixture_path = 'test/data/file-with-traditional-structure.txt';
$raw_contents = file_get_contents($fixture_path, FILE_USE_INCLUDE_PATH);

// Run the fixture through TagConverter::php() and print the result inside a
// preformatted code block.
$converted = TagConverter::php($raw_contents);
echo '<div><pre><code>' . print_r($converted, TRUE) . '</code></pre></div>';

0 comments on commit 895dcf8

Please sign in to comment.