Skip to content

Commit

Permalink
Tighten regex
Browse files (browse the repository at this point in the history)
Add missing punctuation characters
  • Loading branch information
u01jmg3 committed Dec 29, 2018
1 parent 9077e29 commit cf45cbc
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions docx2md.php
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,10 @@ private function docx2md(array $args, $isTestMode = false)
$xml = preg_replace('#\s*<br\s*/?>#i', "\n", $xml);

// Remove leading whitespace before closing tags
- $xml = preg_replace('/\s*(\<\/)/m', '$1', $xml);
+ $xml = preg_replace('/\s+(\<\/)/m', '\\1', $xml);

// Remove whitespace between tags
- $xml = preg_replace('/(\>)\s*(\<)/m', '$1$2', $xml);
+ $xml = preg_replace('/(\>)\s+(\<)/m', '\\1\\2', $xml);

$intermediaryDocument->loadXML($xml);

Expand Down Expand Up @@ -376,7 +376,7 @@ private function docx2md(array $args, $isTestMode = false)
$output = preg_replace('! +!', ' ', $output);

// Remove spaces preceding punctuation
- $output = preg_replace('/\s*([\.,\?\!])/', '\\1', $output);
+ $output = preg_replace('/\s+([\.,;:\?\!])/', '\\1', $output);

// Escape existing chars used in markdown as formatting
$output = addcslashes($output, '*_~`');
Expand Down

0 comments on commit cf45cbc

Please sign in to comment.