From aadb26495f9a31b1aa31ecbf473d43d19dc0d973 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Wed, 20 Dec 2023 22:33:17 +1100 Subject: [PATCH 1/6] Compile OMML for maths with linebreak: https://github.com/metanorma/metanorma-standoc/issues/111 --- lib/html2doc/math.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/html2doc/math.rb b/lib/html2doc/math.rb index 89f7fcd..efeedc7 100644 --- a/lib/html2doc/math.rb +++ b/lib/html2doc/math.rb @@ -165,10 +165,9 @@ def ooml_clean(xml) def mathml_to_ooml1(xml, docnamespaces) doc = Nokogiri::XML::Document::new doc.root = ooxml_cleanup(xml, docnamespaces) - # ooxml = @xsltemplate.transform(doc) - d = xml.parent["block"] != "false" # display_style - ooxml = Nokogiri::XML(Plurimath::Math.parse(doc.to_xml(indent: 0), - :mathml).to_omml) + #d = xml.parent["block"] != "false" # display_style + ooxml = Nokogiri::XML(Plurimath::Math + .parse(doc.to_xml(indent: 0), :mathml).to_omml(split_on_linebreak: true)) ooxml = unitalic(accent_tr(ooxml)) ooxml = ooml_clean(uncenter(xml, ooxml)) xml.swap(ooxml) From e94ba424539a13d4be081515f9c86953be8734aa Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Wed, 20 Dec 2023 22:37:44 +1100 Subject: [PATCH 2/6] Compile OMML for maths with linebreak: https://github.com/metanorma/metanorma-standoc/issues/111 --- spec/html2doc_spec.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/spec/html2doc_spec.rb b/spec/html2doc_spec.rb index 678e1d4..4d04340 100644 --- a/spec/html2doc_spec.rb +++ b/spec/html2doc_spec.rb @@ -506,6 +506,31 @@ def image_clean(xml) OUTPUT end + it "processes linebreaks in MathML mtext" do + Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"]) + .process(html_input("
+ + x=y + +
")) + expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) + .to match_fuzzy(<<~OUTPUT) + #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} + #{word_body('
+ + x= + + +
+
+ + y + +
', '
')} + #{WORD_FTR1} + OUTPUT + end + it "unwraps and converts accent in MathML" do Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"]) .process(html_input("
From e3d822fcc037e25df112a27a122bf6c14a1c3ca0 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Thu, 21 Dec 2023 00:51:45 +1100 Subject: [PATCH 3/6] Compile OMML for maths with linebreak: https://github.com/metanorma/metanorma-standoc/issues/111 --- lib/html2doc/math.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/html2doc/math.rb b/lib/html2doc/math.rb index efeedc7..a629896 100644 --- a/lib/html2doc/math.rb +++ b/lib/html2doc/math.rb @@ -229,8 +229,8 @@ def uncenter_unneeded(math, ooxml, alignnode) (math_block?(ooxml, math) || !alignnode) and return ooxml if !math_only_para?(alignnode) ooxml.name == "oMathPara" and - ooxml = ooxml.elements.detect { |x| x.name == "oMath" } - return ooxml + ooxml = ooxml.elements.select { |x| x.name == "oMath" } + return Nokogiri::XML::NodeSet.new(math.document, ooxml) end nil end From 194ca19b4ed1d5dcd4e2bbb7c27992d43d0c430e Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Thu, 21 Dec 2023 00:54:52 +1100 Subject: [PATCH 4/6] Compile OMML for maths with linebreak: https://github.com/metanorma/metanorma-standoc/issues/111 --- lib/html2doc/math.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/html2doc/math.rb b/lib/html2doc/math.rb index a629896..412554b 100644 --- a/lib/html2doc/math.rb +++ b/lib/html2doc/math.rb @@ -229,7 +229,7 @@ def uncenter_unneeded(math, ooxml, alignnode) (math_block?(ooxml, math) || !alignnode) and return ooxml if !math_only_para?(alignnode) ooxml.name == "oMathPara" and - ooxml = ooxml.elements.select { |x| x.name == "oMath" } + ooxml = ooxml.elements.select { |x| %w(oMath r).include?(x.name) } return Nokogiri::XML::NodeSet.new(math.document, ooxml) end nil From 4ef7b5d4f7ac387737a47d8af85a8211477b49d3 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Thu, 21 Dec 2023 01:25:42 +1100 Subject: [PATCH 5/6] Compile OMML for maths with linebreak: https://github.com/metanorma/metanorma-standoc/issues/111 --- lib/html2doc/math.rb | 47 +++++++++++++++++++++++++++---------------- spec/html2doc_spec.rb | 14 ++++++++++--- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/lib/html2doc/math.rb b/lib/html2doc/math.rb index 412554b..9f5ce08 100644 --- a/lib/html2doc/math.rb +++ b/lib/html2doc/math.rb @@ -38,13 +38,13 @@ def unwrap_accents(doc) # random fixes to MathML input that OOXML needs to render properly def ooxml_cleanup(math, docnamespaces) - #encode_math( - unwrap_accents( - mathml_preserve_space( - mathml_insert_rows(math, docnamespaces), docnamespaces - ), - ) - #) + # encode_math( + unwrap_accents( + mathml_preserve_space( + mathml_insert_rows(math, docnamespaces), docnamespaces + ), + ) + # ) math.add_namespace(nil, MATHML_NS) math end @@ -165,7 +165,7 @@ def ooml_clean(xml) def mathml_to_ooml1(xml, docnamespaces) doc = Nokogiri::XML::Document::new doc.root = ooxml_cleanup(xml, docnamespaces) - #d = xml.parent["block"] != "false" # display_style + # d = xml.parent["block"] != "false" # display_style ooxml = Nokogiri::XML(Plurimath::Math .parse(doc.to_xml(indent: 0), :mathml).to_omml(split_on_linebreak: true)) ooxml = unitalic(accent_tr(ooxml)) @@ -201,9 +201,10 @@ def math_only_para?(node) x.text.strip.empty? end - def math_block?(_ooxml, mathml) + def math_block?(ooxml, mathml) # ooxml.name == "oMathPara" || mathml["displaystyle"] == "true" - mathml["displaystyle"] == "true" + mathml["displaystyle"] == "true" && + ooxml.xpath("./m:oMath", "m" => OOXML_NS).size <= 1 end STYLE_BEARING_NODE = @@ -227,11 +228,23 @@ def uncenter(math, ooxml) def uncenter_unneeded(math, ooxml, alignnode) (math_block?(ooxml, math) || !alignnode) and return ooxml - if !math_only_para?(alignnode) - ooxml.name == "oMathPara" and - ooxml = ooxml.elements.select { |x| %w(oMath r).include?(x.name) } - return Nokogiri::XML::NodeSet.new(math.document, ooxml) - end - nil - end + math_only_para?(alignnode) and return nil + ooxml.name == "oMathPara" and + ooxml = ooxml.elements.select { |x| %w(oMath r).include?(x.name) } + ooxml.size > 1 ? nil : Nokogiri::XML::NodeSet.new(math.document, ooxml) + end + + # first = true + # ooxml.reverse.map do |e| + # if e.name == "oMath" && first + # first = false + # e + # elsif e.name == "oMath" + # e.wrap("").previous = "" + # e.parent + # else + # e + # end + # e.name == "oMath" and first = false + # end.reverse end diff --git a/spec/html2doc_spec.rb b/spec/html2doc_spec.rb index 4d04340..9fd8802 100644 --- a/spec/html2doc_spec.rb +++ b/spec/html2doc_spec.rb @@ -279,6 +279,7 @@ def image_clean(xml) end RSpec.describe Html2Doc do +=begin it "has a version number" do expect(Html2Doc::VERSION).not_to be nil end @@ -505,26 +506,33 @@ def image_clean(xml) #{WORD_FTR1} OUTPUT end - +=end it "processes linebreaks in MathML mtext" do Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"]) .process(html_input("
x=y + =z
")) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
- + x=
- y + y= + + +
+
+ + z
', '
')} #{WORD_FTR1} From 993cd076b8df21acb53b5bfc5e1ce64bc4b3cbd9 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Thu, 21 Dec 2023 01:50:43 +1100 Subject: [PATCH 6/6] version bump --- lib/html2doc/version.rb | 2 +- spec/html2doc_spec.rb | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/html2doc/version.rb b/lib/html2doc/version.rb index 6208bd9..139b74f 100644 --- a/lib/html2doc/version.rb +++ b/lib/html2doc/version.rb @@ -1,3 +1,3 @@ class Html2Doc - VERSION = "1.7.1".freeze + VERSION = "1.7.2".freeze end diff --git a/spec/html2doc_spec.rb b/spec/html2doc_spec.rb index 9fd8802..ed32852 100644 --- a/spec/html2doc_spec.rb +++ b/spec/html2doc_spec.rb @@ -279,7 +279,6 @@ def image_clean(xml) end RSpec.describe Html2Doc do -=begin it "has a version number" do expect(Html2Doc::VERSION).not_to be nil end @@ -506,7 +505,7 @@ def image_clean(xml) #{WORD_FTR1} OUTPUT end -=end + it "processes linebreaks in MathML mtext" do Html2Doc.new(filename: "test", asciimathdelims: ["{{", "}}"]) .process(html_input("