diff --git a/src/odr/internal/html/document.cpp b/src/odr/internal/html/document.cpp index cfa14e9d..ec20726e 100644 --- a/src/odr/internal/html/document.cpp +++ b/src/odr/internal/html/document.cpp @@ -80,7 +80,7 @@ void front(const Document &document, HtmlWriter &out, const HtmlConfig &config, out.write_body_begin(HtmlElementOptions().set_class(body_clazz)); } -void back(const Document &document, internal::html::HtmlWriter &out, +void back(const Document &document, html::HtmlWriter &out, const HtmlConfig &config, const HtmlResourceLocator &resourceLocator) { (void)document; @@ -104,8 +104,8 @@ void back(const Document &document, internal::html::HtmlWriter &out, std::string fill_path_variables(const std::string &path, std::optional index = {}) { std::string result = path; - internal::util::string::replace_all(result, "{index}", - index ? std::to_string(*index) : ""); + util::string::replace_all(result, "{index}", + index ? std::to_string(*index) : ""); return result; } @@ -219,7 +219,7 @@ class SlideHtmlFragment final : public HtmlFragmentBase { void write_html_fragment(HtmlWriter &out, const HtmlConfig &config, const HtmlResourceLocator &resourceLocator) const final { - internal::html::translate_slide(m_slide, out, config, resourceLocator); + html::translate_slide(m_slide, out, config, resourceLocator); } private: @@ -253,7 +253,7 @@ class PageHtmlFragment final : public HtmlFragmentBase { void write_html_fragment(HtmlWriter &out, const HtmlConfig &config, const HtmlResourceLocator &resourceLocator) const final { - internal::html::translate_page(m_page, out, config, resourceLocator); + html::translate_page(m_page, out, config, resourceLocator); } private: @@ -304,12 +304,11 @@ Html html::translate_document(const odr::Document &document, std::uint32_t i = 0; for (const auto &fragment : service.fragments()) { std::string filled_path = get_output_path(document, i, output_path, config); - std::ofstream ostream(filled_path); + std::ofstream ostream(filled_path, std::ios::out); if (!ostream.is_open()) { throw FileWriteError(); } - internal::html::HtmlWriter out(ostream, config.format_html, - config.html_indent); + html::HtmlWriter out(ostream, config.format_html, config.html_indent); fragment.write_html_document(out.out(), config, resourceLocator); diff --git a/src/odr/internal/html/html_writer.cpp b/src/odr/internal/html/html_writer.cpp index 5fc3ee92..540bb1fd 100644 --- a/src/odr/internal/html/html_writer.cpp +++ b/src/odr/internal/html/html_writer.cpp @@ -121,123 +121,123 @@ HtmlElementOptions::set_extra(std::optional _extra) { HtmlWriter::HtmlWriter(std::ostream &out, bool format, std::uint8_t indent, std::uint32_t current_indent) - : m_out{out}, m_format{format}, m_indent(indent, ' '), + : m_out{&out}, m_format{format}, m_indent(indent, ' '), m_current_indent{current_indent} {} HtmlWriter::HtmlWriter(std::ostream &out, const HtmlConfig &config) : HtmlWriter{out, config.format_html, config.html_indent} {} void HtmlWriter::write_begin() { - m_out << "\n"; - m_out << ""; + out() << "\n"; + out() << ""; } void HtmlWriter::write_end() { write_new_line(); - m_out << ""; + out() << ""; } void HtmlWriter::write_header_begin() { write_new_line(); ++m_current_indent; - m_out << ""; + out() << ""; } void HtmlWriter::write_header_end() { --m_current_indent; write_new_line(); - m_out << ""; + out() << ""; } void HtmlWriter::write_header_title(const std::string &title) { write_new_line(); - m_out << "" << title << ""; + out() << "" << title << ""; } void HtmlWriter::write_header_viewport(const std::string &viewport) { write_new_line(); - m_out << R"("; + out() << R"("; } void HtmlWriter::write_header_target(const std::string &target) { write_new_line(); - m_out << ""; + out() << ""; } void HtmlWriter::write_header_charset(const std::string &charset) { write_new_line(); - m_out << ""; + out() << ""; } void HtmlWriter::write_header_style(const std::string &href) { write_new_line(); - m_out << R"("; + out() << R"("; } void HtmlWriter::write_header_style_begin() { write_new_line(); ++m_current_indent; - m_out << ""; + out() << ""; } void HtmlWriter::write_script(const std::string &src) { write_new_line(); - m_out << R"("; + out() << R"("; } void HtmlWriter::write_script_begin() { write_new_line(); ++m_current_indent; - m_out << ""; + out() << ""; } void HtmlWriter::write_body_begin(const HtmlElementOptions &options) { write_new_line(); ++m_current_indent; - m_out << ""; + out() << ""; } void HtmlWriter::write_body_end() { --m_current_indent; write_new_line(); - m_out << ""; + out() << ""; } void HtmlWriter::write_element_begin(const std::string &name, @@ -248,12 +248,12 @@ void HtmlWriter::write_element_begin(const std::string &name, m_stack.push_back({name, options.inline_element}); } - m_out << "<" << name; - write_element_options(m_out, options); + out() << "<" << name; + write_element_options(out(), options); if (options.close_type == HtmlCloseType::trailing) { - m_out << "/>"; + out() << "/>"; } else { - m_out << ">"; + out() << ">"; } } @@ -269,7 +269,7 @@ void HtmlWriter::write_element_end(const std::string &name) { } m_stack.pop_back(); - m_out << ""; + out() << ""; } bool HtmlWriter::is_inline_mode() const { @@ -286,9 +286,9 @@ void HtmlWriter::write_new_line() { return; } - m_out << '\n'; + out() << '\n'; for (std::uint32_t i = 0; i < m_current_indent; ++i) { - m_out << m_indent; + out() << m_indent; } } @@ -297,9 +297,9 @@ void HtmlWriter::write_raw(const HtmlWritable &writable, bool new_line) { write_new_line(); } - write_writable(m_out, writable); + write_writable(out(), writable); } -std::ostream &HtmlWriter::out() { return m_out; } +std::ostream &HtmlWriter::out() { return *m_out; } } // namespace odr::internal::html diff --git a/src/odr/internal/html/html_writer.hpp b/src/odr/internal/html/html_writer.hpp index 2f8158ab..85776796 100644 --- a/src/odr/internal/html/html_writer.hpp +++ b/src/odr/internal/html/html_writer.hpp @@ -88,7 +88,7 @@ class HtmlWriter { bool inline_element{false}; }; - std::ostream &m_out; + std::ostream *m_out{nullptr}; bool m_format{false}; std::string m_indent; std::uint32_t m_current_indent{0}; diff --git a/src/odr/internal/html/wvware_wrapper.cpp b/src/odr/internal/html/wvware_wrapper.cpp index 2e469bc6..fe40b8d5 100644 --- a/src/odr/internal/html/wvware_wrapper.cpp +++ b/src/odr/internal/html/wvware_wrapper.cpp @@ -1,9 +1,12 @@ #include +#include #include #include #include +#include +#include #include #include @@ -34,105 +37,299 @@ namespace { /// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wv.h#L2776-L2814 /// to allow for more state variables. struct TranslationState : public expand_data { - int i = 0; + explicit TranslationState(html::HtmlWriter _out) + : expand_data{}, out(std::move(_out)) {} + char *charset = nullptr; PAP *ppap = nullptr; - std::unique_ptr output_stream; -}; + struct { + int message = 0; + } special_char_handler_state = {}; -/// Originally from `wvWare.c` `wvStrangeNoGraphicData` -/// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wvWare.c#L661-L676 -/// simplified to HTML output -void strange_no_graphic_data(wvParseStruct *ps, int graphicstype) { - std::cerr << "Strange No Graphic Data in the 0x01/0x08 graphic\n"; + html::HtmlWriter out; +}; - // TODO print to output file - printf(R"(%#.2x graphic
)", graphicstype, - "StrangeNoGraphicData"); +/// Originally from `text.c` `wvConvertUnicodeToHtml` +/// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/text.c#L1999-L2154 +int convert_unicode_to_html(wvParseStruct *ps, U16 char16) { + auto *data = (TranslationState *)ps->userData; + auto &out = data->out; + + switch (char16) { + case 11: + out.out() << "
"; + return 1; + case 31: /* non-required hyphen */ + out.out() << "­"; /*vladimir@lukianov.name HTML 4.01 spec*/ + return 1; + case 30: + case 45: + case 0x2013: + out.out() << "-"; /* en-dash */ + return 1; + case 12: + case 13: + case 14: + case 7: + return 1; + case 34: + out.out() << """; + return 1; + case 38: + out.out() << "&"; + return 1; + case 60: + out.out() << "<"; + return 1; + case 62: + out.out() << ">"; + return 1; + /* + german characters, im assured that this is the right way to handle them + by Markus Schulte + + As the output encoding for HTML was chosen as UTF-8, + we don't need Ä etc. etc. I removed all but sz + -- MV 6.4.2000 + */ + + case 0xdf: + out.out() << "ß"; + return 1; + /* end german characters */ + case 0x2026: +#if 0 +/* +this just looks awful in netscape 4.5, so im going to do a very foolish +thing and just put ... instead of this +*/ + printf ("…"); +/*is there a proper html name for ... &ellipse;? Yes, … -- MV */ +#endif + out.out() << "…"; + return 1; + case 0x2019: + out.out() << "'"; + return 1; + case 0x2215: + out.out() << "/"; + return 1; + case 0xF8E7: /* without this, things should work in theory, but not for me */ + out.out() << "_"; + return 1; + case 0x2018: + out.out() << "`"; + return 1; + + /* Windows specials (MV): */ + case 0x0160: + out.out() << "Š"; + return 1; + case 0x0161: + out.out() << "š"; + return 1; + case 0x2014: + out.out() << "—"; + return 1; + case 0x201c: + out.out() << "“"; /* inverted double quotation mark */ + return 1; + case 0x201d: + out.out() << "”"; /* double q.m. */ + return 1; + case 0x201e: + out.out() << "„"; /* below double q.m. */ + return 1; + case 0x2020: + out.out() << "†"; + return 1; + case 0x2021: + out.out() << "‡"; + return 1; + case 0x2022: + out.out() << "•"; + return 1; + case 0x0152: + out.out() << "Œ"; + return 1; + case 0x0153: + out.out() << "œ"; + return 1; + case 0x0178: + out.out() << "Ÿ"; + return 1; + case 0x2030: + out.out() << "‰"; + return 1; + case 0x20ac: + out.out() << "€"; + return 1; + + /* Mac specials (MV): */ + + case 0xf020: + out.out() << " "; + return 1; + case 0xf02c: + out.out() << ","; + return 1; + case 0xf028: + out.out() << "("; + return 1; + + case 0xf03e: + out.out() << ">"; + return 1; + case 0xf067: + out.out() << "γ"; + return 1; + case 0xf064: + out.out() << "δ"; + return 1; + case 0xf072: + out.out() << "ρ"; + return 1; + case 0xf073: + out.out() << "σ"; + return 1; + case 0xf0ae: + out.out() << "→"; /* right arrow */ + return 1; + case 0xf0b6: + out.out() << "∂"; /* partial deriv. */ + return 1; + case 0xf0b3: + out.out() << "≥"; + return 1; + default: + break; + } + /* Debugging aid: */ + /* if (char16 >= 0x100) printf("[%x]", char16); */ + return 0; } -/// Originally from `wvWare.c` `name_to_url` -/// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wvWare.c#L1703-L1772 -char *name_to_url(char *name) { - // TODO get rid of static - // TODO use std::string - static char *url = 0; - static long max = 0; - char *ptr = 0; - long count = 0; - - ptr = name; - while (*ptr) { - switch (*ptr) { - case ' ': - count += 3; - break; - default: - count++; - break; - } - ptr++; +/// Originally from `text.c` `wvOutputFromUnicode` +/// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/text.c#L757-L840 +void output_from_unicode(wvParseStruct *ps, U16 eachchar, char *outputtype) { + auto *data = (TranslationState *)ps->userData; + auto &out = data->out; + + // TODO static + static char cached_outputtype[33]; /* Last outputtype */ + static GIConv g_iconv_handle = (GIConv)-1; /* Cached iconv descriptor */ + static int need_swapping; + gchar *ibuf, *obuf; + size_t ibuflen, obuflen, len, count, i; + U8 buffer[2], buffer2[5]; + + if (convert_unicode_to_html(ps, eachchar) != 0) { + return; } - count++; - if (count > max) { - char *more = nullptr; - if (url == nullptr) { - more = static_cast(malloc(count)); - } else { - more = static_cast(realloc(url, count)); + if ((g_iconv_handle == (GIConv)-1) || + strcmp(cached_outputtype, outputtype) != 0) { + if ((g_iconv_handle != (GIConv)-1)) { + g_iconv_close(g_iconv_handle); } - if (more != nullptr) { - url = more; - max = count; + + g_iconv_handle = g_iconv_open(outputtype, "UCS-2"); + if (g_iconv_handle == (GIConv)-1) { + std::cerr << "g_iconv_open fail: " << errno + << ", cannot convert UCS-2 to " << outputtype << "\n"; + out.out() << "?"; + return; } - } - if (url != nullptr) { - count = 0; - ptr = name; - while ((*ptr != 0) && (count < max)) { - switch (*ptr) { - case ' ': - url[count++] = '%'; - if (count < max) - url[count++] = '2'; - if (count < max) - url[count++] = '0'; - break; - default: - url[count++] = *ptr; - break; - } - ptr++; + /* safe to cache the output type here */ + strcpy(cached_outputtype, outputtype); + + /* Determining if unicode biteorder is swapped (glibc < 2.2) */ + need_swapping = 1; + + buffer[0] = 0x20; + buffer[1] = 0; + ibuf = reinterpret_cast(buffer); + obuf = reinterpret_cast(buffer2); + ibuflen = 2; + obuflen = 5; + + count = g_iconv(g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen); + if (count >= 0) { + need_swapping = buffer2[0] != 0x20; } - url[max - 1] = 0; + } + + if (need_swapping) { + buffer[0] = (eachchar >> 8) & 0x00ff; + buffer[1] = eachchar & 0x00ff; } else { - std::cerr << "failed to convert name to URL\n"; - return name; + buffer[0] = eachchar & 0x00ff; + buffer[1] = (eachchar >> 8) & 0x00ff; } - return url; + ibuf = reinterpret_cast(buffer); + obuf = reinterpret_cast(buffer2); + + ibuflen = 2; + len = obuflen = 5; + + count = g_iconv(g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen); + if (count == (size_t)-1) { + std::cerr << "iconv failed, errno: " << errno << ", char: 0x" << std::hex + << eachchar << ", UCS-2 -> " << outputtype << "\n"; + + /* I'm torn here - do i just announce the failure, continue, or copy over to + * the other buffer? */ + + /* errno is usually 84 (illegal byte sequence) + should i reverse the bytes and try again? */ + out.out() << ibuf[1]; + } else { + len = len - obuflen; + + for (i = 0; i < len; i++) { + out.out() << buffer2[i]; + } + } +} + +/// Originally from `wvWare.c` `wvStrangeNoGraphicData` +/// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wvWare.c#L661-L676 +/// simplified to HTML output +void strange_no_graphic_data(wvParseStruct *ps, int graphicstype) { + auto *data = (TranslationState *)ps->userData; + auto &out = data->out; + + std::cerr << "Strange No Graphic Data in the 0x01/0x08 graphic\n"; + + // TODO + out.out() << R"()
)"; } /// Originally from `wvWare.c` `wvPrintGraphics` /// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wvWare.c#L1239-L1287 /// simplified to HTML output -void print_graphics(int graphicstype, int width, int height, char *source) { +void print_graphics(wvParseStruct *ps, int graphicstype, int width, int height, + char *source) { // upstream converts to PNG, we just use the original format as the browser // should support them + auto *data = (TranslationState *)ps->userData; + auto &out = data->out; + // TODO export/embed image - // TODO replace printf - printf(R"(%#.2x graphic
)", - width, height, graphicstype, name_to_url(source)); + out.out() << R"()
)"; } /// Originally from `wvWare.c` `myelehandler` /// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wvWare.c#L503-L599 -int element_handler(wvParseStruct *ps, wvTag tag, void *props, int dirty) { +int element_handler(wvParseStruct *ps, wvTag tag, void *props, int /*dirty*/) { auto *data = (TranslationState *)ps->userData; data->anSttbfAssoc = &ps->anSttbfAssoc; data->lfo = &ps->lfo; @@ -238,17 +435,15 @@ int document_handler(wvParseStruct *ps, wvTag tag) { data->endcell = &ps->endcell; data->vmerges = &ps->vmerges; data->norows = &ps->norows; - if (data->i == 0) { - wvSetEntityConverter(data); - data->filename = ps->filename; - data->whichcell = 0; - data->whichrow = 0; - data->asep = nullptr; - data->i++; - wvInitPAP(&data->lastpap); - data->nextpap = nullptr; - data->ps = ps; - } + + wvSetEntityConverter(data); + data->filename = ps->filename; + data->whichcell = 0; + data->whichrow = 0; + data->asep = nullptr; + wvInitPAP(&data->lastpap); + data->nextpap = nullptr; + data->ps = ps; if (data->charset == nullptr) { data->charset = wvAutoCharset(ps); @@ -279,18 +474,15 @@ int char_handler(wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid) { ps->fieldmiddle = 0; fieldCharProc(ps, eachchar, chartype, lid); /* temp */ return 0; - break; case 20: fieldCharProc(ps, eachchar, chartype, lid); ps->fieldmiddle = 1; return 0; - break; case 21: ps->fieldmiddle = 0; ps->fieldstate--; fieldCharProc(ps, eachchar, chartype, lid); /* temp */ return 0; - break; case 0x08: std::cerr << "hmm did we loose the fSpec flag ?, this is possibly a bug\n"; break; @@ -298,16 +490,18 @@ int char_handler(wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid) { break; } - if (ps->fieldstate != 0) { - if (fieldCharProc(ps, eachchar, chartype, lid) != 0) { - return 0; - } + if (ps->fieldstate != 0 && fieldCharProc(ps, eachchar, chartype, lid) != 0) { + return 0; } - if (data->charset != nullptr) { - wvOutputHtmlChar(eachchar, chartype, data->charset, lid); - } else { - wvOutputHtmlChar(eachchar, chartype, wvAutoCharset(ps), lid); + // from `wvOutputHtmlChar` + { + char *outputtype = + data->charset != nullptr ? data->charset : wvAutoCharset(ps); + if (chartype != 0) { + eachchar = wvHandleCodePage(eachchar, lid); + } + output_from_unicode(ps, eachchar, outputtype); } return 0; @@ -316,20 +510,22 @@ int char_handler(wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid) { /// Originally from `wvWare.c` `mySpecCharProc` /// https://github.com/opendocument-app/wvWare/blob/c015326b001f1ad6dfb1f5e718461c16c56cca5f/wvWare.c#L1289-L1553 int special_char_handler(wvParseStruct *ps, U16 eachchar, CHP *achp) { - static int message; - PICF picf; - FSPA *fspa; auto *data = (TranslationState *)ps->userData; + auto &state = data->special_char_handler_state; + auto &out = data->out; + + PICF picf; + FSPA *fspa = nullptr; switch (eachchar) { case 19: - std::cerr << "field began\n"; + // field began ps->fieldstate++; ps->fieldmiddle = 0; fieldCharProc(ps, eachchar, 0, 0x400); /* temp */ return 0; case 20: - if (achp->fOle2) { + if (achp->fOle2 != 0) { std::cerr << "this field has an associated embedded object of id " << achp->fcPic_fcObj_lTagObj << "\n"; } @@ -341,6 +537,7 @@ int special_char_handler(wvParseStruct *ps, U16 eachchar, CHP *achp) { ps->fieldmiddle = 0; fieldCharProc(ps, eachchar, 0, 0x400); /* temp */ return 0; + default: break; } @@ -358,25 +555,21 @@ int special_char_handler(wvParseStruct *ps, U16 eachchar, CHP *achp) { Blip blip; char *name; long p = wvStream_tell(ps->data); - std::cerr << "picture 0x01 here, at offset " << achp->fcPic_fcObj_lTagObj - << " in Data Stream, obj is " << achp->fObj << ", ole is " - << achp->fOle2 << "\n"; - if (achp->fOle2) { + if (achp->fOle2 != 0) { return 0; } - if (no_graphics != 0) { - wvStream_goto(ps->data, achp->fcPic_fcObj_lTagObj); - wvGetPICF(wvQuerySupported(&ps->fib, nullptr), &picf, ps->data); - f = picf.rgb; - if (wv0x01(&blip, f, picf.lcb - picf.cbHeader) != 0) { - name = wvHtmlGraphic(ps, &blip); - print_graphics(0x01, (int)wvTwipsToHPixels(picf.dxaGoal), - (int)wvTwipsToVPixels(picf.dyaGoal), name); - wvFree(name); - } else { - strange_no_graphic_data(ps, 0x01); - } + + wvStream_goto(ps->data, achp->fcPic_fcObj_lTagObj); + wvGetPICF(wvQuerySupported(&ps->fib, nullptr), &picf, ps->data); + f = picf.rgb; + if (wv0x01(&blip, f, picf.lcb - picf.cbHeader) != 0) { + name = wvHtmlGraphic(ps, &blip); + print_graphics(ps, 0x01, (int)wvTwipsToHPixels(picf.dxaGoal), + (int)wvTwipsToVPixels(picf.dyaGoal), name); + wvFree(name); + } else { + strange_no_graphic_data(ps, 0x01); } wvStream_goto(ps->data, p); @@ -386,28 +579,28 @@ int special_char_handler(wvParseStruct *ps, U16 eachchar, CHP *achp) { Blip blip; char *name; if (wvQuerySupported(&ps->fib, nullptr) == WORD8) { - if (!no_graphics) { - if (ps->nooffspa > 0) { - fspa = wvGetFSPAFromCP(ps->currentcp, ps->fspa, ps->fspapos, - ps->nooffspa); - - if (fspa == nullptr) { - std::cerr << "No fspa! Insanity abounds!\n"; - return 0; - } - - data->props = fspa; - if (wv0x08(&blip, fspa->spid, ps)) { - name = wvHtmlGraphic(ps, &blip); - print_graphics( - 0x08, (int)wvTwipsToHPixels(fspa->xaRight - fspa->xaLeft), - (int)wvTwipsToVPixels(fspa->yaBottom - fspa->yaTop), name); - wvFree(name); - } else - strange_no_graphic_data(ps, 0x08); - } else { - std::cerr << "nooffspa was <=0! Ignoring.\n"; + if (ps->nooffspa > 0) { + fspa = + wvGetFSPAFromCP(ps->currentcp, ps->fspa, ps->fspapos, ps->nooffspa); + + if (fspa == nullptr) { + std::cerr << "No fspa! Insanity abounds!\n"; + return 0; } + + data->props = fspa; + if (wv0x08(&blip, (int)fspa->spid, ps) != 0) { + name = wvHtmlGraphic(ps, &blip); + print_graphics( + ps, 0x08, + (int)wvTwipsToHPixels((short)(fspa->xaRight - fspa->xaLeft)), + (int)wvTwipsToVPixels((short)(fspa->yaBottom - fspa->yaTop)), + name); + wvFree(name); + } else + strange_no_graphic_data(ps, 0x08); + } else { + std::cerr << "nooffspa was <=0! Ignoring.\n"; } } else { FDOA *fdoa; @@ -428,45 +621,45 @@ int special_char_handler(wvParseStruct *ps, U16 eachchar, CHP *achp) { U16 mtextra[8] = {'M', 'T', ' ', 'E', 'x', 't', 'r', 'a'}; if (0 == memcmp(symbol, ps->fonts.ffn[achp->ftcSym].xszFfn, 12)) { - if ((!message) && (strcasecmp("UTF-8", data->charset) != 0)) { + if ((state.message == 0) && (strcasecmp("UTF-8", data->charset) != 0)) { std::cerr << "Symbol font detected (too late sorry!), rerun wvHtml with option --charset utf-8\n\ option to support correct symbol font conversion to a viewable format.\n"; - message++; + state.message++; } - wvOutputFromUnicode(wvConvertSymbolToUnicode(achp->xchSym - 61440), + output_from_unicode(ps, wvConvertSymbolToUnicode(achp->xchSym - 61440), data->charset); return 0; } else if (0 == memcmp(mtextra, ps->fonts.ffn[achp->ftcSym].xszFfn, 16)) { - if ((message == 0) && (strcasecmp("UTF-8", data->charset) != 0)) { + if ((state.message == 0) && (strcasecmp("UTF-8", data->charset) != 0)) { std::cerr << "MT Extra font detected (too late sorry!), rerun wvHtml with option --charset utf-8\n\ option to support correct symbol font conversion to a viewable format.\n"; - message++; + state.message++; } - wvOutputFromUnicode(wvConvertMTExtraToUnicode(achp->xchSym - 61440), + output_from_unicode(ps, wvConvertMTExtraToUnicode(achp->xchSym - 61440), data->charset); return 0; } else if (0 == memcmp(wingdings, ps->fonts.ffn[achp->ftcSym].xszFfn, 18)) { - if (message == 0) { + if (state.message == 0) { std::cerr << "Wingdings font detected, i need a mapping table to " "unicode for this\n"; - message++; + state.message++; } } else { - if (message == 0) { + if (state.message == 0) { char *fontname = wvWideStrToMB(ps->fonts.ffn[achp->ftcSym].xszFfn); std::cerr << "Special font " << fontname - << ", i need a mapping table to unicode for this\n"; + << ", I need a mapping table to unicode for this\n"; wvFree(fontname); - // TODO replace printf - printf("*"); + out.out() << "*"; + state.message++; } return 0; } } default: - return 0; + break; } return 0; @@ -477,8 +670,17 @@ option to support correct symbol font conversion to a viewable format.\n"; Html html::translate_wvware_oldms_file( const WvWareLegacyMicrosoftFile &oldms_file, const std::string &output_path, const HtmlConfig &config) { + HtmlResourceLocator resourceLocator = + local_resource_locator(output_path, config); + auto output_file_path = output_path + "/document.html"; + std::ofstream ostream(output_file_path, std::ios::out); + if (!ostream.is_open()) { + throw FileWriteError(); + } + html::HtmlWriter out(ostream, config.format_html, config.html_indent); + wvParseStruct &ps = oldms_file.parse_struct(); wvSetElementHandler(&ps, element_handler); @@ -487,27 +689,29 @@ Html html::translate_wvware_oldms_file( wvSetSpecialCharHandler(&ps, special_char_handler); state_data handle; - TranslationState translation_state; - translation_state.output_stream = - std::make_unique(output_file_path, std::ios::out); + TranslationState translation_state(out); wvInitStateData(&handle); translation_state.sd = &handle; ps.userData = &translation_state; - *translation_state.output_stream << "\n\n\n" - << "\n" - << "Document\n" - << "\n\n"; + out.write_begin(); + out.write_header_begin(); + out.write_header_charset("UTF-8"); + out.write_header_target("_blank"); + out.write_header_title("odr"); + out.write_header_viewport( + "width=device-width,initial-scale=1.0,user-scalable=yes"); + out.write_header_end(); + out.write_body_begin(); if (wvHtml(&ps) != 0) { throw std::runtime_error("wvHtml failed"); } - *translation_state.output_stream << "\n\n"; - - translation_state.output_stream->flush(); + out.write_body_end(); + out.write_end(); return { FileType::legacy_word_document, config, {{"document", output_file_path}}};