diff --git a/README.md b/README.md index b1c9a45..ad79928 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ translates TeX Books (simple one like stories kind of book) to HTML. I use this ## What this project *is not* The purpose of this project **is not** and never will be a kind of "TeX To HTML" compiler, I do *not* intend to implement the full TeX -and LaTeX specification. As such the support of mathematics is likely **never to be** included here. This is a tool mainly done *for me* for my usage, that is publishing my book on ScribbleHub and the web. I'm sure you can find real compilers if you want to support +and LaTeX specification. This is a tool mainly done *for me* for my usage, that is publishing my book on ScribbleHub and the web. I'm sure you can find real compilers if you want to support the full specification of TeX and LaTeX ## Support @@ -23,6 +23,13 @@ The project currently translate *LaTeX* to human-readable HTML files (the line b - `textbf` is replaced by `` - `newline`, `\\`, and `par` puts a `
` - `bigskip` closes the paragraph and open another one. +- Some math + - Inline math with `$` + - Math env + - Align* + - Align + - Equation + - Equation* ## Near Future TODO List * parsing glossary entries from a glossary file and printing the `gls` with a link to the glossary diff --git a/bin/main.ml b/bin/main.ml index d111a91..4171970 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -14,7 +14,7 @@ let spec = [ ] let execute_command file outname start_chapter = - Htmlfromtexbooks.Parser.print_file_in_html ~min_chap:start_chapter file outname;; + Htmlfromtexbooks.Htmlgen.print_file_in_html ~min_chap:start_chapter file outname;; let parse_filename file outname start_chapter = if not (Sys.file_exists file) then (Printf.printf "The %s file do not exists" file; exit 2) diff --git a/lib/dune b/lib/dune index 2c34db5..a81c472 100644 --- a/lib/dune +++ b/lib/dune @@ -1,3 +1,5 @@ +(include_subdirs unqualified) + (library (name htmlfromtexbooks) (libraries str)) diff --git a/lib/htmlgen.ml b/lib/htmlgen.ml new file mode 100644 index 0000000..e1bd4d2 --- /dev/null +++ b/lib/htmlgen.ml @@ -0,0 +1,218 @@ +open Parser +open Glossary +open Utils + +let url_encoded_str s = s;; + +let print_table_of_content ast min_chap = + let count = [|1;1;1;1|] in + let rec aux acc ast = + match ast with + | [] -> acc + | Chapter (s,l)::q -> + let chapnum = count.(0) in + begin + count.(0) <- count.(0) + 1; + count.(1) <- 1; + count.(2) <- 1; + count.(3) <- 1; + end; + let str = aux "" l in + let new_line = if chapnum>=min_chap then Printf.sprintf "
  • Chapter %i : %s
  • \n" + chapnum (chapnum-min_chap+1) s else "" in + aux (acc^new_line^str) q + + | Section (s,l)::q -> + let chapnum,secnum = count.(0),count.(1) in + begin + count.(1) <- count.(1) + 1; + count.(2) <- 1; + count.(3) <- 1; + end; + let str = aux "" l in + let new_line = Printf.sprintf "
  • Section %i.%i : %s
  • \n" + (2.**(float chapnum)*.3.**(float secnum)) (chapnum-min_chap+1) secnum s in + aux (acc^new_line^str) q + + | Subsection (s,l)::q -> + let chapnum,secnum,ssecnum = count.(0),count.(1),count.(2) in + begin + count.(2) <- count.(2) + 1; + count.(3) <- 1; + end; + let str = aux "" l in + let new_line = Printf.sprintf "
  • Subsection %i.%i.%i : %s
  • \n" + (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)) (chapnum-min_chap+1) secnum ssecnum s in + aux (acc^new_line^str) q + + | Subsubsection (s,l)::q -> + let chapnum,secnum,ssecnum,sssecnum = count.(0),count.(1),count.(2),count.(3) in + begin + count.(3) <- count.(3) + 1; + end; + let str = aux "" l in + let new_line = Printf.sprintf "
  • Subsubsection %i.%i.%i.%i : %s
  • \n" + (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)*.7.**(float sssecnum)) (chapnum-min_chap+1) secnum ssecnum sssecnum s in + aux (acc^new_line^str) q + | Env (_,l)::q -> let a = aux acc l in aux (acc^a) q + | _::q -> aux acc q + in (aux "" ast);; + + let parse_to_html ?(min_chap=1) write_before ast= + let count = [|1;1;1;1|] in + let rec aux ?(write=write_before) acc ast = + match ast with + | [] -> acc + | Nul::q -> aux acc q + | Line s::q -> + let line= if write then Printf.sprintf "%s\n" s else "" + in aux ~write:write (acc^line) q + | Math s::q -> + let url = Printf.sprintf "https://latex.codecogs.com/svg.image?%s"s in + let url = url_encoded_str url in + let line = if write then Printf.sprintf "\n" url else "" + in aux ~write:write (acc^line) q + | AtomicCmd (s,_)::q -> + let new_line = (match s with + | "par" -> "
    \n" + | "bigskip" -> "

    \n\n

    \n" + | "\\" -> "
    \n" + | "printglossaries" -> "" + | "sep" -> "

    ***
    " + | "item" -> "·" + | "newline" -> "
    \n" + | "ast" -> "*" + | e -> + (try + let structure = Hashtbl.find commands e in + let str = aux ~write:write acc structure + in str + with _ -> "")) + in let new_acc = if write then acc^new_line^"\n" else "" + in aux ~write:write new_acc q + + | OneArgCmd (s,_,l)::q -> + let str = aux "" l in + let new_line = (match s with + | "par" -> "
    \n" + | "bigskip" -> "

    \n\n

    \n" + | "\\" -> "
    \n" + | "printglossaries" -> "" + | "item" -> "·" + | "sep" -> "

    ***
    " + | "newline" -> "
    \n" + | "ast" -> "*" + | "gls" -> + (match l with + | [] -> "" + | Line s::_ -> + let name,_ = recognize_gls s in Printf.sprintf "%s " s name + | _::_ -> "") + | "textit" -> (Printf.sprintf "%s" str) + | "textbf" -> (Printf.sprintf "%s" str) + | "url" -> (Printf.sprintf "%s" (Str.global_replace (Str.regexp "\n") "" str) str) + | e -> + (try + let structure = Hashtbl.find commands e in + let str = aux ~write:write acc structure + in str + with _ -> "")) + in let new_acc = if write then acc^(new_line) else "" + in aux ~write:write new_acc q + + | Chapter (s,l)::q -> + let chapnum = count.(0) in + begin + count.(0) <- count.(0) + 1; + count.(1) <- 1; + count.(2) <- 1; + count.(3) <- 1; + end; + let str = aux ~write:(chapnum>=min_chap) "" l in + let new_line = if chapnum>=min_chap then Printf.sprintf "

    Chapter %i : %s


    \n" + chapnum (chapnum-min_chap+1) s else "" in + aux ~write:write (acc^new_line^str) q + + | Section (s,l)::q -> + let chapnum,secnum = count.(0),count.(1) in + begin + count.(1) <- count.(1) + 1; + count.(2) <- 1; + count.(3) <- 1; + end; + let str = aux ~write:write "" l in + let new_line = Printf.sprintf "

    Section %i.%i : %s


    \n" + (2.**(float chapnum)*.3.**(float secnum)) (chapnum-min_chap+1) secnum s in + aux ~write:write (acc^new_line^str) q + + | Subsection (s,l)::q -> + let chapnum,secnum,ssecnum = count.(0),count.(1),count.(2) in + begin + count.(2) <- count.(2) + 1; + count.(3) <- 1; + end; + let str = aux ~write:write "" l in + let new_line = Printf.sprintf "

    Subsection %i.%i.%i : %s


    \n" + (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)) (chapnum-min_chap+1) secnum ssecnum s in + aux ~write:write (acc^new_line^str) q + + | Subsubsection (s,l)::q -> + let chapnum,secnum,ssecnum,sssecnum = count.(0),count.(1),count.(2),count.(3) in + begin + count.(3) <- count.(3) + 1; + end; + let str = aux ~write:write "" l in + let new_line = Printf.sprintf "

    Subsubsection %i.%i.%i.%i : %s


    \n" + (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)*.7.**(float sssecnum)) (chapnum-min_chap+1) secnum ssecnum sssecnum s in + aux ~write:write (acc^new_line^str) q + + | Env (s,l)::q -> + let str = aux ~write:write "" l in + let new_line = (match s with + | "document" -> str + | "center" -> Printf.sprintf "
    \n%s\n
    " str + | _ -> str) + in aux ~write:write (acc^new_line^"\n") q + | _::q -> aux acc q + in aux "" ast;; + + +let prepare_body name str toc = + let line = "" ^ name ^ "\n" + in let line = line ^ "\n" + in let line = line ^ "" + in let line = line ^ "
    \n" + in let line = line ^ (Printf.sprintf "

    %s

    \n" name) + in let line = line ^ "

    Table of Content

    \n" + in let line = line ^ "\n" + in let line = line ^ "
    \n" + in let line = line ^ str ^ "\n" + in let line = line ^ (prints_glossary ()) ^ "\n" + in let line = line ^ "" + in line;; + + + let pre_parse_file file = + let str = read_file file in + let str = String.concat "\n" str in + let a = parse_string str + in let p,doc = separate_preamble a + in read_preamble p; + let doc = separate_sections doc + in let doc = calculate_environments doc + in let doc = Mathgen.re_calculate_env doc in + (match (Hashtbl.find_opt preamble "glossary") with + | Some s -> init_glossary s + | None -> ();); + doc;; + + +let print_file_in_html ?(min_chap=1) ?(write_before=false) file outname = + let a = pre_parse_file file in + let html = parse_to_html ~min_chap:min_chap write_before a in + let toc = print_table_of_content a min_chap in + let name = try Hashtbl.find preamble "title" with _ -> "Generic" in + prepare_body name html toc + |> write_to_file outname;; \ No newline at end of file diff --git a/lib/mathgen.ml b/lib/mathgen.ml new file mode 100644 index 0000000..d7a30a2 --- /dev/null +++ b/lib/mathgen.ml @@ -0,0 +1,53 @@ +open Parser + +let generate_latex_command s l = + let line = "\\"^s in + let args = String.concat "," l in + let line = if args="" then line else Printf.sprintf "%s[%s]" line args in + line;; + + + +let generate_latex l = + let rec unparse acc l = + match l with + | [] -> String.concat " " acc + | Line s::q -> unparse (s::acc) q + | AtomicCmd (s,l)::q -> + let line = generate_latex_command s l in + unparse (line::acc) q + | OneArgCmd (s,l,l2)::q -> + let line = generate_latex_command s l in + let line = Printf.sprintf "%s{%s}" line (unparse [] l2) in + unparse (line::acc) q + | MultipleArgCmd (s,l,l2)::q -> + let line = generate_latex_command s l in + let l = List.map (unparse []) l2 in + let line = Printf.sprintf "%s{%s}" line (String.concat "\n" l) in + unparse (line::acc) q + | _::q -> unparse acc q + in unparse [] l;; + +let env_de_latexer env = + match env with + | e -> e;; + +let re_calculate_env ast = + let rec aux acc ast = + match ast with + | [] -> acc + | Env (s,n)::q when s="align" + -> aux (Math(Printf.sprintf "\\begin{align}%s\\end{align}" (generate_latex n))::acc) q + | Env (s,n)::q when s="align*" + -> aux (Math(Printf.sprintf "\\begin{align*}%s\\end{align*}" (generate_latex n))::acc) q + | Env (s,n)::q when s="equation" + -> aux (Math(Printf.sprintf "\\begin{equation}%s\\end{equation}" (generate_latex n))::acc) q + | Env (s,n)::q when s="equation*" + -> aux (Math(Printf.sprintf "\\begin{equation*}%s\\end{equation*}" (generate_latex n))::acc) q + | Env(s,n)::q + -> let ast = aux [] n in + let ast = List.rev ast + in let env = Env(s,ast) + in aux (env::acc) q + | e::q -> aux (e::acc) q + in List.rev (aux [] ast);; \ No newline at end of file diff --git a/lib/parser.ml b/lib/parser.ml index 7b492b5..6a6121a 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -1,4 +1,3 @@ -open Glossary open Utils type cmd = | NullCommand @@ -13,6 +12,7 @@ type structure = (*OK*) | OneArgCmd of string * string list * structure list | MultipleArgCmd of string * string list * structure list list | Env of string * structure list + | Math of string | Subsubsection of string * structure list | Subsection of string * structure list | Section of string * structure list @@ -21,189 +21,6 @@ type structure = (*OK*) let preamble = Hashtbl.create 1;; let commands = Hashtbl.create 1;; -let parse_to_html ?(min_chap=1) ast= - let count = [|1;1;1;1|] in - let rec aux ?(write=false) acc ast = - match ast with - | [] -> acc - | Nul::q -> aux acc q - | Line s::q -> - let line= if write then Printf.sprintf "%s\n" s else "" - in aux ~write:write (acc^line) q - - | AtomicCmd (s,_)::q -> - let new_line = (match s with - | "par" -> "
    \n" - | "bigskip" -> "

    \n\n

    \n" - | "\\" -> "
    \n" - | "printglossaries" -> "" - | "sep" -> "

    ***
    " - | "item" -> "·" - | "newline" -> "
    \n" - | "ast" -> "*" - | e -> - (try - let structure = Hashtbl.find commands e in - let str = aux ~write:write acc structure - in str - with _ -> "")) - in let new_acc = if write then acc^new_line^"\n" else "" - in aux ~write:write new_acc q - - | OneArgCmd (s,_,l)::q -> - let str = aux "" l in - let new_line = (match s with - | "par" -> "
    \n" - | "bigskip" -> "

    \n\n

    \n" - | "\\" -> "
    \n" - | "printglossaries" -> "" - | "item" -> "·" - | "sep" -> "

    ***
    " - | "newline" -> "
    \n" - | "ast" -> "*" - | "gls" -> - (match l with - | [] -> "" - | Line s::_ -> - let name,_ = recognize_gls s in Printf.sprintf "%s " s name - | _::_ -> "") - | "textit" -> (Printf.sprintf "%s" str) - | "textbf" -> (Printf.sprintf "%s" str) - | "url" -> (Printf.sprintf "%s" (Str.global_replace (Str.regexp "\n") "" str) str) - | e -> - (try - let structure = Hashtbl.find commands e in - let str = aux ~write:write acc structure - in str - with _ -> "")) - in let new_acc = if write then acc^(new_line) else "" - in aux ~write:write new_acc q - - | Chapter (s,l)::q -> - let chapnum = count.(0) in - begin - count.(0) <- count.(0) + 1; - count.(1) <- 1; - count.(2) <- 1; - count.(3) <- 1; - end; - let str = aux ~write:(chapnum>=min_chap) "" l in - let new_line = if chapnum>=min_chap then Printf.sprintf "

    Chapter %i : %s


    \n" - chapnum (chapnum-min_chap+1) s else "" in - aux ~write:write (acc^new_line^str) q - - | Section (s,l)::q -> - let chapnum,secnum = count.(0),count.(1) in - begin - count.(1) <- count.(1) + 1; - count.(2) <- 1; - count.(3) <- 1; - end; - let str = aux ~write:write "" l in - let new_line = Printf.sprintf "

    Section %i.%i : %s


    \n" - (2.**(float chapnum)*.3.**(float secnum)) (chapnum-min_chap+1) secnum s in - aux ~write:write (acc^new_line^str) q - - | Subsection (s,l)::q -> - let chapnum,secnum,ssecnum = count.(0),count.(1),count.(2) in - begin - count.(2) <- count.(2) + 1; - count.(3) <- 1; - end; - let str = aux ~write:write "" l in - let new_line = Printf.sprintf "

    Subsection %i.%i.%i : %s


    \n" - (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)) (chapnum-min_chap+1) secnum ssecnum s in - aux ~write:write (acc^new_line^str) q - - | Subsubsection (s,l)::q -> - let chapnum,secnum,ssecnum,sssecnum = count.(0),count.(1),count.(2),count.(3) in - begin - count.(3) <- count.(3) + 1; - end; - let str = aux ~write:write "" l in - let new_line = Printf.sprintf "

    Subsubsection %i.%i.%i.%i : %s


    \n" - (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)*.7.**(float sssecnum)) (chapnum-min_chap+1) secnum ssecnum sssecnum s in - aux ~write:write (acc^new_line^str) q - - | Env (s,l)::q -> - let str = aux ~write:write "" l in - let new_line = (match s with - | "document" -> str - | "center" -> Printf.sprintf "
    \n%s\n
    " str - | _ -> str) - in aux ~write:write (acc^new_line^"\n") q - | _::q -> aux acc q - in aux "" ast;; - -let print_table_of_content ast min_chap = - let count = [|1;1;1;1|] in - let rec aux acc ast = - match ast with - | [] -> acc - | Chapter (s,l)::q -> - let chapnum = count.(0) in - begin - count.(0) <- count.(0) + 1; - count.(1) <- 1; - count.(2) <- 1; - count.(3) <- 1; - end; - let str = aux "" l in - let new_line = if chapnum>=min_chap then Printf.sprintf "
  • Chapter %i : %s
  • \n" - chapnum (chapnum-min_chap+1) s else "" in - aux (acc^new_line^str) q - - | Section (s,l)::q -> - let chapnum,secnum = count.(0),count.(1) in - begin - count.(1) <- count.(1) + 1; - count.(2) <- 1; - count.(3) <- 1; - end; - let str = aux "" l in - let new_line = Printf.sprintf "
  • Section %i.%i : %s
  • \n" - (2.**(float chapnum)*.3.**(float secnum)) (chapnum-min_chap+1) secnum s in - aux (acc^new_line^str) q - - | Subsection (s,l)::q -> - let chapnum,secnum,ssecnum = count.(0),count.(1),count.(2) in - begin - count.(2) <- count.(2) + 1; - count.(3) <- 1; - end; - let str = aux "" l in - let new_line = Printf.sprintf "
  • Subsection %i.%i.%i : %s
  • \n" - (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)) (chapnum-min_chap+1) secnum ssecnum s in - aux (acc^new_line^str) q - - | Subsubsection (s,l)::q -> - let chapnum,secnum,ssecnum,sssecnum = count.(0),count.(1),count.(2),count.(3) in - begin - count.(3) <- count.(3) + 1; - end; - let str = aux "" l in - let new_line = Printf.sprintf "
  • Subsubsection %i.%i.%i.%i : %s
  • \n" - (2.**(float chapnum)*.3.**(float secnum)*.5.**(float ssecnum)*.7.**(float sssecnum)) (chapnum-min_chap+1) secnum ssecnum sssecnum s in - aux (acc^new_line^str) q - | Env (_,l)::q -> let a = aux acc l in aux (acc^a) q - | _::q -> aux acc q - in (aux "" ast);; - -let prepare_body name str toc = - let line = "" ^ name ^ "\n" - in let line = line ^ "\n" - in let line = line ^ "" - in let line = line ^ "
    \n" - in let line = line ^ (Printf.sprintf "

    %s

    \n" name) - in let line = line ^ "

    Table of Content

    \n" - in let line = line ^ "\n" - in let line = line ^ "
    \n" - in let line = line ^ str ^ "\n" - in let line = line ^ (prints_glossary ()) ^ "\n" - in let line = line ^ "" - in line;; (*TODO: prendre en compte les nested cmd pour éviter les }} rémanent*) let parse_interior_of_an_accolade list_of_chars acc = @@ -258,18 +75,26 @@ let parse_command list_of_chars = let append_line str q = match (String.trim (str)) with - | "$" -> q - | "~" -> q - | "~$" -> q | "" -> q | e -> Line(e)::q +let parse_math l = + let rec parse acc l = + match l with + | [] -> acc,[] + | t::q when t='$' -> acc,q + | t::q -> parse (acc^(String.make 1 t)) q + in let a,b = parse "" l in Math(a),b;; +let print_math = function + | Math e -> print_endline e; + | _ -> ();; let rec parse_string str = let rec parse current_acc acc lst = match lst with | [] -> append_line current_acc acc + | t::q when t='$' -> let a,q2 = parse_math q in parse "" (a::(append_line current_acc acc)) q2 | t::q when t='\\' -> let cmd,l = parse_command q in parse "" (Cmd(cmd)::(append_line current_acc acc)) l @@ -278,6 +103,7 @@ and parse_nested_commands ast_list = match ast_list with | [] -> [] | Line s::q -> (Line s)::(parse_nested_commands q) + | Math e::q -> (Math e)::(parse_nested_commands q) | Cmd c::q -> let a = (match c with | NullCommand -> Nul @@ -367,28 +193,4 @@ let separate_sections lst = tab.(3) <- true; extract_section (chap::acc) l | e::q -> extract_section (e::acc) q - in let a,_ = extract_section [] lst in List.rev a;; - - - -let pre_parse_file file = - let str = read_file file in - let str = String.concat "\n" str in - let a = parse_string str - in let p,doc = separate_preamble a - in read_preamble p; - let doc = separate_sections doc - in let doc = calculate_environments doc - in - (match (Hashtbl.find_opt preamble "glossary") with - | Some s -> init_glossary s - | None -> ();); - doc;; - - -let print_file_in_html ?(min_chap=1) file outname = - let a = pre_parse_file file in - let html = parse_to_html ~min_chap:min_chap a in - let toc = print_table_of_content a min_chap in - prepare_body (Hashtbl.find preamble "title") html toc - |> write_to_file outname;; \ No newline at end of file + in let a,_ = extract_section [] lst in List.rev a;; \ No newline at end of file diff --git a/test.html b/test.html new file mode 100644 index 0000000..4ada210 --- /dev/null +++ b/test.html @@ -0,0 +1,31 @@ +Generic + +
    +

    Generic

    +

    Table of Content

    + +
    +tralalala +

    + +

    + +This is an integral + +

    +au center + +
    +!!! +selsle + + + + \ No newline at end of file diff --git a/test.txt b/test.txt index 7ea4d33..24377ac 100644 --- a/test.txt +++ b/test.txt @@ -1,11 +1,14 @@ \documentclass{article} -\newcommand{\sep}{\begin{center} - \boldmath $\ast$~$\ast$~$\ast$ -\end{center}} \usepackage{babel} Hello \begin{document} tralalala +\bigskip +This is an integral +\begin{align*} +2x &= 4 \\ +3y &= 3 +\end{align*} \begin{center} au center \end{center}