Skip to content

Commit

Permalink
Getting there. Correctly-ish identify HTML (and ignore it) and pull o…
Browse files Browse the repository at this point in the history
…ut esi:vars and recognize basic interpolation.
  • Loading branch information
tyler committed Dec 19, 2024
1 parent c9187eb commit 4455dda
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 34 deletions.
1 change: 1 addition & 0 deletions esi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod expression;
mod functions;
mod new_parse;
mod parse;
mod parser_types;

use document::{FetchState, Task};
use expression::{logged_evaluate_expression, maybe_evaluate_interpolated, EvalContext};
Expand Down
142 changes: 108 additions & 34 deletions esi/src/new_parse.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
use nom::branch::alt;
use nom::bytes::complete::is_not as complete_is_not;
use nom::bytes::streaming::*;
use nom::character::streaming::*;
use nom::combinator::{complete, map, not, peek, recognize, success, verify};
use nom::combinator::{complete, map, map_res, not, peek, recognize, success, verify};
use nom::error::{Error, ParseError};
use nom::multi::{fold_many0, length_data, many0, many_till};
use nom::multi::{fold_many0, length_data, length_value, many0, many1, many_till};
use nom::sequence::{delimited, pair, preceded, separated_pair, tuple};
use nom::{IResult, Parser};

#[derive(Debug)]
enum Chunk<'a> {
EsiStartTag(&'a str, Vec<(&'a str, &'a str)>),
EsiEndTag(&'a str),
Text(&'a str),
}
use crate::parser_types::*;

fn parse(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
fold_many0(
Expand All @@ -29,35 +25,62 @@ fn chunk(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
alt((text, esi_tag, html))(input)
}

fn parse_interpolated(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
fold_many0(
complete(interpolated_chunk),
Vec::new,
|mut acc: Vec<Chunk>, mut item| {
acc.append(&mut item);
acc
},
)(input)
}

fn interpolated_chunk(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
alt((interpolated_text, interpolation, esi_tag, html))(input)
}

fn esi_tag(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
alt((esi_start_tag, esi_end_tag))(input)
alt((esi_vars,))(input)
}
fn esi_start_tag(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(
delimited(tag("<esi:"), pair(esi_tag_name, attributes), char('>')),
|(tagname, attrs)| vec![Chunk::EsiStartTag(tagname, attrs)],
)(input)

fn esi_vars(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
alt((esi_vars_short, esi_vars_long))(input)
}
fn esi_end_tag(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(
delimited(tag("</esi:"), is_not(">"), char('>')),
|s: &str| vec![Chunk::EsiEndTag(s)],

fn esi_vars_short(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map_res(
delimited(
tag("<esi:vars"),
attributes,
preceded(multispace0, alt((tag(">"), tag("/>")))),
),
|attrs| {
if let Some((k, v)) = attrs.iter().find(|(k, v)| *k == "name") {
Ok(vec![Chunk::Expr(v)])
} else {
Err("no name field in short form vars")
}
},
)(input)
}

fn esi_tag_name(input: &str) -> IResult<&str, &str, Error<&str>> {
tag("vars")(input)
fn esi_vars_long(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(
delimited(tag("<esi:vars>"), parse_interpolated, tag("</esi:vars>")),
|v| v,
)(input)
}

fn attributes(input: &str) -> IResult<&str, Vec<(&str, &str)>, Error<&str>> {
many0(separated_pair(
preceded(multispace1, alpha1),
char('='),
xmlstring,
htmlstring,
))(input)
}

fn xmlstring(input: &str) -> IResult<&str, &str, Error<&str>> {
fn htmlstring(input: &str) -> IResult<&str, &str, Error<&str>> {
delimited(char('"'), is_not("\""), char('"'))(input) // TODO: obviously wrong
}

Expand All @@ -84,26 +107,45 @@ fn script(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
)),
|(start, script, end)| {
println!("script parser succeeded");
vec![Chunk::Text(start), Chunk::Text(script), Chunk::Text(end)]
vec![Chunk::Html(start), Chunk::Text(script), Chunk::Html(end)]
},
)(input)
}

fn end_tag(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(
recognize(delimited(tag("</"), is_not(">"), char('>'))),
|s: &str| vec![Chunk::Text(s)],
verify(
recognize(delimited(tag("</"), is_not(">"), char('>'))),
|s: &str| !s.starts_with("</esi:"),
),
|s: &str| vec![Chunk::Html(s)],
)(input)
}

fn start_tag(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(
recognize(delimited(char('<'), is_not(">"), char('>'))),
|s: &str| vec![Chunk::Text(s)],
verify(
recognize(delimited(char('<'), is_not(">"), char('>'))),
|s: &str| !s.starts_with("</") && !s.starts_with("<esi:"),
),
|s: &str| vec![Chunk::Html(s)],
)(input)
}
fn text(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(take_until1("<"), |s: &str| vec![Chunk::Text(s)])(input)
map(recognize(many1(complete_is_not("<"))), |s: &str| {
vec![Chunk::Text(s)]
})(input)
}
fn interpolated_text(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(recognize(many1(complete_is_not("<$"))), |s: &str| {
vec![Chunk::Text(s)]
})(input)
}
fn interpolation(input: &str) -> IResult<&str, Vec<Chunk>, Error<&str>> {
map(
recognize(delimited(tag("$("), is_not(")"), tag(")"))),
|s: &str| vec![Chunk::Expr(s)],
)(input)
}

#[cfg(test)]
Expand All @@ -112,10 +154,20 @@ mod tests {

#[test]
fn test_new_parse() {
let x = parse(
"<a>foo</a><bar />baz<esi:vars name=\"$hello\"><sCripT src=\"whatever\"><baz><script> less < more </script>",
);
println!("{:?}", x);
let input = r#"
<a>foo</a>
<bar />
baz
<esi:vars name="$hello">
<esi:vars>
hello <br>
</esi:vars>
<sCripT src="whatever">
<baz>
<script> less < more </script>"#;
let output = parse(input);
println!("{input}");
println!("{:?}", output);
}
#[test]
fn test_new_parse_script() {
Expand All @@ -128,8 +180,30 @@ mod tests {
println!("{:?}", x);
}
#[test]
fn test_new_parse_esi_tag() {
let x = esi_start_tag("<esi:vars foo=\"hello\">");
fn test_new_parse_esi_vars_short() {
let x = esi_tag(r#"<esi:vars name="hello">"#);
println!("{:?}", x);
}
#[test]
fn test_new_parse_esi_vars_long() {
let x = esi_tag(
r#"<esi:vars>hello<esi:vars><br></esi:vars><esi:vars name="bleh" />there</esi:vars>"#,
);
println!("{:?}", x);
}
#[test]
fn test_new_parse_plain_text() {
let x = parse("hello\nthere");
println!("{:?}", x);
}
#[test]
fn test_new_parse_esi_end_tag() {
let x = parse("</esi:vars>");
println!("{:?}", x);
}
#[test]
fn test_new_parse_interpolated() {
let x = parse("hello $(foo)<esi:vars>goodbye $(foo)</esi:vars>");
println!("{:?}", x);
}
}
21 changes: 21 additions & 0 deletions esi/src/parser_types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#[derive(Debug)]
pub enum Tag {
Vars,
Include,
Text,
Choose,
When,
Otherwise,
Try,
Attempt,
Except,
}

#[derive(Debug)]
pub enum Chunk<'a> {
EsiStartTag(Tag, Vec<(&'a str, &'a str)>),
EsiEndTag(Tag),
Expr(&'a str),
Html(&'a str),
Text(&'a str),
}

0 comments on commit 4455dda

Please sign in to comment.