Skip to content

Commit

Permalink
Updated expression parsers to explicitly encode precedence rather than reconstruct it after the fact.
Browse files Browse the repository at this point in the history
This makes debugging and modifying expression parsing behavior much simpler. Also includes other minor syntax enhancements that bring the query success rate up to 62% and the command success rate up to 87%.
  • Loading branch information
scnerd committed Oct 2, 2024
1 parent 5eaa780 commit 90ce8d2
Show file tree
Hide file tree
Showing 4 changed files with 709 additions and 233 deletions.
90 changes: 64 additions & 26 deletions src/commands/cmd_search/spl.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
use crate::commands::spl::{SplCommand, SplCommandOptions};
use crate::spl::ast::{Expr, ParsedCommandOptions};
use crate::spl::operators;
use crate::spl::operators::OperatorSymbolTrait;
use crate::spl::parser::{expr, ws};
use crate::spl::parser::{combine_all_expressions, expr, space_separated_list1};
use crate::spl::python::impl_pyclass;
use crate::spl::{ast, operators};
use nom::branch::alt;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::{multispace0, multispace1};
use nom::combinator::{eof, map, verify};
use nom::multi::fold_many_m_n;
use nom::combinator::{eof, map};
use nom::sequence::tuple;
use nom::IResult;
use pyo3::prelude::*;
Expand Down Expand Up @@ -42,26 +41,9 @@ impl SplCommand<SearchCommand> for SearchParser {
type Options = SearchCommandOptions;

// Parses the body of a `search` command into a single `SearchCommand` expression.
//
// NOTE(review): this is a diff hunk rendered without +/- markers, so two bodies appear back to
// back. Per the hunk header (26 old lines -> 9 new lines) and the import changes at the top of
// the file, the first `map(...)` is the version removed by this commit and the second, shorter
// `map(...)` is the version it adds.
fn parse_body(input: &str) -> IResult<&str, SearchCommand> {
// Removed version: matches between 1 and 100 whitespace-wrapped `expr`s and folds them into a
// left-nested chain of `Expr::Binary` nodes joined with the AND operator symbol. The `verify`
// step rejects an accumulator that is still `None`, which is why the final `unwrap` was safe.
map(
verify(
fold_many_m_n(
1, // <-- differs from original code, but I don't see how 0 makes sense
100,
ws(expr),
|| None,
|a, b| match a {
None => Some(b),
Some(a) => Some(Expr::Binary(ast::Binary {
left: Box::new(a),
symbol: operators::And::SYMBOL.into(),
right: Box::new(b),
})),
},
),
|v| v.is_some(),
),
|v| SearchCommand { expr: v.unwrap() },
)(input)
// Added version: collects the expressions with `space_separated_list1` and hands the list to
// `combine_all_expressions` together with the AND operator symbol.
// NOTE(review): the `unwrap` presumably relies on the list being non-empty (the `1` suffix) —
// confirm against `combine_all_expressions` in `crate::spl::parser`.
map(space_separated_list1(expr), |exprs| SearchCommand {
expr: combine_all_expressions(exprs, operators::And::SYMBOL).unwrap(),
})(input)
}

fn match_name(input: &str) -> IResult<&str, ()> {
Expand All @@ -78,6 +60,7 @@ impl SplCommand<SearchCommand> for SearchParser {
#[cfg(test)]
mod tests {
use super::*;
use crate::spl::ast;
use crate::spl::parser::field_in;
use crate::spl::utils::test::*;

Expand Down Expand Up @@ -370,7 +353,7 @@ mod tests {
let query = r#"search
query!="SELECT * FROM Win32_ProcessStartTrace WHERE ProcessName = 'wsmprovhost.exe'"
AND query!="SELECT * FROM __InstanceOperationEvent WHERE TargetInstance ISA 'AntiVirusProduct' OR TargetInstance ISA 'FirewallProduct' OR TargetInstance ISA 'AntiSpywareProduct'"
"#;
"#.trim();

assert_eq!(
SearchParser::parse(query),
Expand All @@ -388,7 +371,62 @@ mod tests {
)
)
}
))
))
);
}

// Checks how an explicit OR chain combines with whitespace-separated (implicit AND) terms.
// The query has no leading `search` keyword and starts with a newline; the parser is still
// expected to consume the whole input (remainder `""`).
#[test]
fn test_search_9() {
// Shape of the query: `a OR b OR c (d OR e) f`, where the last three pieces are separated only
// by spaces.
let query = r#"
sourcetype=XmlWinEventLog:Microsoft-Windows-Sysmon/Operational
OR source=XmlWinEventLog:Microsoft-Windows-Sysmon/Operational
OR source=Syslog:Linux-Sysmon/Operational (process_name=3CXDesktopApp.exe OR OriginalFileName=3CXDesktopApp.exe) FileVersion=18.12.*"#;

// Expected tree: and(and(or(a, or(b, c)), or(d, e)), f).
// - each OR chain is grouped before the space-separated pieces are joined,
// - the OR chain nests to the right,
// - the space-separated pieces are joined with AND, nesting to the left.
assert_eq!(
SearchParser::parse(query),
Ok((
"",
SearchCommand {
expr: _and(
_and(
// First space-separated piece: the three-way OR chain.
_or(
_eq(
ast::Field::from("sourcetype"),
ast::Field::from(
"XmlWinEventLog:Microsoft-Windows-Sysmon/Operational"
)
),
_or(
_eq(
ast::Field::from("source"),
ast::Field::from(
"XmlWinEventLog:Microsoft-Windows-Sysmon/Operational"
)
),
_eq(
ast::Field::from("source"),
ast::Field::from("Syslog:Linux-Sysmon/Operational")
),
),
),
// Second piece: the parenthesised OR group.
_or(
_eq(
ast::Field::from("process_name"),
ast::Field::from("3CXDesktopApp.exe")
),
_eq(
ast::Field::from("OriginalFileName"),
ast::Field::from("3CXDesktopApp.exe")
)
)
),
// Third piece: a value containing `*` is expected to parse as a wildcard.
_eq(
ast::Field::from("FileVersion"),
ast::Wildcard::from("18.12.*")
)
)
}
))
);
}
}
211 changes: 185 additions & 26 deletions src/commands/cmd_t_stats/spl.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
use crate::commands::spl::{SplCommand, SplCommandOptions};
use crate::spl::ast::{
Binary, Call, Constant, Expr, Field, LeafExpr, ParsedCommandOptions, TimeSpan,
};
use crate::spl::operators;
use crate::spl::operators::OperatorSymbolTrait;
use crate::spl::ast::{Call, Expr, Field, ParsedCommandOptions, TimeSpan};
use crate::spl::parser::{
comma_or_space_separated_list1, field, field_in, logical_expression, space_separated_list1,
time_span, token, unwrapped_option, ws,
time_span, token, ws,
};
use crate::spl::python::impl_pyclass;
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::multispace1;
use nom::combinator::{eof, into, map, opt, recognize, verify};
use nom::multi::{fold_many1, separated_list1};
use nom::multi::separated_list1;
use nom::sequence::{delimited, pair, preceded, separated_pair, tuple};
use nom::IResult;
use pyo3::prelude::*;
Expand Down Expand Up @@ -154,24 +150,7 @@ fn _parse_from_datamodel(input: &str) -> IResult<&str, (String, Option<String>)>
// Parses the `where <condition>` clause of a `tstats` command into an `Expr`.
//
// NOTE(review): this is a diff hunk rendered without +/- markers. Per the hunk header
// (24 old lines -> 7 new lines) and the import changes at the top of the file, the
// `ws(unwrapped_option(...))` argument is the version removed by this commit and the single
// `ws(alt((logical_expression, into(field_in))))` line is the version it adds.
fn _parse_where(input: &str) -> IResult<&str, Expr> {
preceded(
ws(tag_no_case("where")),
// Removed version: repeatedly parsed `logical_expression`s (each optionally preceded by a
// literal `AND`), rejected any result that was a bare field named `by` (case-insensitive) so
// the following `by` clause was not swallowed, and folded the results into a left-nested chain
// of AND binaries. A lone `field_in` was the fallback alternative.
ws(unwrapped_option(alt((
fold_many1(
ws(verify(
preceded(opt(ws(tag_no_case("AND"))), logical_expression),
|e| !matches!(e, Expr::Leaf(LeafExpr::Constant(Constant::Field(Field(name)))) if name.to_ascii_lowercase() == "by"),
)),
|| None,
|a, b| match a {
None => Some(b),
Some(a) => Some(Expr::Binary(Binary {
left: Box::new(a),
symbol: operators::And::SYMBOL.into(),
right: Box::new(b),
})),
},
),
map(field_in, |expr| Some(expr.into())),
)))),
// Added version: a single `logical_expression`, falling back to `field_in` converted into an
// `Expr` via `into`.
// NOTE(review): implicit-AND handling and stopping before `by` presumably now live inside
// `logical_expression` — confirm in `crate::spl::parser`.
ws(alt((logical_expression, into(field_in)))),
)(input)
}

Expand Down Expand Up @@ -321,11 +300,12 @@ impl SplCommand<TStatsCommand> for TStatsParser {
mod tests {
use super::*;
use crate::spl::ast;
use crate::spl::parser::logical_expression_term;
use crate::spl::utils::test::*;
use nom::combinator::all_consuming;

#[test]
fn test_xsl_script_execution_with_wmic_1() {
fn test_tstats_1() {
let query = r#"tstats summariesonly=false allow_old_summaries=true fillnull_value=null
count min(_time) as firstTime max(_time) as lastTime
from datamodel=Endpoint.Processes
Expand Down Expand Up @@ -756,4 +736,183 @@ mod tests {
))
);
}

// Checks a `tstats` where-clause made of a `NOT(... IN (...))` term followed, with only a space
// in between, by a comparison written with spaces around `=`.
#[test]
fn test_tstats_8() {
// The query ends with a newline before the closing delimiter; `all_consuming` below requires
// the parser to consume it as well.
let query = r#"tstats
summariesonly=false allow_old_summaries=true fillnull_value=null
count min(_time) as firstTime max(_time) as lastTime
from datamodel=Endpoint.Filesystem
where NOT(Filesystem.file_path IN ("*\\Program Files*")) Filesystem.file_name = *.url
by Filesystem.file_create_time Filesystem.process_id Filesystem.file_name Filesystem.user Filesystem.file_path Filesystem.process_guid Filesystem.dest
"#;

assert_eq!(
all_consuming(TStatsParser::parse)(query),
Ok((
"",
TStatsCommand {
// Options not present in the query (`prestats`, `local`, `append`,
// `include_reduced_buckets`, `chunk_size`) are expected to carry the values below.
prestats: false,
local: false,
append: false,
summaries_only: false,
include_reduced_buckets: false,
allow_old_summaries: true,
chunk_size: 10000000,
fillnull_value: Some("null".into()),
exprs: vec![
_call!(count()).into(),
_alias("firstTime", _call!(min(ast::Field::from("_time")))).into(),
_alias("lastTime", _call!(max(ast::Field::from("_time")))).into(),
],
datamodel: Some("Endpoint.Filesystem".into()),
nodename: None,
// The two space-separated terms are expected to be joined with AND.
where_condition: Some(_and(
// Both the query and this literal are raw strings, so the doubled backslash is
// expected to come through unchanged.
_not(_isin(
"Filesystem.file_path",
vec![ast::Wildcard::from(r#"*\\Program Files*"#).into(),]
)),
_eq(
ast::Field::from("Filesystem.file_name"),
ast::Wildcard::from(r#"*.url"#)
)
)),
// The `by` clause must not be absorbed into the where-condition.
by_fields: Some(vec![
ast::Field::from("Filesystem.file_create_time").into(),
ast::Field::from("Filesystem.process_id").into(),
ast::Field::from("Filesystem.file_name").into(),
ast::Field::from("Filesystem.user").into(),
ast::Field::from("Filesystem.file_path").into(),
ast::Field::from("Filesystem.process_guid").into(),
ast::Field::from("Filesystem.dest").into(),
]),
by_prefix: None,
}
))
);
}

// Checks a multi-line `tstats` where-clause that mixes whitespace-separated terms, parenthesised
// groups and an explicit `OR`, and first probes the sub-parsers it depends on.
#[test]
fn test_tstats_9() {
// Shape of the where-clause: `A (B OR C) (D E) OR F`, spread over several lines.
let query = r#"tstats
summariesonly=false allow_old_summaries=true fillnull_value=null
count min(_time) as firstTime max(_time) as lastTime
from datamodel=Endpoint.Processes
where
Processes.parent_process_name IN ("sqlservr.exe", "sqlagent.exe", "sqlps.exe", "launchpad.exe", "sqldumper.exe")
(Processes.process_name=certutil.exe OR Processes.original_file_name=CertUtil.exe)
(Processes.process=*urlcache* Processes.process=*split*)
OR Processes.process=*urlcache*
by Processes.dest Processes.user Processes.parent_process Processes.parent_process_name Processes.process_name Processes.process Processes.process_id Processes.original_file_name Processes.parent_process_id
"#;

// The probes below inspect only the unconsumed remainder (`.0` of the parse result).

// A single term stops at the space and leaves the second comparison unparsed.
assert_eq!(
logical_expression_term(r#"Processes.process=*urlcache* Processes.process=*split*"#)
.unwrap()
.0,
" Processes.process=*split*"
);
// One comparison on its own is consumed completely.
assert_eq!(
logical_expression_term(r#"Processes.process=*split*"#)
.unwrap()
.0,
""
);
// A full logical expression consumes an explicit `AND` chain.
assert_eq!(
logical_expression(r#"Processes.process=*urlcache* AND Processes.process=*split*"#)
.unwrap()
.0,
""
);
// A parenthesised group counts as one term, with an explicit `AND` inside...
assert_eq!(
logical_expression_term(
r#"(Processes.process=*urlcache* AND Processes.process=*split*)"#
)
.unwrap()
.0,
""
);
// ...and also when the terms inside are separated only by a space.
assert_eq!(
logical_expression_term(r#"(Processes.process=*urlcache* Processes.process=*split*)"#)
.unwrap()
.0,
""
);

assert_eq!(
all_consuming(TStatsParser::parse)(query),
Ok((
"",
TStatsCommand {
prestats: false,
local: false,
append: false,
summaries_only: false,
include_reduced_buckets: false,
allow_old_summaries: true,
chunk_size: 10000000,
fillnull_value: Some("null".into()),
exprs: vec![
_call!(count()).into(),
_alias("firstTime", _call!(min(ast::Field::from("_time")))).into(),
_alias("lastTime", _call!(max(ast::Field::from("_time")))).into(),
],
datamodel: Some("Endpoint.Processes".into()),
nodename: None,
// Expected tree: or(and(and(A, or(B, C)), and(D, E)), F).
// The whitespace-separated terms are grouped (left-nested AND) before the explicit
// trailing `OR` is applied.
where_condition: Some(_or(
_and(
_and(
// Quoted values in the IN list are expected as string values.
_isin(
"Processes.parent_process_name",
vec![
ast::StrValue::from("sqlservr.exe").into(),
ast::StrValue::from("sqlagent.exe").into(),
ast::StrValue::from("sqlps.exe").into(),
ast::StrValue::from("launchpad.exe").into(),
ast::StrValue::from("sqldumper.exe").into(),
]
),
// Unquoted values without `*` are expected as string values as well.
_or(
_eq(
ast::Field::from("Processes.process_name"),
ast::StrValue::from("certutil.exe")
),
_eq(
ast::Field::from("Processes.original_file_name"),
ast::StrValue::from("CertUtil.exe")
),
),
),
// The parenthesised, space-separated pair becomes its own AND node.
_and(
_eq(
ast::Field::from("Processes.process"),
ast::Wildcard::from("*urlcache*")
),
_eq(
ast::Field::from("Processes.process"),
ast::Wildcard::from("*split*")
),
),
),
// Right-hand side of the explicit `OR`.
_eq(
ast::Field::from("Processes.process"),
ast::Wildcard::from("*urlcache*")
),
)),
// The `by` clause must not be absorbed into the where-condition.
by_fields: Some(vec![
ast::Field::from("Processes.dest").into(),
ast::Field::from("Processes.user").into(),
ast::Field::from("Processes.parent_process").into(),
ast::Field::from("Processes.parent_process_name").into(),
ast::Field::from("Processes.process_name").into(),
ast::Field::from("Processes.process").into(),
ast::Field::from("Processes.process_id").into(),
ast::Field::from("Processes.original_file_name").into(),
ast::Field::from("Processes.parent_process_id").into(),
]),
by_prefix: None,
}
))
);
}
}
Loading

0 comments on commit 90ce8d2

Please sign in to comment.