diff --git a/src/commands/cmd_add_totals/spl.rs b/src/commands/cmd_add_totals/spl.rs index ba5cb72..990766d 100644 --- a/src/commands/cmd_add_totals/spl.rs +++ b/src/commands/cmd_add_totals/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{field, ws}; use nom::combinator::map; use nom::multi::many0; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def cAddtotals[_: P]: P[AddTotals] = "addtotals" ~ commandOptions ~ field.rep(1).? map { diff --git a/src/commands/cmd_bin/spl.rs b/src/commands/cmd_bin/spl.rs index 3bce8a7..f3964b8 100644 --- a/src/commands/cmd_bin/spl.rs +++ b/src/commands/cmd_bin/spl.rs @@ -6,7 +6,7 @@ use crate::spl::{aliased_field, field}; use nom::branch::alt; use nom::combinator::{into, map}; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // // bin [...] [AS ] diff --git a/src/commands/cmd_collect/pyspark.rs b/src/commands/cmd_collect/pyspark.rs index 27aa055..0850489 100644 --- a/src/commands/cmd_collect/pyspark.rs +++ b/src/commands/cmd_collect/pyspark.rs @@ -2,6 +2,7 @@ use super::spl::*; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for CollectCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_collect/spl.rs b/src/commands/cmd_collect/spl.rs index 8729e72..cd115c0 100644 --- a/src/commands/cmd_collect/spl.rs +++ b/src/commands/cmd_collect/spl.rs @@ -5,7 +5,7 @@ use crate::spl::field_list; use anyhow::anyhow; use nom::combinator::map; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def collect[_: P]: P[CollectCommand] = "collect" ~ commandOptions ~ fieldList map { diff --git a/src/commands/cmd_dedup/pyspark.rs b/src/commands/cmd_dedup/pyspark.rs index 0e97b7d..05296a0 100644 --- a/src/commands/cmd_dedup/pyspark.rs +++ b/src/commands/cmd_dedup/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_dedup::spl::DedupCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for DedupCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_dedup/spl.rs b/src/commands/cmd_dedup/spl.rs index a1c7601..78e40a5 100644 --- a/src/commands/cmd_dedup/spl.rs +++ b/src/commands/cmd_dedup/spl.rs @@ -9,7 +9,7 @@ use nom::bytes::complete::{tag, tag_no_case}; use nom::combinator::{map, opt, verify}; use nom::multi::many1; use nom::sequence::{pair, preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; #[derive(Debug, PartialEq, Clone, Hash)] diff --git a/src/commands/cmd_eval/spl.rs b/src/commands/cmd_eval/spl.rs index 3b0315c..c0e8a93 100644 --- a/src/commands/cmd_eval/spl.rs +++ b/src/commands/cmd_eval/spl.rs @@ -6,7 +6,7 @@ use nom::bytes::complete::tag; use nom::combinator::map; use nom::multi::separated_list0; use nom::sequence::separated_pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def eval[_: P]: P[EvalCommand] = "eval" ~ (field ~ "=" ~ expr).rep(sep = ",") map EvalCommand diff --git a/src/commands/cmd_event_stats/pyspark.rs b/src/commands/cmd_event_stats/pyspark.rs index 16e96a2..9cb35f4 100644 --- a/src/commands/cmd_event_stats/pyspark.rs +++ b/src/commands/cmd_event_stats/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_event_stats::spl::EventStatsCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for EventStatsCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_event_stats/spl.rs b/src/commands/cmd_event_stats/spl.rs index 60e0824..498560a 100644 --- a/src/commands/cmd_event_stats/spl.rs +++ b/src/commands/cmd_event_stats/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{field_list, stats_call, ws}; use nom::bytes::complete::tag_no_case; use nom::combinator::{map, opt}; use nom::sequence::{preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def eventStats[_: P]: P[EventStatsCommand] = ("eventstats" ~ commandOptions ~ statsCall diff --git a/src/commands/cmd_fields/spl.rs b/src/commands/cmd_fields/spl.rs index 819a8ea..fc45c68 100644 --- a/src/commands/cmd_fields/spl.rs +++ b/src/commands/cmd_fields/spl.rs @@ -7,7 +7,7 @@ use nom::bytes::complete::tag; use nom::combinator::{map, opt}; use nom::multi::separated_list1; use nom::sequence::tuple; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // /* diff --git a/src/commands/cmd_fill_null/pyspark.rs b/src/commands/cmd_fill_null/pyspark.rs index a95fbe1..ae3bca4 100644 --- a/src/commands/cmd_fill_null/pyspark.rs +++ b/src/commands/cmd_fill_null/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_fill_null::spl::FillNullCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for FillNullCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_fill_null/spl.rs b/src/commands/cmd_fill_null/spl.rs index 743be36..348e613 100644 --- a/src/commands/cmd_fill_null/spl.rs +++ b/src/commands/cmd_fill_null/spl.rs @@ -7,7 +7,7 @@ use nom::bytes::complete::tag; use nom::combinator::{map, opt}; use nom::multi::many1; use nom::sequence::{preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def fillNull[_: P]: P[FillNullCommand] = ("fillnull" ~ ("value=" ~~ (doubleQuoted|token)).? diff --git a/src/commands/cmd_format/pyspark.rs b/src/commands/cmd_format/pyspark.rs index 4629cdb..4e2842e 100644 --- a/src/commands/cmd_format/pyspark.rs +++ b/src/commands/cmd_format/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_format::spl::FormatCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for FormatCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_format/spl.rs b/src/commands/cmd_format/spl.rs index 18e0c8f..e0658f0 100644 --- a/src/commands/cmd_format/spl.rs +++ b/src/commands/cmd_format/spl.rs @@ -4,7 +4,7 @@ use crate::commands::spl::{SplCommand, SplCommandOptions}; use crate::spl::{double_quoted, ws}; use nom::combinator::{map, opt}; use nom::sequence::{pair, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def format[_: P]: P[FormatCommand] = ("format" ~ commandOptions ~ doubleQuoted.rep(6).?) map { diff --git a/src/commands/cmd_head/spl.rs b/src/commands/cmd_head/spl.rs index 716c7e6..12058dd 100644 --- a/src/commands/cmd_head/spl.rs +++ b/src/commands/cmd_head/spl.rs @@ -6,7 +6,7 @@ use nom::branch::alt; use nom::bytes::complete::tag_no_case; use nom::combinator::{map, opt}; use nom::sequence::{preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // /** diff --git a/src/commands/cmd_input_lookup/pyspark.rs b/src/commands/cmd_input_lookup/pyspark.rs index 71e1804..b46d7f4 100644 --- a/src/commands/cmd_input_lookup/pyspark.rs +++ b/src/commands/cmd_input_lookup/pyspark.rs @@ -1,6 +1,7 @@ use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for super::spl::InputLookup { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_input_lookup/spl.rs b/src/commands/cmd_input_lookup/spl.rs index 7265c0e..cb990af 100644 --- a/src/commands/cmd_input_lookup/spl.rs +++ b/src/commands/cmd_input_lookup/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{expr, token, ws}; use nom::bytes::complete::tag_no_case; use nom::combinator::{map, opt}; use nom::sequence::{preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def inputLookup[_: P]: P[InputLookup] = diff --git a/src/commands/cmd_join/pyspark.rs b/src/commands/cmd_join/pyspark.rs index 670ca5f..90d7b25 100644 --- a/src/commands/cmd_join/pyspark.rs +++ b/src/commands/cmd_join/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_join::spl::JoinCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for JoinCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_join/spl.rs b/src/commands/cmd_join/spl.rs index dc0cdcd..9a640c0 100644 --- a/src/commands/cmd_join/spl.rs +++ b/src/commands/cmd_join/spl.rs @@ -6,7 +6,7 @@ use nom::bytes::complete::tag; use nom::combinator::map; use nom::multi::separated_list1; use nom::sequence::tuple; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // def join[_: P]: P[JoinCommand] = // ("join" ~ commandOptions ~ field.rep(min = 1, sep = ",") ~ subSearch) map { diff --git a/src/commands/cmd_lookup/pyspark.rs b/src/commands/cmd_lookup/pyspark.rs index 924dce8..7063911 100644 --- a/src/commands/cmd_lookup/pyspark.rs +++ b/src/commands/cmd_lookup/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_lookup::spl::LookupCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for LookupCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_lookup/spl.rs b/src/commands/cmd_lookup/spl.rs index 7712627..89c3c69 100644 --- a/src/commands/cmd_lookup/spl.rs +++ b/src/commands/cmd_lookup/spl.rs @@ -7,7 +7,7 @@ use nom::bytes::complete::tag_no_case; use nom::character::complete::multispace1; use nom::combinator::{map, opt}; use nom::sequence::{separated_pair, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; #[derive(Debug, PartialEq, Clone, Hash)] diff --git a/src/commands/cmd_make_results/pyspark.rs b/src/commands/cmd_make_results/pyspark.rs index 1dadf14..86d205f 100644 --- a/src/commands/cmd_make_results/pyspark.rs +++ b/src/commands/cmd_make_results/pyspark.rs @@ -1,6 +1,7 @@ use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for super::spl::MakeResults { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_make_results/spl.rs b/src/commands/cmd_make_results/spl.rs index 645205a..60d12d0 100644 --- a/src/commands/cmd_make_results/spl.rs +++ b/src/commands/cmd_make_results/spl.rs @@ -2,7 +2,7 @@ use crate::ast::ast::ParsedCommandOptions; use crate::ast::python::impl_pyclass; use crate::commands::spl::{SplCommand, SplCommandOptions}; use nom::combinator::map; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def makeResults[_: P]: P[MakeResults] = ("makeresults" ~ commandOptions) map { diff --git a/src/commands/cmd_map/spl.rs b/src/commands/cmd_map/spl.rs index a6723d3..82f3e00 100644 --- a/src/commands/cmd_map/spl.rs +++ b/src/commands/cmd_map/spl.rs @@ -4,7 +4,7 @@ use crate::commands::spl::{SplCommand, SplCommandOptions}; use crate::spl::quoted_search; use nom::combinator::map; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def _map[_: P]: P[MapCommand] = "map" ~ quotedSearch ~ commandOptions map { diff --git a/src/commands/cmd_mv_combine/pyspark.rs b/src/commands/cmd_mv_combine/pyspark.rs index dc59654..eb4af5c 100644 --- a/src/commands/cmd_mv_combine/pyspark.rs +++ b/src/commands/cmd_mv_combine/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_mv_combine::spl::MvCombineCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for MvCombineCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_mv_combine/spl.rs b/src/commands/cmd_mv_combine/spl.rs index 8cafb29..73daf98 100644 --- a/src/commands/cmd_mv_combine/spl.rs +++ b/src/commands/cmd_mv_combine/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{double_quoted, field, ws}; use nom::bytes::complete::{tag, tag_no_case}; use nom::combinator::{map, opt}; use nom::sequence::{pair, preceded}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def mvcombine[_: P]: P[MvCombineCommand] = ("mvcombine" ~ ("delim" ~ "=" ~ doubleQuoted).? diff --git a/src/commands/cmd_mv_expand/pyspark.rs b/src/commands/cmd_mv_expand/pyspark.rs index fb053df..7efdea0 100644 --- a/src/commands/cmd_mv_expand/pyspark.rs +++ b/src/commands/cmd_mv_expand/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_mv_expand::spl::MvExpandCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for MvExpandCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_mv_expand/spl.rs b/src/commands/cmd_mv_expand/spl.rs index 7ca6d31..bf64746 100644 --- a/src/commands/cmd_mv_expand/spl.rs +++ b/src/commands/cmd_mv_expand/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{field, int, ws}; use nom::bytes::complete::{tag, tag_no_case}; use nom::combinator::{map, opt}; use nom::sequence::{pair, preceded}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def mvexpand[_: P]: P[MvExpandCommand] = ("mvexpand" ~ field ~ ("limit" ~ "=" ~ int).?) map { diff --git a/src/commands/cmd_regex/pyspark.rs b/src/commands/cmd_regex/pyspark.rs index 283f0ac..55b55a6 100644 --- a/src/commands/cmd_regex/pyspark.rs +++ b/src/commands/cmd_regex/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_regex::spl::RegexCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for RegexCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_regex/spl.rs b/src/commands/cmd_regex/spl.rs index 8f05c3c..52ed00c 100644 --- a/src/commands/cmd_regex/spl.rs +++ b/src/commands/cmd_regex/spl.rs @@ -6,7 +6,7 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::combinator::{map, opt}; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // def _regex[_: P]: P[RegexCommand] = // "regex" ~ (field ~ ("="|"!=").!).? ~ doubleQuoted map RegexCommand.tupled diff --git a/src/commands/cmd_rename/pyspark.rs b/src/commands/cmd_rename/pyspark.rs index 7b24893..14b9e89 100644 --- a/src/commands/cmd_rename/pyspark.rs +++ b/src/commands/cmd_rename/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_rename::spl::RenameCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for RenameCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_rename/spl.rs b/src/commands/cmd_rename/spl.rs index 0f43e60..1519bf3 100644 --- a/src/commands/cmd_rename/spl.rs +++ b/src/commands/cmd_rename/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{aliased_field, ws}; use nom::bytes::complete::tag; use nom::combinator::map; use nom::multi::separated_list1; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def rename[_: P]: P[RenameCommand] = diff --git a/src/commands/cmd_return/pyspark.rs b/src/commands/cmd_return/pyspark.rs index 41015b3..44675a2 100644 --- a/src/commands/cmd_return/pyspark.rs +++ b/src/commands/cmd_return/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_return::spl::ReturnCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for ReturnCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_return/spl.rs b/src/commands/cmd_return/spl.rs index 9e2acb8..f08bc0e 100644 --- a/src/commands/cmd_return/spl.rs +++ b/src/commands/cmd_return/spl.rs @@ -8,7 +8,7 @@ use nom::bytes::complete::tag; use nom::combinator::{map, opt}; use nom::multi::many1; use nom::sequence::{preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def _return[_: P]: P[ReturnCommand] = "return" ~ int.? ~ ( diff --git a/src/commands/cmd_rex/pyspark.rs b/src/commands/cmd_rex/pyspark.rs index 9a850db..7b8d056 100644 --- a/src/commands/cmd_rex/pyspark.rs +++ b/src/commands/cmd_rex/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_rex::spl::RexCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for RexCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_rex/spl.rs b/src/commands/cmd_rex/spl.rs index 5192319..0049f91 100644 --- a/src/commands/cmd_rex/spl.rs +++ b/src/commands/cmd_rex/spl.rs @@ -4,7 +4,7 @@ use crate::commands::spl::{SplCommand, SplCommandOptions}; use crate::spl::double_quoted; use nom::combinator::map; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // // https://docs.splunk.com/Documentation/Splunk/8.2.2/SearchReference/Rex diff --git a/src/commands/cmd_search/pyspark.rs b/src/commands/cmd_search/pyspark.rs index 7faf764..e0eacf4 100644 --- a/src/commands/cmd_search/pyspark.rs +++ b/src/commands/cmd_search/pyspark.rs @@ -2,30 +2,221 @@ use super::spl::*; use crate::ast::ast; use crate::pyspark::ast::*; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; -use anyhow::anyhow; +use anyhow::{bail, ensure, Result}; +use std::collections::HashSet; + +fn _is_index(expr: &ast::Expr) -> bool { + match expr { + ast::Expr::Leaf(ast::LeafExpr::Constant(ast::Constant::Field(ast::Field(name)))) + if name == "index" => + { + true + } + _ => false, + } +} + +fn split_conditions( + expr: &ast::Expr, + all_ands: bool, + indices: &mut HashSet, +) -> Result> { + match expr.clone() { + // index=lol should result in Source("lol") + ast::Expr::Binary(ast::Binary { + left, + symbol, + right, + }) => { + let left_is_index = _is_index(left.as_ref()); + let right_is_index = _is_index(right.as_ref()); + match (left_is_index, symbol.as_str(), right_is_index) { + (true, "=", false) | (false, "=", true) => { + ensure!(all_ands, "Cannot specify an index under an OR branch"); + let compare_value = if left_is_index { *right } else { *left }; + indices.insert(compare_value.try_into()?); + Ok(None) + } + (true, _, true) | (true, _, _) | (_, _, true) => { + bail!("Invalid index comparison: {:?}", expr) + } + (false, op, false) => { + let still_all_and = all_ands && op == "AND"; + let converted_left = split_conditions(left.as_ref(), still_all_and, indices)?; + let converted_right = split_conditions(right.as_ref(), still_all_and, indices)?; + match (converted_left, op, converted_right) { + (None, _, None) => Ok(None), + (Some(left), "AND", None) | (Some(left), "OR", None) => Ok(Some(left)), + (None, "AND", Some(right)) | (None, "OR", Some(right)) => Ok(Some(right)), + (None, _, _) | (_, _, None) => bail!("Cannot perform comparison {} when one side collapses into an index check", op), + (Some(left), symbol, Some(right)) => Ok(Some(ast::Binary { + left: Box::new(left), + symbol: symbol.into(), + right: Box::new(right), + }.into())) + } + } + } + } + exp => Ok(Some(exp)), + } +} impl PipelineTransformer for SearchCommand { fn transform(&self, state: PipelineTransformState) -> anyhow::Result { - match self.expr.clone() { - // index=lol should result in Source("lol") - ast::Expr::Binary(ast::Binary { - left, - symbol, - right, - }) if symbol == "=" && *left == ast::Field::from("index").into() => match *right { - ast::Expr::Leaf(ast::LeafExpr::Constant(ast::Constant::Field(ast::Field( - name, - )))) => Ok(PipelineTransformState { - df: DataFrame::source(name), - }), - _ => Err(anyhow!("Unsupported index assignment: {:?}", right)), - }, - exp => { - let condition: Expr = exp.try_into()?; - Ok(PipelineTransformState { - df: state.df.where_(condition), - }) + let mut indices = HashSet::new(); + let condition_expr = split_conditions(&self.expr, true, &mut indices)?; + let mut df = if !indices.is_empty() { + let mut _df: Option = None; + for new_index in indices.into_iter() { + let new_source = DataFrame::source(new_index); + _df = match (_df, new_source) { + (None, new_source) => Some(new_source), + (Some(cur_source), new_source) => Some(cur_source.union_by_name(new_source)), + } } - } + _df.unwrap() + } else { + state.df.clone() + }; + + df = match condition_expr { + None => df, + Some(condition) => { + let condition: Expr = condition.try_into()?; + df.where_(condition) + } + }; + + Ok(PipelineTransformState { df }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn check_split_results( + expr: impl Into, + mut expected_indices: Vec, + expected_condition: Option, + ) { + let mut indices = HashSet::new(); + let expr = expr.into(); + let condition: Option = split_conditions(&expr, true, &mut indices).unwrap(); + let converted_condition: Option = condition.map(|e| e.try_into().unwrap()); + let mut indices: Vec<_> = indices.into_iter().collect(); + indices.sort(); + expected_indices.sort(); + assert_eq!(indices, expected_indices); + assert_eq!(converted_condition, expected_condition); + } + + #[test] + fn test_split_conditions_simple_index() { + check_split_results( + ast::Binary { + left: Box::new(ast::Field::from("index").into()), + symbol: "=".into(), + right: Box::new(ast::Field::from("lol").into()), + }, + vec!["lol".into()], + None, + ); + } + + #[test] + fn test_split_conditions_no_index() { + check_split_results( + ast::Binary { + left: Box::new(ast::Field::from("x").into()), + symbol: "=".into(), + right: Box::new(ast::IntValue::from(2).into()), + }, + Vec::::new(), + Some(column_like!([col("x")] == [lit(2)]).into()), + ); + } + + #[test] + fn test_split_conditions_combined_index() { + check_split_results( + ast::Binary { + left: Box::new( + ast::Binary { + left: Box::new(ast::Field::from("x").into()), + symbol: "=".into(), + right: Box::new(ast::IntValue::from(2).into()), + } + .into(), + ), + symbol: "AND".into(), + right: Box::new( + ast::Binary { + left: Box::new(ast::Field::from("index").into()), + symbol: "=".into(), + right: Box::new(ast::Field::from("lol").into()), + } + .into(), + ), + }, + vec!["lol".into()], + Some(column_like!([col("x")] == [lit(2)]).into()), + ); + } + + #[test] + fn test_multi_index_conditions() { + check_split_results( + ast::Binary { + left: Box::new( + ast::Binary { + left: Box::new( + ast::Binary { + left: Box::new( + ast::Binary { + left: Box::new(ast::Field::from("index").into()), + symbol: "=".into(), + right: Box::new(ast::Field::from("lol").into()), + } + .into(), + ), + symbol: "AND".into(), + right: Box::new( + ast::Binary { + left: Box::new(ast::Field::from("x").into()), + symbol: "=".into(), + right: Box::new(ast::IntValue::from(2).into()), + } + .into(), + ), + } + .into(), + ), + symbol: "AND".into(), + right: Box::new( + ast::Binary { + left: Box::new(ast::Field::from("index").into()), + symbol: "=".into(), + right: Box::new(ast::Field::from("two").into()), + } + .into(), + ), + } + .into(), + ), + symbol: "AND".into(), + right: Box::new( + ast::Binary { + left: Box::new(ast::Field::from("y").into()), + symbol: ">".into(), + right: Box::new(ast::IntValue::from(3).into()), + } + .into(), + ), + }, + vec!["lol".into(), "two".into()], + Some(column_like!([[col("x")] == [lit(2)]] & [[col("y")] > [lit(3)]]).into()), + ); } } diff --git a/src/commands/cmd_search/spl.rs b/src/commands/cmd_search/spl.rs index a3294bc..97155f4 100644 --- a/src/commands/cmd_search/spl.rs +++ b/src/commands/cmd_search/spl.rs @@ -10,7 +10,7 @@ use nom::character::complete::{multispace0, multispace1}; use nom::combinator::{eof, map, verify}; use nom::multi::fold_many_m_n; use nom::sequence::tuple; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // def impliedSearch[_: P]: P[SearchCommand] = // "search".? ~ expr.rep(max = 100) map(_.reduce((a, b) => Binary(a, And, b))) map SearchCommand diff --git a/src/commands/cmd_sort/pyspark.rs b/src/commands/cmd_sort/pyspark.rs index b7e851c..1abef78 100644 --- a/src/commands/cmd_sort/pyspark.rs +++ b/src/commands/cmd_sort/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_sort::spl::SortCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for SortCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_sort/spl.rs b/src/commands/cmd_sort/spl.rs index c1fdc27..b085b7b 100644 --- a/src/commands/cmd_sort/spl.rs +++ b/src/commands/cmd_sort/spl.rs @@ -7,7 +7,7 @@ use nom::bytes::complete::tag; use nom::combinator::{map, opt}; use nom::multi::separated_list1; use nom::sequence::pair; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def sort[_: P]: P[SortCommand] = diff --git a/src/commands/cmd_stats/spl.rs b/src/commands/cmd_stats/spl.rs index 790859c..31a3570 100644 --- a/src/commands/cmd_stats/spl.rs +++ b/src/commands/cmd_stats/spl.rs @@ -6,7 +6,7 @@ use crate::spl::{bool_, field_list, stats_call, ws}; use nom::bytes::complete::{tag, tag_no_case}; use nom::combinator::{map, opt}; use nom::sequence::{pair, preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def stats[_: P]: P[StatsCommand] = ("stats" ~ commandOptions ~ statsCall ~ diff --git a/src/commands/cmd_stream_stats/pyspark.rs b/src/commands/cmd_stream_stats/pyspark.rs index 56dd28f..d1da3fd 100644 --- a/src/commands/cmd_stream_stats/pyspark.rs +++ b/src/commands/cmd_stream_stats/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_stream_stats::spl::StreamStatsCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for StreamStatsCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_stream_stats/spl.rs b/src/commands/cmd_stream_stats/spl.rs index e68bf73..c461a38 100644 --- a/src/commands/cmd_stream_stats/spl.rs +++ b/src/commands/cmd_stream_stats/spl.rs @@ -5,7 +5,7 @@ use crate::spl::{field_list, stats_call, ws}; use nom::bytes::complete::tag_no_case; use nom::combinator::{map, opt}; use nom::sequence::{preceded, tuple}; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // def streamStats[_: P]: P[StreamStatsCommand] = ("streamstats" ~ commandOptions ~ statsCall diff --git a/src/commands/cmd_table/pyspark.rs b/src/commands/cmd_table/pyspark.rs index ef9fea4..f2d4b4a 100644 --- a/src/commands/cmd_table/pyspark.rs +++ b/src/commands/cmd_table/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_table::spl::TableCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for TableCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_table/spl.rs b/src/commands/cmd_table/spl.rs index ec5949c..7ff77ac 100644 --- a/src/commands/cmd_table/spl.rs +++ b/src/commands/cmd_table/spl.rs @@ -4,7 +4,7 @@ use crate::commands::spl::{SplCommand, SplCommandOptions}; use crate::spl::{field, ws}; use nom::combinator::map; use nom::multi::many1; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // def table[_: P]: P[TableCommand] = "table" ~ field.rep(1) map TableCommand diff --git a/src/commands/cmd_where/pyspark.rs b/src/commands/cmd_where/pyspark.rs index b35b14f..e88ee03 100644 --- a/src/commands/cmd_where/pyspark.rs +++ b/src/commands/cmd_where/pyspark.rs @@ -2,6 +2,7 @@ use crate::commands::cmd_where::spl::WhereCommand; use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer}; impl PipelineTransformer for WhereCommand { + #[allow(unused_variables, unreachable_code)] fn transform(&self, state: PipelineTransformState) -> anyhow::Result { let df = state.df; diff --git a/src/commands/cmd_where/spl.rs b/src/commands/cmd_where/spl.rs index 735fa55..aa112f4 100644 --- a/src/commands/cmd_where/spl.rs +++ b/src/commands/cmd_where/spl.rs @@ -3,7 +3,7 @@ use crate::ast::python::impl_pyclass; use crate::commands::spl::{SplCommand, SplCommandOptions}; use crate::spl::expr; use nom::combinator::map; -use nom::{IResult, Parser}; +use nom::IResult; use pyo3::prelude::*; // // where // def where[_: P]: P[WhereCommand] = "where" ~ expr map WhereCommand diff --git a/src/eval_fns/mod.rs b/src/eval_fns/mod.rs index 8a981db..4d97e3e 100644 --- a/src/eval_fns/mod.rs +++ b/src/eval_fns/mod.rs @@ -1,8 +1,10 @@ use crate::ast::ast; +use crate::pyspark::ast::column_like; use crate::pyspark::ast::*; use crate::pyspark::dealias::Dealias; use crate::pyspark::transpiler::utils::convert_time_format; -use anyhow::{bail, Result}; +use anyhow::{bail, ensure, Result}; +use std::any::type_name; /* https://docs.splunk.com/Documentation/SplunkCloud/9.2.2406/SearchReference/CommonEvalFunctions#Function_list_by_category @@ -140,29 +142,27 @@ tan(X) Computes the tangent of X. tanh(X) Computes the hyperbolic tangent of X. */ -trait EvalFunction { - const SPL_NAME: &'static str; - - fn to_column_like(&self) -> Result; -} - macro_rules! _eval_fn_args { ([$args:ident, $i:expr] ()) => {}; ([$args:ident, $i:ident] ($name:ident : $type:ty , $($tail:tt)*)) => { + ensure!($i < $args.len(), "Expected an argument for position {} ({}:{}), but only received {}", stringify!($i), stringify!($name), type_name::<$type>(), $args.len()); let $name: $type = map_arg(&$args[$i])?; $i += 1; _eval_fn_args!([$args,$i] ($($tail)*)); }; ([$args:ident, $i:ident] ($name:ident : $type:ty)) => { + ensure!($i < $args.len(), "Expected an argument for position {} ({}:{}), but only received {}", stringify!($i), stringify!($name), type_name::<$type>(), $args.len()); let $name: $type = map_arg(&$args[$i])?; $i += 1; }; ([$args:ident, $i:ident] ($name:ident , $($tail:tt)*)) => { + ensure!($i < $args.len(), "Expected an argument for position {} ({}:Expr), but only received {}", stringify!($i), stringify!($name), $args.len()); let $name: Expr = map_arg(&$args[$i])?; $i += 1; _eval_fn_args!([$args,$i] ($($tail)*)); }; ([$args:ident, $i:ident] ($name:ident)) => { + ensure!($i < $args.len(), "Expected an argument for position {} ({}:Expr), but only received {}", stringify!($i), stringify!($name), $args.len()); let $name: Expr = map_arg(&$args[$i])?; $i += 1; }; @@ -171,9 +171,6 @@ macro_rules! _eval_fn_args { macro_rules! eval_fn { ($name:ident [$arg_vec:ident] $args:tt { $out:expr }) => { { - use anyhow::ensure; - use crate::pyspark::ast::column_like; - let mut _i: usize = 0; _eval_fn_args!([$arg_vec, _i] $args); ensure!(_i == $arg_vec.len(), "Mistmatched number of arguments (code: {}, runtime: {}); fix arg list or assign remaining arguments using `eval_fn!({} [{} -> mapped_args] ...`", _i, $arg_vec.len(), stringify!($name), stringify!($arg_vec)); @@ -182,8 +179,6 @@ macro_rules! eval_fn { }; ($name:ident [$arg_vec:ident -> $mapped_arg_name:ident] $args:tt { $out:expr }) => { { - use crate::pyspark::ast::column_like; - let mut _i: usize = 0; _eval_fn_args!([$arg_vec, _i] $args); let $mapped_arg_name: Vec = map_args($arg_vec.iter().skip(_i).cloned().collect())?; @@ -360,7 +355,7 @@ pub fn eval_fn(call: ast::Call) -> Result { column_like!(coalesce(mapped_args)) }), // false() Returns FALSE. - "false" => eval_fn!(false [args] (x) { column_like!(lit(false)) }), + "false" => eval_fn!(false [args] () { column_like!(lit(false)) }), // if(,,) If the expression evaluates to TRUE, returns the , otherwise the function returns the . "if" => eval_fn!(if [args](condition, then_expr, else_expr) { column_like!([when([condition], [then_expr])].otherwise([else_expr])) @@ -376,7 +371,7 @@ pub fn eval_fn(call: ast::Call) -> Result { // match(, ) Returns TRUE if the regular expression finds a match against any substring of the string value . Otherwise returns FALSE. "match" => eval_fn!(match [args] (x, regex) { column_like!(regexp_like([x], [regex])) }), // null() This function takes no arguments and returns NULL. - "null" => eval_fn!(null [args] (x) { column_like!(lit(None)) }), + "null" => eval_fn!(null [args] () { column_like!(lit(None)) }), // nullif(,) Compares the values in two fields and returns NULL if the value in is equal to the value in . Otherwise returns the value in . "nullif" => eval_fn!(nullif [args] (x, y) { column_like!(nullif([x], [y])) }), // searchmatch() Returns TRUE if the event matches the search string. @@ -384,7 +379,7 @@ pub fn eval_fn(call: ast::Call) -> Result { unimplemented!("Unsupported function: {}", name) } // true() Returns TRUE. - "true" => eval_fn!(true [args] (x) { column_like!(lit(true)) }), + "true" => eval_fn!(true [args] () { column_like!(lit(true)) }), // validate(, ,...) Takes a list of conditions and values and returns the value that corresponds to the condition that evaluates to FALSE. This function defaults to NULL if all conditions evaluate to TRUE. This function is the opposite of the case function. "validate" => { unimplemented!("Unsupported function: {}", name) @@ -690,3 +685,29 @@ pub fn eval_fn(call: ast::Call) -> Result { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_simple_function_max() { + let result = eval_fn(ast::Call { + name: "max".to_string(), + args: vec![ast::Field::from("a").into(), ast::Field::from("b").into()], + }); + assert_eq!( + result.unwrap(), + column_like!([greatest([col("a")], [col("b")])].alias("max")) + ); + } + + #[test] + fn test_graceful_failure_for_missing_args() { + let result = eval_fn(ast::Call { + name: "sin".to_string(), + args: vec![], + }); + assert!(result.is_err()); + } +}