From 1dc3381427078e99117b4ad5eac69fb9aaf8cac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Tavares?= Date: Sun, 28 Jul 2024 10:44:51 +0100 Subject: [PATCH] external calls collected --- bin/main.ml | 12 ++- lib/auxiliary/structures.ml | 1 + lib/mdg/analyse.ml | 17 ++-- lib/mdg/analysis/analysisType.ml | 4 +- lib/mdg/analysis/collectExternalCalls.ml | 104 +++++++++++++++++++++++ lib/mdg/structures/externalReferences.ml | 23 +++++ lib/mdg/structures/store.ml | 5 ++ lib/mdg/structures/structures.ml | 4 +- 8 files changed, 157 insertions(+), 13 deletions(-) create mode 100644 lib/mdg/analysis/collectExternalCalls.ml create mode 100644 lib/mdg/structures/externalReferences.ml diff --git a/bin/main.ml b/bin/main.ml index ca5d08d..e32ea06 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -3,6 +3,7 @@ open Setup open Auxiliary.Structures module Graph = Mdg.Graph' module ExportedObject = Mdg.ExportedObject +module ExternalReferences = Mdg.ExternalReferences let env_path = Filename.dirname (Filename.dirname Sys.executable_name) ^ "/lib/ast_gen/";; @@ -74,7 +75,9 @@ let main (filename : string) (output_path : string) (config_path : string) (mult (* STEP 2 : Generate MDG for the normalized code *) if generate_mdg then ( - let graph, exportedObject = Mdg.Analyse.program verbose config_path norm_program in + let graph, exportedObject, external_calls = Mdg.Analyse.program verbose config_path norm_program in + + ExternalReferences.print external_calls; (* TODO : . 
Graph.iter_external_calls ( @@ -86,11 +89,12 @@ let main (filename : string) (output_path : string) (config_path : string) (mult ) graph; *) add_graph module_graphs file_path graph; - add_summary summaries file_path exportedObject; (* TODO *) + add_summary summaries file_path exportedObject; + add_summary summaries (String.sub file_path 0 (String.length file_path - 3)) exportedObject; - print_endline file_path; + (* print_endline file_path; ExportedObject.print exportedObject; - print_newline (); + print_newline (); *) ); ) (DependencyTree.bottom_up_visit dep_tree); diff --git a/lib/auxiliary/structures.ml b/lib/auxiliary/structures.ml index 83255af..a46fde0 100644 --- a/lib/auxiliary/structures.ml +++ b/lib/auxiliary/structures.ml @@ -11,4 +11,5 @@ module HashTable = struct then Table.fold (fun key value acc -> acc && (Table.mem table' key) && (eq value (Table.find table' key))) table true else false end + \ No newline at end of file diff --git a/lib/mdg/analyse.ml b/lib/mdg/analyse.ml index 50e404a..2ea7fab 100644 --- a/lib/mdg/analyse.ml +++ b/lib/mdg/analyse.ml @@ -149,7 +149,7 @@ module GraphConstrunction (Auxiliary : AbstractAnalysis.T) = struct (* setup new store with only the param and corresponding locations *) let param_locs = get_param_locs func_id in - let new_state = {state with store = param_locs; context = visit func_id} in + let new_state = {state with store = Store.merge store param_locs; context = visit func_id} in analyse_sequence new_state analysis body ); @@ -469,12 +469,14 @@ and construct_object (state : State.t) (loc : LocationSet.t) : ExportedObject.t -let rec program (is_verbose : bool) (config_path : string) ((_, program) : m Program.t) : Graph.t * ExportedObject.t = +let rec program (is_verbose : bool) (config_path : string) ((_, program) : m Program.t) : Graph.t * ExportedObject.t * ExternalReferences.t = verbose := is_verbose; let module Analysis = AbstractAnalysis.Combine (BuildExportsObject.Analysis) - (SinkAliases.Analysis (struct 
let filename = config_path end)) + (AbstractAnalysis.Combine + (CollectExternalCalls.Analysis) + (SinkAliases.Analysis (struct let filename = config_path end)) ) in let module BuildMDG = GraphConstrunction (Analysis) in @@ -482,17 +484,18 @@ let rec program (is_verbose : bool) (config_path : string) ((_, program) : m Pro let state, analysis = BuildMDG.run init_state program.body in (* process auxiliary analysis outputs*) - let exportsObjectInfo, config = get_analysis_output (Analysis.finish analysis) in + let exportsObjectInfo, config, external_calls = get_analysis_output (Analysis.finish analysis) in add_taint_sinks state config; add_taint_sources state config; let exportsObject = buildExportsObject state exportsObjectInfo in - state.graph, exportsObject; + state.graph, exportsObject, external_calls; -and get_analysis_output (result : AnalysisType.t) : buildExportsObject * sinkAliases = +and get_analysis_output (result : AnalysisType.t) : buildExportsObject * sinkAliases * collectExternalCalls = match result with | Combined (BuildExportsObject exportsObject, - SinkAliases config ) -> exportsObject, config + Combined (CollectExternalCalls ext_calls, + SinkAliases config )) -> exportsObject, config, ext_calls | _ -> failwith "unable to extract analysis output" \ No newline at end of file diff --git a/lib/mdg/analysis/analysisType.ml b/lib/mdg/analysis/analysisType.ml index 3433328..1f711c2 100644 --- a/lib/mdg/analysis/analysisType.ml +++ b/lib/mdg/analysis/analysisType.ml @@ -15,11 +15,13 @@ type buildExportsObject = { exportsIsModuleExports : bool } +type collectExternalCalls = ExternalReferences.t type generateMDG = State.t type sinkAliases = Config.t type t = - | BuildExportsObject of buildExportsObject + | BuildExportsObject of buildExportsObject + | CollectExternalCalls of collectExternalCalls | GenerateMDG of generateMDG | SinkAliases of sinkAliases | Combined of t * t diff --git a/lib/mdg/analysis/collectExternalCalls.ml 
b/lib/mdg/analysis/collectExternalCalls.ml new file mode 100644 index 0000000..269c2ff --- /dev/null +++ b/lib/mdg/analysis/collectExternalCalls.ml @@ -0,0 +1,104 @@ +open Ast.Grammar +open Structures +open Auxiliary.Functions +module Graph = Graph' + + +type external_reference = ExternalReferences.t' + (* Auxiliary analysis: remembers which locations hold a module obtained through require (or a property read from such a module) and records the call sites whose callee is one of those locations. *) +module Analysis : AbstractAnalysis.T = struct + type t = { + external_refs : ExternalReferences.t; (* location set -> external reference known to live there *) + calls : ExternalReferences.t; (* call-site location set -> external reference being called *) + _context : unit (* placeholder; nothing in this module reads it *) + } + + (* handle external reference objects *) + let create_ext_ref (module' : string) (properties : property list) : external_reference = + { _module = module'; properties = properties } + (* returns a copy of the reference with [property] appended at the end of its property list *) + let add_property (ext_ref : external_reference) (property : property) : external_reference = + {ext_ref with properties = ext_ref.properties @ [property]} + + (* handle external calls collection: binds [call] to [ext_ref] in [info.calls], updating the table in place, and returns [info] *) + let add_ext_call (info : t) (call : LocationSet.t) (ext_ref : external_reference) : t = + ExternalReferences.T.replace info.calls call ext_ref; + info + + + (* handle external references collection: binds [loc] to [ext_ref] in [info.external_refs], updating the table in place, and returns [info] *) + let add_ext_ref (info : t) (loc : LocationSet.t) (ext_ref : external_reference) : t = + ExternalReferences.T.replace info.external_refs loc ext_ref; + info + (* external reference bound to [loc] in [info.external_refs], if any *) + let get_ext_ref (info : t) (loc : LocationSet.t) : external_reference option = + ExternalReferences.T.find_opt info.external_refs loc + + (* context: currently a no-op that returns [info] unchanged *) + let update_context (info : t) : t = info + + (* Processes one statement. A call to require binds the location of the assigned identifier to a fresh reference for the named module. Any other function call, and any static method call, is recorded in [calls] when its callee (resp. receiver object) evaluates to a known external reference; for method calls the property is appended first. A static lookup on a known reference binds the result location to the reference extended with the property. Every other statement returns [info] as is. NOTE(review): map_default comes from Auxiliary.Functions and is presumably "apply the function to the option's payload, otherwise return the default" - confirm. *) + let analyse (info : t) (state : State.t) (statement : m Statement.t) : t = + let eval_expr = Store.eval_expr state.store state.this in + let alloc = Graph.alloc state.graph in + + let info = update_context info in + match statement with + | _, AssignFunCall {left; callee; arguments; id_call; _} -> + + if Identifier.get_name callee = "require" then ( + (* get module name from the first argument; a leading ./ is dropped from it *) + let module_arg = List.nth_opt arguments 0 in + let module_name = match module_arg with + | Some (_, Literal {value = String module_name; _}) -> + if String.starts_with ~prefix:"./" module_name + then String.sub 
module_name 2 (String.length module_name - 2) + else module_name + + | _ -> failwith "failed to obtain require module name" (* NOTE(review): a missing or non-string-literal require argument raises Failure here and stops the analysis - confirm this is intended *) + in + + let loc = eval_expr (Identifier.to_expression left) in + let external_reference = create_ext_ref module_name [] in + add_ext_ref info loc external_reference) + + else ( + (* check if callee is an external call *) + let l_call = LocationSet.singleton (alloc id_call) in + let callee = eval_expr (Identifier.to_expression callee) in + let external_reference = get_ext_ref info callee in + map_default (fun external_reference -> + add_ext_call info l_call external_reference + ) info external_reference;) + + + | _, AssignMetCallStatic {_object; property; id_call; _} -> + (* check if object callee is an external reference *) + let l_call = LocationSet.singleton (alloc id_call) in + let callee = eval_expr _object in + let external_reference = get_ext_ref info callee in + map_default (fun external_reference -> + let external_reference' = add_property external_reference property in + add_ext_call info l_call external_reference' + ) info external_reference + + | _, StaticLookup {left; _object; property; _} -> + (* check if object is an external reference *) + let loc = eval_expr (Identifier.to_expression left) in + let loc_obj = eval_expr _object in + let external_reference = get_ext_ref info loc_obj in + map_default (fun external_reference -> + let external_reference' = add_property external_reference property in + add_ext_ref info loc external_reference' + ) info external_reference; + + | _ -> info + (* fresh analysis state: two empty tables created with an initial size of 10 *) + let init () : t = { + external_refs = ExternalReferences.T.create 10; + calls = ExternalReferences.T.create 10; + _context = () + } + (* only the collected calls table is exposed as the result; [external_refs] is not returned *) + let finish (info : t) : AnalysisType.t = AnalysisType.CollectExternalCalls info.calls +end \ No newline at end of file diff --git a/lib/mdg/structures/externalReferences.ml new file mode 100644 index 0000000..4251f40 --- /dev/null +++ 
b/lib/mdg/structures/externalReferences.ml @@ -0,0 +1,23 @@ +open Structures + (* hash tables keyed by location sets; relies on LocationSet providing equal and hash *) +module T = Hashtbl.Make (LocationSet) + + (* one external reference: the name of the required module and the list of properties accessed on it *) +type t' = { + _module : string; + properties : property list +} + (* table from a location set to the external reference associated with it *) +type t = t' T.t + +let iter : (LocationSet.t -> t' -> unit) -> t -> unit = T.iter +let print (refs : t) : unit = (* debugging aid: writes every binding of [refs] to stdout, joining the properties with dots *) + print_endline "=======" ; + iter (fun loc value -> + print_endline "key : "; + LocationSet.print loc; + print_endline "value : "; + print_endline ("{ module : " ^ value._module ^ ", \n properties : " ^ String.concat "." value.properties ^ " }"); + print_endline "----------"; + ) refs; + print_endline "=======" ; diff --git a/lib/mdg/structures/store.ml b/lib/mdg/structures/store.ml index 280c122..391b62d 100644 --- a/lib/mdg/structures/store.ml +++ b/lib/mdg/structures/store.ml @@ -18,6 +18,11 @@ let find_opt : t -> location -> LocationSet.t option = HashTable.find_opt let replace : t -> location -> LocationSet.t -> unit = HashTable.replace let copy : t -> t = HashTable.copy +let merge (store : t) (store' : t) : t = (* new store holding the bindings of [store] overridden by those of [store']; the writes go to a copy of [store] *) + let store_copy = copy store in + HashTable.iter (fun key value -> HashTable.replace store_copy key value) store'; + store_copy + let rec print (store : t) : unit = iter (print_locations) store; print_string "\n"; diff --git a/lib/mdg/structures/structures.ml b/lib/mdg/structures/structures.ml index 31cd0dc..6cc0311 100644 --- a/lib/mdg/structures/structures.ml +++ b/lib/mdg/structures/structures.ml @@ -25,7 +25,9 @@ module LocationSet = struct let print (locations : LocationSet'.t) : unit = apply (fun loc -> print_string (loc ^ ", ") ) locations; - print_newline (); + print_newline () + (* Hash the sorted element list instead of the set value itself: Hashtbl.hash follows the shape of the balanced tree, so two sets with the same elements but built in a different order could otherwise hash differently and be missed by Hashtbl.Make (LocationSet). Assumes LocationSet' is a Set.Make instance, so that elements returns the members in increasing order. *) + let hash (locations : LocationSet'.t) : int = Hashtbl.hash (LocationSet'.elements locations) end module AliasSet = Set.Make(String)