Skip to content

Commit

Permalink
Artifact evaluation (#20)
Browse files Browse the repository at this point in the history
* Merge changes from main (#16)

* Fix cmake

* do not make clean

---------

Co-authored-by: Austin Mordahl <austin_noroot@chronos.utdallas.edu>

* Add SugarlyzerConfig locally

* Make sample size configurable, and fix urllib3 to prevent chunked error

* Update docs

* Add comparison script

* Some updates

* Remove zachfiles

* Fix deduplication

* Fix deduplication

* Fix deduplication

* Moved postprocessing to sugarlyzer instead of to jupyter notebook

* Merge configurations in baseline results

* Syntax error

* Remove unnecessary files

* remove unnecessary files

* update API to be compatible with newer versions of python

* Updated readme

* Anonymize notebook

* removed link

* Anonymization, as well as more specific pointers to the paper

* Some small updates to fix exceptions.

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Add scripts

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Add scripts

* Remove tests

* Fix space issue

* Fix the progress bar

* fix removal

* fix removal

* fix alarms

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* For some reason, moving the deletion worked?

* Remove unnecessary logs

* fix file deletion

* Remove intermediate files

* Update requests version to fix breaking change in DockerPy (#17)

* Update requests version to fix breaking change in DockerPy

* Delete results.json

* fixed missing lib

* fix dockerfile cache (#18)

* Add timeout, add back in code for vbdb

---------

Co-authored-by: Austin Mordahl <austin_noroot@chronos.utdallas.edu>
Co-authored-by: arjpeg <58893337+arjpeg@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 27, 2024
1 parent c6ed20c commit 0d33535
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 51 deletions.
13 changes: 13 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ RUN apt-get update \
&& rm /tmp/cmake-install.sh \
&& ln -s /opt/cmake-3.24.1/bin/* /usr/local/bin

# Install CMake 3.24.1 from the upstream self-extracting installer.
# From https://www.softwarepronto.com/2022/09/dockerubuntu-installing-latest-cmake-on.html
# NOTE(review): the step just above this one appears to install the same CMake
# release into the same prefix -- confirm whether this step is a duplicate.
# `mkdir -p` and `ln -sf` keep this step from failing when the prefix directory
# and the /usr/local/bin symlinks already exist.
RUN apt-get update \
    && apt-get -y install build-essential wget \
    && rm -rf /var/lib/apt/lists/* \
    && wget https://github.com/Kitware/CMake/releases/download/v3.24.1/cmake-3.24.1-Linux-x86_64.sh \
        -q -O /tmp/cmake-install.sh \
    && chmod u+x /tmp/cmake-install.sh \
    && mkdir -p /opt/cmake-3.24.1 \
    && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.24.1 \
    && rm /tmp/cmake-install.sh \
    && ln -sf /opt/cmake-3.24.1/bin/* /usr/local/bin

ARG JOBS
RUN git clone https://github.com/Z3Prover/z3.git

Expand Down
74 changes: 43 additions & 31 deletions scripts/comparison.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-27T16:43:46.724365Z",
"start_time": "2024-01-27T16:43:46.617781Z"
}
},
"cell_type": "code",
"outputs": [
{
"name": "stderr",
Expand Down Expand Up @@ -204,64 +205,75 @@
"print(f\"Number of partially matched baselines: {result_hierarchy[False, True, True]}\")\n",
"print(\n",
" f\"Number of unmatched baselines: {sum(v for k, v in result_hierarchy.items() if k not in [(True, True, True), (False, True, True)])}\")"
],
"execution_count": 17
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Time Analysis\n",
"This part of the notebook will compute the total time the analysis took, including both desugaring time and the analysis time.\n",
"This requires the log files produced by Sugarlyzer."
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-27T16:41:36.802662Z",
"start_time": "2024-01-27T16:41:36.617115Z"
},
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Analysis Time: 1m\n",
"Desugaring time: 12m\n"
"Analysis Time: 283m\n",
"Desugaring time: 402m\n"
]
}
],
"source": [
"log_file = \"\"\n",
"log_files = [\n",
" \"/20TB/austin/noroot/git/Sugarlyzer/tool_paper_logs/tool_paper_results/clang_busybox.json.log\"]\n",
"\n",
" # \"/20TB/austin/noroot/git/Sugarlyzer/tool_paper_logs/tool_paper_results/infer_busybox.json.log\",\n",
" # \"/20TB/austin/noroot/git/Sugarlyzer/tool_paper_logs/tool_paper_results/phasar_busybox.json.log\"]\n",
"\n",
"import os\n",
"import sys\n",
"import re\n",
"\n",
"dtime = 0\n",
"atime = 0\n",
"with open(log_file,'r') as i:\n",
" for line in i.readlines():\n",
" line = line.lstrip().rstrip()\n",
" if 'Analyzing file' in line and ' took ' in line:\n",
" res = re.search(r'took (\\d+\\.\\d+)s',line)\n",
" if res != None:\n",
" atime += float(res.group(1))\n",
" \n",
" elif ' desugared in time:' in line:\n",
" res = re.search(r' desugared in time:(\\d+\\.\\d+)',line)\n",
" if res != None:\n",
" dtime += float(res.group(1))\n",
"for log_file in log_files:\n",
" with open(log_file,'r') as i:\n",
" for line in i.readlines():\n",
" line = line.lstrip().rstrip()\n",
" if 'Analyzing file' in line and ' took ' in line:\n",
" res = re.search(r'took (\\d+\\.\\d+)s',line)\n",
" if res != None:\n",
" atime += float(res.group(1))\n",
" \n",
" elif ' desugared in time:' in line:\n",
" res = re.search(r' desugared in time:(\\d+\\.\\d+)',line)\n",
" if res != None:\n",
" dtime += float(res.group(1))\n",
" \n",
"print (f'Analysis Time: {int(atime/60)}m\\nDesugaring time: {int(dtime/60)}m')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-27T16:41:36.802662Z",
"start_time": "2024-01-27T16:41:36.617115Z"
}
},
"execution_count": 16
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
48 changes: 48 additions & 0 deletions scripts/run_all_experiments.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Run the Sugarlyzer experiments for every tool/program pair under each
# remove_errors / recommended-space setting, then the baselines, committing the
# logs and results found in ../Sugarlyzer_results after each stage.
set -x

# Stage every *.log and *.json file under ../Sugarlyzer_results and commit them
# with the message given as $1. Runs in a subshell so the caller's working
# directory is left untouched, and stops early if the results checkout is
# missing rather than staging files from the current repository.
commit_results() {
    (
        cd ../Sugarlyzer_results || exit 1
        find . -type f -name '*.log' | parallel -j 1 git add {}
        find . -type f -name '*.json' | parallel -j 1 git add {}
        git commit -m "$1"
    )
}

# Run remove-errors on, recommended space on.
# The sed call flips the "remove_errors" flag in every program JSON file.
find ./resources/programs -type f -name "*.json" | parallel "sed -i 's/\"remove_errors\": false/\"remove_errors\": true/g' {}"

# NOTE(review): --dryrun only prints the dispatcher commands without running
# them, and toybox is not in the program list here -- confirm both are intended.
parallel --dryrun -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/recommended_space_ON/remove_errors_ON/{1}/{2}/desugared.json --force -v ::: clang infer phasar ::: axtls varbugs


# Run remove-errors off, recommended space on
find ./resources/programs -type f -name "*.json" | parallel "sed -i 's/\"remove_errors\": true/\"remove_errors\": false/g' {}"

parallel --ungroup -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/recommended_space_ON/remove_errors_OFF/{1}/{2}/desugared.json --force -v ::: clang infer phasar ::: axtls varbugs toybox

commit_results 'AUTO: Completed remove_errors OFF, recommended_space ON'

# Run remove-errors on, recommended space off.
find ./resources/programs -type f -name "*.json" | parallel "sed -i 's/\"remove_errors\": false/\"remove_errors\": true/g' {}"

parallel --ungroup -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/recommended_space_OFF/remove_errors_ON/{1}/{2}/desugared.json --force --no-recommended-space -v ::: clang infer phasar ::: axtls varbugs toybox

commit_results 'AUTO: Completed remove_errors ON, recommended_space OFF'

# Run baselines
# The ':::' separator must be its own word; when glued to --baselines, parallel
# does not recognise it as the start of the tool list.
parallel --ungroup -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/baselines/{1}/{2}/baselines.json --force -v --baselines ::: clang infer phasar ::: axtls varbugs toybox

commit_results 'AUTO: Completed baselines'
43 changes: 43 additions & 0 deletions scripts/run_all_experiments_no_toybox.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Run the Sugarlyzer experiments for every tool on axtls and varbugs (toybox is
# left out) under each remove_errors / recommended-space setting, then the
# baselines, committing the logs and results found in ../Sugarlyzer_results
# after each stage.
set -x

# Stage every *.log and *.json file under ../Sugarlyzer_results and commit them
# with the message given as $1. Runs in a subshell so the caller's working
# directory is left untouched, and stops early if the results checkout is
# missing rather than staging files from the current repository.
commit_results() {
    (
        cd ../Sugarlyzer_results || exit 1
        find . -type f -name '*.log' | parallel -j 1 git add {}
        find . -type f -name '*.json' | parallel -j 1 git add {}
        git commit -m "$1"
    )
}

# Run remove-errors off, recommended space on
# The sed call flips the "remove_errors" flag in every program JSON file.
find ./resources/programs -type f -name "*.json" | parallel "sed -i 's/\"remove_errors\": true/\"remove_errors\": false/g' {}"

parallel --ungroup -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/recommended_space_ON/remove_errors_OFF/{1}/{2}/desugared.json --force -v ::: clang infer phasar ::: axtls varbugs

commit_results 'AUTO: Completed remove_errors OFF, recommended_space ON'

# Run remove-errors on, recommended space off.
find ./resources/programs -type f -name "*.json" | parallel "sed -i 's/\"remove_errors\": false/\"remove_errors\": true/g' {}"

parallel --ungroup -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/recommended_space_OFF/remove_errors_ON/{1}/{2}/desugared.json --force --no-recommended-space -v ::: clang infer phasar ::: axtls varbugs

commit_results 'AUTO: Completed remove_errors ON, recommended_space OFF'

# Run baselines
# The ':::' separator must be its own word; when glued to --baselines, parallel
# does not recognise it as the start of the tool list.
parallel --ungroup -j 1 dispatcher -t {1} -p {2} -r ~/git/Sugarlyzer_results/baselines/{1}/{2}/baselines.json --force -v --baselines ::: clang infer phasar ::: axtls varbugs

commit_results 'AUTO: Completed baselines'
24 changes: 9 additions & 15 deletions src/sugarlyzer/analyses/Clang.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,21 @@ def analyze(self, file: Path,
included_files = []

output_location = tempfile.mkdtemp()
cmd = ["/usr/bin/time", "-v", "clang-11", '--analyze', "-Xanalyzer", "-analyzer-output=text",
cmd = ["/usr/bin/time", "-v", "timeout", "--preserve-status", "2h", "clang-11", '--analyze', "-Xanalyzer",
"-analyzer-output=text",
*list(itertools.chain(*zip(itertools.cycle(["-I"]), included_dirs))),
*list(itertools.chain(*zip(itertools.cycle(["--include"]), included_files))),
*command_line_defs,
'-nostdinc',
"-c", file.absolute()]
logger.info(f"Running cmd {' '.join(str(s) for s in cmd)}")

ps = subprocess.run(" ".join(str(s) for s in cmd), capture_output=True, shell=True, text=True, executable="/bin/bash")
if ps.returncode == 0:
try:
times = "\n".join(ps.stderr.split("\n")[-30:])
usr_time, sys_time, max_memory = parse_bash_time(times)
logger.info(f"CPU time to analyze {file} was {usr_time + sys_time}s")
logger.info(f"Max memory to analyze {file} was {max_memory}kb")
except Exception as ve:
logger.exception("Could not parse time in string " + times)

if (ps.returncode != 0) or ("error" in ps.stdout.lower()):
logger.warning(f"Running clang on file {str(file)} potentially failed.")
logger.warning(ps.stdout)
pipes = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
stdout, stderr = pipes.communicate()
stdout = str(stdout, 'UTF-8')
stderr = str(stderr, 'UTF-8')
if (pipes.returncode != 0):
logger.warning(f"Running clang on file {str(file)} failed.")
logger.warning(stdout)

with open(output_location + '/report.report','w') as o:
o.write(ps.stderr)
Expand Down
2 changes: 1 addition & 1 deletion src/sugarlyzer/analyses/Infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def analyze(self, file: Path,
command_line_defs = []

output_location = tempfile.mkdtemp()
cmd = ["/usr/bin/time", "-v", "infer", "--pulse-only", '-o', output_location, '--', "clang",
cmd = ["/usr/bin/time", "-v", "timeout", "--preserve-status", "2h", "infer", "--pulse-only", '-o', output_location, '--', "clang",
*list(itertools.chain(*zip(itertools.cycle(["-I"]), included_dirs))),
*list(itertools.chain(*zip(itertools.cycle(["--include"]), included_files))),
*command_line_defs,
Expand Down
5 changes: 3 additions & 2 deletions src/sugarlyzer/analyses/Phasar.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def analyze(self, file: Path,
output_location = tempfile.mkdtemp()
#create ll file
llFile = os.path.join(output_location,str(file)[:-2]+'.ll')
cmd = ['/usr/bin/time', '-v', 'clang-12','-emit-llvm','-S','-fno-discard-value-names','-c','-g',
cmd = ['/usr/bin/time', '-v', "timeout", "--preserve-status", "2h", 'clang-12','-emit-llvm','-S','-fno-discard-value-names','-c','-g',
*list(itertools.chain(*zip(itertools.cycle(["-I"]), included_dirs))),
*list(itertools.chain(*zip(itertools.cycle(["--include"]), included_files))),
*command_line_defs,
Expand All @@ -51,7 +51,8 @@ def analyze(self, file: Path,
logger.exception("Could not parse time in string " + times)

#run phasar on ll
cmd = ['/usr/bin/time', '-v', '/phasar/build/tools/phasar-llvm/phasar-llvm','-D','IFDSUninitializedVariables','-m',llFile,'-O',output_location]
cmd = ['/usr/bin/time', '-v', "timeout", "--preserve-status", "2h", '/phasar/build/tools/phasar-llvm/phasar-llvm','-D',
'IFDSUninitializedVariables','-m',llFile,'-O',output_location]
logger.info(f"Running cmd {cmd}")
ps = subprocess.run(" ".join(str(s) for s in cmd), capture_output=True, text=True, shell=True, executable="/bin/bash")
if (ps.returncode != 0) or ("error" in ps.stdout.lower()):
Expand Down
18 changes: 16 additions & 2 deletions src/sugarlyzer/models/ProgramSpecification.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,21 @@ class BaselineConfig:
def get_baseline_configurations(self) -> Iterable[Path]:
    """Yield the configurations the baseline analyses should be run on.

    When ``self.sample_directory`` is set, the entries of that directory
    (resolved through ``self.try_resolve_path``) are yielded. Otherwise one
    ``ProgramSpecification.BaselineConfig`` is yielded per source file and per
    DEF/UNDEF assignment of the macros that ``self.get_all_macros`` reports
    for that file, i.e. ``2 ** len(macros)`` configurations per file.

    NOTE(review): the return annotation says ``Path``, but the exhaustive
    branch yields ``BaselineConfig`` objects -- confirm what callers expect.
    """
    import itertools  # local import: the file's import block is not visible here

    if self.sample_directory is not None:
        yield from self.try_resolve_path(self.sample_directory).iterdir()
        return

    def all_configurations(options: List[str]) -> Iterable[List[Tuple[str, str]]]:
        # Lazily enumerate every DEF/UNDEF assignment as a list of
        # (state, macro) pairs. The last macro varies fastest, and an empty
        # macro list produces exactly one empty configuration.
        options = list(options)
        for states in itertools.product(("DEF", "UNDEF"), repeat=len(options)):
            yield list(zip(states, options))

    # If we don't have a sample directory, we use the get_all_macros function
    # to get every possible configuration.
    for source_file in tqdm(self.get_source_files()):
        logger.debug(f"Source file is {source_file}")
        macros: List[str] = self.get_all_macros(source_file)
        logger.debug(f"Macros for file {source_file} are {macros}")
        yield from (ProgramSpecification.BaselineConfig(source_file, c) for c in all_configurations(macros))

Expand All @@ -190,7 +204,7 @@ def get_all_macros(self, fpa):
parser.write(StringIO())
logger.debug(f"Discovered the following macros in file {fpa}: {parser.collected}")
return parser.collected

@search_context.setter
def search_context(self, value):
self.__search_context = value

0 comments on commit 0d33535

Please sign in to comment.