Skip to content

Commit

Permalink
Add logging verbosity level flag to extract_ir.py
Browse files Browse the repository at this point in the history
This commit adds a logging verbosity level flag to extract_ir.py and the
corresponding plumbing/implementation in extract_ir_lib.py. It is
primarily motivated by extraction errors, which come up quite often in
non-trivial builds (there is frequently some assembly, or some flags don't
get passed around somewhere). Logging each of them gives a poor signal to
noise ratio, especially when the tool is used as a library against a bunch
of projects at once.
  • Loading branch information
boomanaiden154 committed Jul 14, 2023
1 parent b476595 commit adaa73d
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 24 deletions.
10 changes: 9 additions & 1 deletion compiler_opt/tools/extract_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@
'The section name passed to llvm-objcopy. For ELF object files, the '
'default .llvmbc is correct. For Mach-O object files, one should use '
'__LLVM,__bitcode')
flags.DEFINE_enum(
'logging_verbosity', 'NORMAL', ['QUIET', 'NORMAL', 'VERBOSE'],
'Specify the logging verbosity. QUIET prints nothing, silently ignoring '
'failures and printing a status at the end. NORMAL notifies on failure but '
'does not print detailed information. VERBOSE logs errors and prints '
'STDOUT/STDERR of the failed process. Note that verbose logging might not '
'be formatted properly when using multiple threads.')

flags.mark_flag_as_required('output_dir')

Expand Down Expand Up @@ -123,7 +130,8 @@ def main(argv):

relative_output_paths = extract_ir_lib.run_extraction(
objs, FLAGS.num_workers, FLAGS.llvm_objcopy_path, FLAGS.cmd_filter,
FLAGS.thinlto_build, FLAGS.cmd_section_name, FLAGS.bitcode_section_name)
FLAGS.thinlto_build, FLAGS.cmd_section_name, FLAGS.bitcode_section_name,
extract_ir_lib.LoggingVerbosity[FLAGS.logging_verbosity])

extract_ir_lib.write_corpus_manifest(FLAGS.thinlto_build,
relative_output_paths, FLAGS.output_dir)
Expand Down
84 changes: 61 additions & 23 deletions compiler_opt/tools/extract_ir_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import multiprocessing
import functools
import json
import enum

from typing import Dict, List, Optional

Expand All @@ -30,6 +31,25 @@
from compiler_opt.rl import constant


class LoggingVerbosity(enum.Enum):
  """Type for passing around the logging verbosity level.

  Members are ordered by their integer value, so
  QUIET < NORMAL < VERBOSE, which lets callers gate output with
  comparisons such as `verbosity > LoggingVerbosity.QUIET`.

  Ordering is only defined between members of this enum. Each comparison
  method returns the `NotImplemented` sentinel for any other operand, so
  Python can try the reflected operation and otherwise raise `TypeError`,
  which is the standard protocol for rich comparisons.
  """
  QUIET = 0
  NORMAL = 1
  VERBOSE = 2

  def __lt__(self, other):
    """Returns whether this level is strictly less verbose than `other`."""
    if self.__class__ is other.__class__:
      return self.value < other.value
    return NotImplemented

  def __le__(self, other):
    """Returns whether this level is at most as verbose as `other`."""
    if self.__class__ is other.__class__:
      return self.value <= other.value
    return NotImplemented

  def __gt__(self, other):
    """Returns whether this level is strictly more verbose than `other`."""
    if self.__class__ is other.__class__:
      return self.value > other.value
    return NotImplemented

  def __ge__(self, other):
    """Returns whether this level is at least as verbose as `other`."""
    if self.__class__ is other.__class__:
      return self.value >= other.value
    return NotImplemented


# TODO(ml-compiler-opt): maybe we can also convert the cmdline file here, from
# a \0-separated list of strings to a \n-separated one.
def should_include_module(cmdline: str, match_regexp: Optional[str]) -> bool:
Expand Down Expand Up @@ -126,28 +146,36 @@ def _get_extraction_bc_command(self, llvm_objcopy_path: str,
self.input_obj(), '/dev/null'
]

def _extract_clang_artifacts(self, llvm_objcopy_path: str, cmd_filter: str,
is_thinlto: bool, cmd_section_name: str,
bitcode_section_name: str) -> Optional[str]:
def _extract_clang_artifacts(
self, llvm_objcopy_path: str, cmd_filter: str, is_thinlto: bool,
cmd_section_name: str, bitcode_section_name: str,
logging_verbosity: LoggingVerbosity) -> Optional[str]:
"""Run llvm-objcopy to extract the .bc and command line."""
if not os.path.exists(self.input_obj()):
logging.info('%s does not exist.', self.input_obj())
return None
os.makedirs(self.dest_dir(), exist_ok=True)
try:
if logging_verbosity < LoggingVerbosity.VERBOSE:
output_pipe = subprocess.PIPE
else:
output_pipe = None
subprocess.run(
self._get_extraction_cmd_command(llvm_objcopy_path, cmd_section_name),
check=True)
check=True,
stdout=output_pipe,
stderr=output_pipe)
if cmd_filter is not None or is_thinlto:
with open(self.cmd_file(), encoding='utf-8') as f:
lines = f.readlines()
assert len(lines) == 1
cmdline = lines[0]
if not should_include_module(cmdline, cmd_filter):
logging.info(
'Excluding module %s because it does not match the filter',
self.input_obj())
os.remove(self.cmd_file())
if logging_verbosity > LoggingVerbosity.QUIET:
logging.info(
'Excluding module %s because it does not match the filter',
self.input_obj())
os.remove(self.cmd_file())
return None
if is_thinlto:
index_file = get_thinlto_index(cmdline, self.obj_base_dir())
Expand All @@ -158,22 +186,26 @@ def _extract_clang_artifacts(self, llvm_objcopy_path: str, cmd_filter: str,
bitcode_section_name),
check=True)
except subprocess.CalledProcessError as e:
# This may happen if .o file was build from asm (.S source).
logging.warning('%s was not processed: %s', self.input_obj(), e)
if logging_verbosity > LoggingVerbosity.QUIET:
# This may happen if .o file was build from asm (.S source).
logging.warning('%s was not processed: %s', self.input_obj(), e)
return None
assert (os.path.exists(self.cmd_file()) and
os.path.exists(self.bc_file()) and
(not is_thinlto or os.path.exists(self.thinlto_index_file())))
return self.relative_output_path()

def _extract_lld_artifacts(self) -> Optional[str]:
def _extract_lld_artifacts(
self, logging_verbosity: LoggingVerbosity) -> Optional[str]:
"""Extract the .bc file with ThinLTO index from an lld ThinLTO invocation.
"""
if not os.path.exists(self.lld_src_bc()):
logging.info('%s does not exist.', self.lld_src_bc())
if logging_verbosity > LoggingVerbosity.QUIET:
logging.info('%s does not exist.', self.lld_src_bc())
return None
if not os.path.exists(self.lld_src_thinlto()):
logging.info('%s does not exist.', self.lld_src_thinlto())
if logging_verbosity > LoggingVerbosity.QUIET:
logging.info('%s does not exist.', self.lld_src_thinlto())
return None
os.makedirs(self.dest_dir(), exist_ok=True)

Expand All @@ -185,20 +217,24 @@ def _extract_lld_artifacts(self) -> Optional[str]:
assert os.path.exists(self.thinlto_index_file())
return self._obj_relative_path

def extract(self,
llvm_objcopy_path: Optional[str] = None,
cmd_filter: Optional[str] = None,
thinlto_build: Optional[str] = None,
cmd_section_name: Optional[str] = '.llvmcmd',
bitcode_section_name: Optional[str] = '.llvmbc') -> Optional[str]:
def extract(
self,
llvm_objcopy_path: Optional[str] = None,
cmd_filter: Optional[str] = None,
thinlto_build: Optional[str] = None,
cmd_section_name: Optional[str] = '.llvmcmd',
bitcode_section_name: Optional[str] = '.llvmbc',
logging_verbosity: Optional[LoggingVerbosity] = LoggingVerbosity.NORMAL
) -> Optional[str]:
if thinlto_build == 'local':
return self._extract_lld_artifacts()
return self._extract_lld_artifacts(logging_verbosity=logging_verbosity)
return self._extract_clang_artifacts(
llvm_objcopy_path=llvm_objcopy_path,
cmd_filter=cmd_filter,
is_thinlto=thinlto_build == 'distributed',
cmd_section_name=cmd_section_name,
bitcode_section_name=bitcode_section_name)
bitcode_section_name=bitcode_section_name,
logging_verbosity=logging_verbosity)


def convert_compile_command_to_objectfile(
Expand Down Expand Up @@ -296,7 +332,8 @@ def make_spec(obj_file: str):

def run_extraction(objs: List[TrainingIRExtractor], num_workers: int,
llvm_objcopy_path: str, cmd_filter: str, thinlto_build: str,
cmd_section_name: str, bitcode_section_name: str):
cmd_section_name: str, bitcode_section_name: str,
logging_verbosity: LoggingVerbosity):
"""Extracts all specified object files into the corpus directory.
Args:
Expand All @@ -321,7 +358,8 @@ def run_extraction(objs: List[TrainingIRExtractor], num_workers: int,
cmd_filter=cmd_filter,
thinlto_build=thinlto_build,
cmd_section_name=cmd_section_name,
bitcode_section_name=bitcode_section_name)
bitcode_section_name=bitcode_section_name,
logging_verbosity=logging_verbosity)

with multiprocessing.Pool(num_workers) as pool:
relative_output_paths = pool.map(extract_artifacts, objs)
Expand Down

0 comments on commit adaa73d

Please sign in to comment.