diff --git a/compiler_opt/tools/extract_ir.py b/compiler_opt/tools/extract_ir.py
index e55913ab..4ac85135 100644
--- a/compiler_opt/tools/extract_ir.py
+++ b/compiler_opt/tools/extract_ir.py
@@ -79,6 +79,13 @@
     'The section name passed to llvm-objcopy. For ELF object files, the '
     'default .llvmbc is correct. For Mach-O object files, one should use '
     '__LLVM,__bitcode')
+flags.DEFINE_enum(
+    'logging_verbosity', 'NORMAL', ['QUIET', 'NORMAL', 'VERBOSE'],
+    'Specify the logging verbosity. QUIET prints nothing, silently ignoring '
+    'failures and printing a status at the end. NORMAL notifies on failure but '
+    'does not print detailed information. VERBOSE logs errors and prints '
+    'STDOUT/STDERR of the failed process. Note that verbose logging might not '
+    'be formatted properly when using multiple threads.')
 
 flags.mark_flag_as_required('output_dir')
 
@@ -123,7 +130,8 @@ def main(argv):
 
   relative_output_paths = extract_ir_lib.run_extraction(
       objs, FLAGS.num_workers, FLAGS.llvm_objcopy_path, FLAGS.cmd_filter,
-      FLAGS.thinlto_build, FLAGS.cmd_section_name, FLAGS.bitcode_section_name)
+      FLAGS.thinlto_build, FLAGS.cmd_section_name, FLAGS.bitcode_section_name,
+      extract_ir_lib.LoggingVerbosity[FLAGS.logging_verbosity])
 
   extract_ir_lib.write_corpus_manifest(FLAGS.thinlto_build,
                                        relative_output_paths, FLAGS.output_dir)
diff --git a/compiler_opt/tools/extract_ir_lib.py b/compiler_opt/tools/extract_ir_lib.py
index 9be0ad7c..1581aef7 100644
--- a/compiler_opt/tools/extract_ir_lib.py
+++ b/compiler_opt/tools/extract_ir_lib.py
@@ -22,6 +22,7 @@
 import multiprocessing
 import functools
 import json
+import enum
 
 from typing import Dict, List, Optional
 
@@ -30,6 +31,25 @@
 from compiler_opt.rl import constant
 
 
+class LoggingVerbosity(enum.Enum):
+  """Type for passing around the logging verbosity level."""
+  QUIET = 0
+  NORMAL = 1
+  VERBOSE = 2
+
+  def __lt__(self, other):
+    if self.__class__ is other.__class__:
+      return self.value < other.value
+    else:
+      return NotImplemented
+
+  def __gt__(self, other):
+    if self.__class__ is other.__class__:
+      return self.value > other.value
+    else:
+      return NotImplemented
+
+
 # TODO(ml-compiler-opt): maybe we can also convert here the cmdline file,from a
 # \0 - separated list of strings, to a \n one.
 def should_include_module(cmdline: str, match_regexp: Optional[str]) -> bool:
@@ -126,28 +146,36 @@ def _get_extraction_bc_command(self, llvm_objcopy_path: str,
         self.input_obj(), '/dev/null'
     ]
 
-  def _extract_clang_artifacts(self, llvm_objcopy_path: str, cmd_filter: str,
-                               is_thinlto: bool, cmd_section_name: str,
-                               bitcode_section_name: str) -> Optional[str]:
+  def _extract_clang_artifacts(
+      self, llvm_objcopy_path: str, cmd_filter: str, is_thinlto: bool,
+      cmd_section_name: str, bitcode_section_name: str,
+      logging_verbosity: LoggingVerbosity) -> Optional[str]:
     """Run llvm-objcopy to extract the .bc and command line."""
     if not os.path.exists(self.input_obj()):
       logging.info('%s does not exist.', self.input_obj())
       return None
     os.makedirs(self.dest_dir(), exist_ok=True)
     try:
+      if logging_verbosity < LoggingVerbosity.VERBOSE:
+        output_pipe = subprocess.PIPE
+      else:
+        output_pipe = None
       subprocess.run(
           self._get_extraction_cmd_command(llvm_objcopy_path, cmd_section_name),
-          check=True)
+          check=True,
+          stdout=output_pipe,
+          stderr=output_pipe)
       if cmd_filter is not None or is_thinlto:
         with open(self.cmd_file(), encoding='utf-8') as f:
           lines = f.readlines()
         assert len(lines) == 1
         cmdline = lines[0]
         if not should_include_module(cmdline, cmd_filter):
-          logging.info(
-              'Excluding module %s because it does not match the filter',
-              self.input_obj())
+          if logging_verbosity > LoggingVerbosity.QUIET:
+            logging.info(
+                'Excluding module %s because it does not match the filter',
+                self.input_obj())
           os.remove(self.cmd_file())
           return None
         if is_thinlto:
           index_file = get_thinlto_index(cmdline, self.obj_base_dir())
@@ -158,22 +186,26 @@ def _extract_clang_artifacts(self, llvm_objcopy_path: str, cmd_filter: str,
                                           bitcode_section_name),
           check=True)
     except subprocess.CalledProcessError as e:
-      # This may happen if .o file was build from asm (.S source).
-      logging.warning('%s was not processed: %s', self.input_obj(), e)
+      if logging_verbosity > LoggingVerbosity.QUIET:
+        # This may happen if .o file was built from asm (.S source).
+        logging.warning('%s was not processed: %s', self.input_obj(), e)
       return None
     assert (os.path.exists(self.cmd_file()) and
             os.path.exists(self.bc_file()) and
             (not is_thinlto or os.path.exists(self.thinlto_index_file())))
     return self.relative_output_path()
 
-  def _extract_lld_artifacts(self) -> Optional[str]:
+  def _extract_lld_artifacts(
+      self, logging_verbosity: LoggingVerbosity) -> Optional[str]:
     """Extract the .bc file with ThinLTO index from an lld ThinLTO invocation.
     """
     if not os.path.exists(self.lld_src_bc()):
-      logging.info('%s does not exist.', self.lld_src_bc())
+      if logging_verbosity > LoggingVerbosity.QUIET:
+        logging.info('%s does not exist.', self.lld_src_bc())
       return None
     if not os.path.exists(self.lld_src_thinlto()):
-      logging.info('%s does not exist.', self.lld_src_thinlto())
+      if logging_verbosity > LoggingVerbosity.QUIET:
+        logging.info('%s does not exist.', self.lld_src_thinlto())
       return None
     os.makedirs(self.dest_dir(), exist_ok=True)
 
@@ -185,20 +217,24 @@ def _extract_lld_artifacts(self) -> Optional[str]:
     assert os.path.exists(self.thinlto_index_file())
     return self._obj_relative_path
 
-  def extract(self,
-              llvm_objcopy_path: Optional[str] = None,
-              cmd_filter: Optional[str] = None,
-              thinlto_build: Optional[str] = None,
-              cmd_section_name: Optional[str] = '.llvmcmd',
-              bitcode_section_name: Optional[str] = '.llvmbc') -> Optional[str]:
+  def extract(
+      self,
+      llvm_objcopy_path: Optional[str] = None,
+      cmd_filter: Optional[str] = None,
+      thinlto_build: Optional[str] = None,
+      cmd_section_name: Optional[str] = '.llvmcmd',
+      bitcode_section_name: Optional[str] = '.llvmbc',
+      logging_verbosity: Optional[LoggingVerbosity] = LoggingVerbosity.NORMAL
+  ) -> Optional[str]:
     if thinlto_build == 'local':
-      return self._extract_lld_artifacts()
+      return self._extract_lld_artifacts(logging_verbosity=logging_verbosity)
     return self._extract_clang_artifacts(
         llvm_objcopy_path=llvm_objcopy_path,
         cmd_filter=cmd_filter,
         is_thinlto=thinlto_build == 'distributed',
         cmd_section_name=cmd_section_name,
-        bitcode_section_name=bitcode_section_name)
+        bitcode_section_name=bitcode_section_name,
+        logging_verbosity=logging_verbosity)
 
 
 def convert_compile_command_to_objectfile(
@@ -296,7 +332,8 @@ def make_spec(obj_file: str):
 
 def run_extraction(objs: List[TrainingIRExtractor], num_workers: int,
                    llvm_objcopy_path: str, cmd_filter: str, thinlto_build: str,
-                   cmd_section_name: str, bitcode_section_name: str):
+                   cmd_section_name: str, bitcode_section_name: str,
+                   logging_verbosity: LoggingVerbosity):
   """Extracts all specified object files into the corpus directory.
 
   Args:
@@ -321,7 +358,8 @@ def run_extraction(objs: List[TrainingIRExtractor], num_workers: int,
       cmd_filter=cmd_filter,
       thinlto_build=thinlto_build,
       cmd_section_name=cmd_section_name,
-      bitcode_section_name=bitcode_section_name)
+      bitcode_section_name=bitcode_section_name,
+      logging_verbosity=logging_verbosity)
 
   with multiprocessing.Pool(num_workers) as pool:
     relative_output_paths = pool.map(extract_artifacts, objs)