Optimize llvm-nm (#13465)

juj · web-flow · commit 93472e311c68 · 2021-02-13T21:43:45.000+01:00
* Optimize llvm-nm invocations from Emscripten. In python creating a multiprocessing pool is *extremely* slow, and can take 500ms-1 second. If there are only few files to llvm-nm, do those sequentially since a single llvm-nm only takes about 10-20ms.

* Further optimize llvm-nm to avoid the multiprocessing pool completely.

* Further optimize multiple llvm-nm calls by just invoking llvm-nm once: it accepts a list of files to process.

* Flake

* Flake

* Handle llvm-nm nonzero exit code.

* Allow calling llvm_nm_multiple with zero files
diff --git a/tools/building.py b/tools/building.py
@@ -118,12 +118,6 @@ def extract_archive_contents(archive_file):
   }
 
 
-# Due to a python pickling issue, the following two functions must be at top
-# level, or multiprocessing pool spawn won't find them.
-def g_llvm_nm_uncached(filename):
-  return llvm_nm_uncached(filename)
-
-
 def g_multiprocessing_initializer(*args):
   for item in args:
     (key, value) = item.split('=', 1)
@@ -364,20 +358,82 @@ def make_paths_absolute(f):
     return os.path.abspath(f)
 
 
-# Runs llvm-nm in parallel for the given list of files.
+# Runs llvm-nm for the given list of files.
 # The results are populated in nm_cache
-# multiprocessing_pool: An existing multiprocessing pool to reuse for the operation, or None
-# to have the function allocate its own.
-def parallel_llvm_nm(files):
-  with ToolchainProfiler.profile_block('parallel_llvm_nm'):
-    pool = get_multiprocessing_pool()
-    object_contents = pool.map(g_llvm_nm_uncached, files)
+def llvm_nm_multiple(files):
+  with ToolchainProfiler.profile_block('llvm_nm_multiple'):
+    if len(files) == 0:
+      return []
+    # Run llvm-nm on files that we haven't cached yet
+    llvm_nm_files = [f for f in files if f not in nm_cache]
+
+    # We can issue multiple files in a single llvm-nm calls, but only if those
+    # files are all .o or .bc files. Because of llvm-nm output format, we cannot
+    # llvm-nm multiple .a files in one call, but those must be individually checked.
+    if len(llvm_nm_files) > 1:
+      llvm_nm_files = [f for f in files if f.endswith('.o') or f.endswith('.bc')]
+
+    if len(llvm_nm_files) > 0:
+      cmd = [LLVM_NM] + llvm_nm_files
+      results = run_process(cmd, stdout=PIPE, stderr=PIPE, check=False)
+
+      # If one or more of the input files cannot be processed, llvm-nm will return a non-zero error code, but it will still process and print
+      # out all the other files in order. So even if process return code is non zero, we should always look at what we got to stdout.
+      if results.returncode != 0:
+        logger.debug('Subcommand ' + ' '.join(cmd) + ' failed with return code ' + str(results.returncode) + '! (An input file was corrupt?)')
+
+      results = results.stdout
+
+      # llvm-nm produces a single listing of form
+      # file1.o:
+      # 00000001 T __original_main
+      #          U __stack_pointer
+      #
+      # file2.o:
+      # 0000005d T main
+      #          U printf
+      #
+      # ...
+      # so loop over the report to extract the results
+      # for each individual file.
+
+      filename = llvm_nm_files[0]
+
+      # When we dispatched more than one file, we must manually parse
+      # the file result delimiters (like shown structured above)
+      if len(llvm_nm_files) > 1:
+        file_start = 0
+        i = 0
+
+        while True:
+          nl = results.find('\n', i)
+          if nl < 0:
+            break
+          colon = results.rfind(':', i, nl)
+          if colon >= 0 and results[colon + 1] == '\n': # New file start?
+            nm_cache[filename] = parse_symbols(results[file_start:i - 1])
+            filename = results[i:colon].strip()
+            file_start = colon + 2
+          i = nl + 1
+
+        nm_cache[filename] = parse_symbols(results[file_start:])
+      else:
+        # We only dispatched a single file, we can just parse that directly
+        # to the output.
+        nm_cache[filename] = parse_symbols(results)
+
+    # Any .a files that have multiple .o files will have hard time parsing. Scan those
+    # sequentially to confirm. TODO: Move this to use run_multiple_processes()
+    # when available.
+    for f in files:
+      if f not in nm_cache:
+        nm_cache[f] = llvm_nm(f)
+
+  return [nm_cache[f] for f in files]
+
 
-    for i, file in enumerate(files):
-      if object_contents[i].returncode != 0:
-        logger.debug('llvm-nm failed on file ' + file + ': return code ' + str(object_contents[i].returncode) + ', error: ' + object_contents[i].output)
-      nm_cache[file] = object_contents[i]
-    return object_contents
+def llvm_nm(file):
+  return llvm_nm_multiple([file])[0]
 
 
 def read_link_inputs(files):
@@ -417,7 +473,7 @@ def clean_at_exit():
 
     # Next, extract symbols from all object files (either standalone or inside archives we just extracted)
     # The results are not used here directly, but populated to llvm-nm cache structure.
-    parallel_llvm_nm(object_names)
+    llvm_nm_multiple(object_names)
 
 
 def llvm_backend_args():
@@ -754,34 +810,6 @@ def parse_symbols(output):
   return ObjectFileInfo(0, None, set(defs), set(undefs), set(commons))
 
 
-def llvm_nm_uncached(filename, stdout=PIPE, stderr=PIPE):
-  # LLVM binary ==> list of symbols
-  proc = run_process([LLVM_NM, filename], stdout=stdout, stderr=stderr, check=False)
-  if proc.returncode == 0:
-    return parse_symbols(proc.stdout)
-  else:
-    return ObjectFileInfo(proc.returncode, str(proc.stdout) + str(proc.stderr))
-
-
-def llvm_nm(filename, stdout=PIPE, stderr=PIPE):
-  # Always use absolute paths to maximize cache usage
-  filename = os.path.abspath(filename)
-
-  if filename in nm_cache:
-    return nm_cache[filename]
-
-  ret = llvm_nm_uncached(filename, stdout, stderr)
-
-  if ret.returncode != 0:
-    logger.debug('llvm-nm failed on file ' + filename + ': return code ' + str(ret.returncode) + ', error: ' + ret.output)
-
-  # Even if we fail, write the results to the NM cache so that we don't keep trying to llvm-nm the
-  # failing file again later.
-  nm_cache[filename] = ret
-
-  return ret
-
-
 def emcc(filename, args=[], output_filename=None, stdout=None, stderr=None, env=None):
   if output_filename is None:
     output_filename = filename + '.o'
diff --git a/tools/system_libs.py b/tools/system_libs.py
@@ -1394,7 +1394,7 @@ def add_reverse_deps(need):
       add_reverse_deps(need) # recurse to get deps of deps
 
   # Scan symbols
-  symbolses = building.parallel_llvm_nm([os.path.abspath(t) for t in input_files])
+  symbolses = building.llvm_nm_multiple([os.path.abspath(t) for t in input_files])
 
   warn_on_unexported_main(symbolses)