coverage: introduce an engine runner to correctly handle instantiations

The different handled engines can have different way of being run,
especially if they're being run against one testcase or a full corpus.
This CL adds more flexibility onto correctly handling those different
fuzzers.

Change-Id: I0a09ba25e258c0c261abc57b1778db27645c64b9
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6429608
Auto-Submit: Paul Semel <[email protected]>
Reviewed-by: Weizhong Xia <[email protected]>
Reviewed-by: Ali Hijazi <[email protected]>
Commit-Queue: Ali Hijazi <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1442395}
NOKEYCHECK=True
GitOrigin-RevId: 7865028a663d4e540719d9c18a0c044852e98e17
diff --git a/run_all_fuzzers.py b/run_all_fuzzers.py
index 7631852..312858b 100644
--- a/run_all_fuzzers.py
+++ b/run_all_fuzzers.py
@@ -9,15 +9,18 @@
 """
 
 import argparse
+import abc
+import dataclasses
 import glob
 import json
 import math
 import os
 import subprocess
 import sys
+import tempfile
 
 from multiprocessing import Process, Manager, cpu_count, Pool
-from typing import Mapping, Sequence
+from typing import Mapping, Sequence, Optional
 
 WHOLE_CORPUS_RETRIES = 2
 WHOLE_CORPUS_TIMEOUT_SECS = 1200
@@ -34,6 +37,118 @@
 LLVM_PROFDATA = 'third_party/llvm-build/Release+Asserts/bin/llvm-profdata'
 
 
+class EngineRunner(abc.ABC):
+  """This class abstracts running different engines against a full corpus or a
+  bunch of testcases. Implementers might provide different running commands
+  depending on the parameters.
+  """
+
+  @abc.abstractmethod
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    """Runs the current engine against the full corpus. It returns True if the
+    command succeeded and False otherwise.
+
+    Args:
+        env: the extra environment to forward to the command.
+        timeout: the potential timeout for the command.
+        annotation: some annotations for the command.
+        corpus_dir: optional corpus directory to run the engine against. If
+        None, this will run the target without any testcase (does nothing).
+
+    Returns:
+        whether the run succeed.
+    """
+    pass
+
+  @abc.abstractmethod
+  def run_testcases(self, env: Mapping[str, str], timeout: float,
+                    annotation: str, testcases: Sequence[str]) -> bool:
+    """Runs the current engine against some testcases (can be one). It returns
+    True if the command succeeded and False otherwise.
+
+    Args:
+        env: the extra environment to forward to the command.
+        timeout: the potential timeout for the command.
+        annotation: some annotations for the command.
+        testcases: the sequence of testcases.
+
+    Returns:
+        whether the run succeed.
+    """
+    pass
+
+  def _run_command(self, cmd: Sequence[str], env: Mapping[str, str],
+                   timeout: float, annotation: str):
+    _run_and_log(cmd, env, timeout, annotation)
+
+
[email protected]
+class CmdRunner(EngineRunner):
+  """A simple command runner. Depending on whether it's running in full corpus
+  mode or testcases mode, this will simply append the extra parameters at the
+  end of the provided command.
+  """
+  cmd: Sequence[str]
+
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    run_cmd = self.cmd
+    if corpus_dir:
+      run_cmd += [corpus_dir]
+    return self._run_command(run_cmd, env, timeout, annotation)
+
+  def run_testcases(self, env: Mapping[str, str], timeout: float,
+                    annotation: str, testcases: Sequence[str]) -> bool:
+    return self._run_command(self.cmd + testcases, env, timeout, annotation)
+
+
[email protected]
+class CentipedeRunner(EngineRunner):
+  """Runs a given target with the centipede fuzzing engine.
+  """
+  centipede_path: str
+  fuzz_target_path: str
+
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    dir = tempfile.TemporaryDirectory()
+    cmd = [
+        self.centipede_path, f'-binary={self.fuzz_target_path}',
+        '-shmem_size_mb=4096', '-address_space_limit_mb=0', '-rss_limit_mb=0',
+        '-symbolizer_path=/dev/null', '-num_runs=0', '-require_pc_table=false',
+        f'-workdir={dir.name}', '-populate_binary_info=false',
+        '-ignore_timeout_reports=true'
+    ]
+    if corpus_dir:
+      cmd += [f'-corpus_dir={corpus_dir}']
+    return self._run_command(cmd, env, timeout, annotation)
+
+  def run_testcases(self, env: Mapping[str, str], timeout: float,
+                    annotation: str, testcases: Sequence[str]) -> bool:
+    return self._run_command([self.fuzz_target_path] + testcases, env, timeout,
+                             annotation)
+
+
[email protected]
+class FuzzilliRunner(CmdRunner):
+  """Runs a given target with Fuzzilli.
+  """
+  corpus_files: Sequence[str]
+
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    # We are not reading the whole directory, since this might generate too
+    # long command lines, but we're rather using the corpus_files we were
+    # passed as arguments.
+    if not corpus_dir:
+      corpus_dir = ""
+    return self._run_command(
+        self.cmd +
+        [os.path.join(corpus_dir, file) for file in self.corpus_files], env,
+        timeout, annotation)
+
+
 def _profdata_merge(inputs: Sequence[str], output: str) -> bool:
   """Merges the given profraw files into a single file.
 
@@ -140,7 +255,7 @@
   failed_targets = args[2]
   num_targets = args[3]
   target = target_details['name']
-  cmd = target_details['cmd']
+  cmd_runner = target_details['cmd_runner']
   env = target_details['env']
   corpus_dir = target_details['corpus']
   corpus_files = target_details['files']
@@ -153,15 +268,11 @@
 
   fullcorpus_profraw = os.path.join(profraw_dir, target + "_%p.profraw")
   env['LLVM_PROFILE_FILE'] = fullcorpus_profraw
-  fullcorpus_cmd = cmd.copy()
-  if corpus_files not in [None, '*']:
-    # Fuzzilli's case
-    jsfiles = corpus_files.split()
-    fullcorpus_cmd.extend([os.path.join(corpus_dir, file) for file in jsfiles])
+
   _erase_profraws(fullcorpus_profraw)
   for i in range(WHOLE_CORPUS_RETRIES):
-    ok = _run_and_log(fullcorpus_cmd, env, WHOLE_CORPUS_TIMEOUT_SECS,
-                      f"full corpus attempt {i}")
+    ok = cmd_runner.run_full_corpus(env, WHOLE_CORPUS_TIMEOUT_SECS,
+                                    f"full corpus attempt {i}", corpus_dir)
     if ok:
       break
 
@@ -188,12 +299,10 @@
       specific_test_case_profraw = os.path.join(
           profraw_dir, target + "_" + str(count) + "_%p.profraw")
       test_case = os.path.join(corpus_dir, corpus_entry)
-      specific_test_case_cmd = cmd + [test_case]
       env['LLVM_PROFILE_FILE'] = specific_test_case_profraw
       _erase_profraws(specific_test_case_profraw)
-      _run_and_log(specific_test_case_cmd, env,
-                   INDIVIDUAL_TESTCASE_TIMEOUT_SECS,
-                   f"specific test case {count}")
+      cmd_runner.run_testcases(env, INDIVIDUAL_TESTCASE_TIMEOUT_SECS,
+                               f"specific test case {count}", [test_case])
       resulting_profraws = list(_matching_profraws(specific_test_case_profraw))
       if resulting_profraws:
         # We recorded valid profraws, let's merge them into
@@ -295,23 +404,15 @@
       if 'DISPLAY' in os.environ:
         # Inherit X settings from the real environment
         env['DISPLAY'] = os.environ['DISPLAY']
+      # This is necessary because some of our fuzzers are having redefinitions
+      # due to some dependencies redefining symbols.
+      env['ASAN_OPTIONS'] = 'detect_odr_violation=0'
       if args.fuzzer == CENTIPEDE:
-        # Centipede RunnerMain will by default set the watchdog thread to all
-        # zeros, which means we don't need to worry about rss_limit_mb or
-        # timeouts.
-        cmd = [fuzzer_target_binpath]
-        # The centipede fuzzing target needs to have all the files listed as
-        # inputs. Unfortunately, this means that if any of the testcases fails,
-        # we won't have coverage for any files in the corpus. For that reason,
-        # we prefer listing the files and fallback on gathering profiles per
-        # testcase if that happens.
-        files = ' '.join(os.listdir(fuzzer_target_corporadir))
+        cmd = CentipedeRunner(centipede_path=centipede_target_binpath,
+                              fuzz_target_path=fuzzer_target_binpath)
       else:  # libfuzzer
-        cmd = [
-            fuzzer_target_binpath, '-runs=0', '-rss_limit_mb=8192',
-            fuzzer_target_corporadir
-        ]
-        files = '*'
+        cmd = CmdRunner(
+            [fuzzer_target_binpath, '-runs=0', '-rss_limit_mb=8192'])
       all_target_details.append({
           'name':
           fuzzer_target,
@@ -323,12 +424,12 @@
           env,
           # RSS limit 8GB. Some of our fuzzers which involve running significant
           # chunks of Chromium code require more than the 2GB default.
-          'cmd':
+          'cmd_runner':
           cmd,
           'corpus':
           fuzzer_target_corporadir,
           'files':
-          files
+          '*'
       })
 
   # We also want to run ./chrome without a valid X server.
@@ -353,7 +454,8 @@
         os.path.join(REPORT_DIR, "chrome.profdata"),
         'env':
         env,
-        'cmd': [chrome_target_binpath],
+        'cmd_runner':
+        CmdRunner([chrome_target_binpath]),
         'corpus':
         None,
         'files':
@@ -399,8 +501,8 @@
           os.path.join(REPORT_DIR, f'{corpora_dir}_{i}.profdata'),
           'env':
           dict(),
-          'cmd':
-          cmd,
+          'cmd_runner':
+          FuzzilliRunner(cmd=cmd, corpus_files=chunk),
           'corpus':
           path_to_js_dir,
           'files':