coverage: introduce an engine runner to correctly handle instantiations
The different engines we handle can be run in different ways, especially
depending on whether they run against a single testcase or a full corpus.
This CL adds more flexibility to correctly handle those different
fuzzers.
Change-Id: I0a09ba25e258c0c261abc57b1778db27645c64b9
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6429608
Auto-Submit: Paul Semel <[email protected]>
Reviewed-by: Weizhong Xia <[email protected]>
Reviewed-by: Ali Hijazi <[email protected]>
Commit-Queue: Ali Hijazi <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1442395}
NOKEYCHECK=True
GitOrigin-RevId: 7865028a663d4e540719d9c18a0c044852e98e17
diff --git a/run_all_fuzzers.py b/run_all_fuzzers.py
index 7631852..312858b 100644
--- a/run_all_fuzzers.py
+++ b/run_all_fuzzers.py
@@ -9,15 +9,18 @@
"""
import argparse
+import abc
+import dataclasses
import glob
import json
import math
import os
import subprocess
import sys
+import tempfile
from multiprocessing import Process, Manager, cpu_count, Pool
-from typing import Mapping, Sequence
+from typing import Mapping, Sequence, Optional
WHOLE_CORPUS_RETRIES = 2
WHOLE_CORPUS_TIMEOUT_SECS = 1200
@@ -34,6 +37,118 @@
LLVM_PROFDATA = 'third_party/llvm-build/Release+Asserts/bin/llvm-profdata'
+class EngineRunner(abc.ABC):
+  """Abstracts how a fuzzing engine is run against either a full corpus or a
+  bunch of testcases. Implementers might provide different running commands
+  depending on the mode and on the parameters.
+  """
+
+  @abc.abstractmethod
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    """Runs the current engine against the full corpus. It returns True if the
+    command succeeded and False otherwise.
+
+    Args:
+      env: the extra environment to forward to the command.
+      timeout: the timeout for the command, in seconds.
+      annotation: a short label describing this run.
+      corpus_dir: optional corpus directory to run the engine against. If
+        None, the target is run without any testcase.
+
+    Returns:
+      whether the run succeeded.
+    """
+
+  @abc.abstractmethod
+  def run_testcases(self, env: Mapping[str, str], timeout: float,
+                    annotation: str, testcases: Sequence[str]) -> bool:
+    """Runs the current engine against some testcases (can be one). It returns
+    True if the command succeeded and False otherwise.
+
+    Args:
+      env: the extra environment to forward to the command.
+      timeout: the timeout for the command, in seconds.
+      annotation: a short label describing this run.
+      testcases: the sequence of testcases.
+
+    Returns:
+      whether the run succeeded.
+    """
+
+  def _run_command(self, cmd: Sequence[str], env: Mapping[str, str],
+                   timeout: float, annotation: str) -> bool:
+    """Runs `cmd` via _run_and_log and reports whether it succeeded."""
+    # The result has to be returned: callers rely on it to stop retrying.
+    return _run_and_log(cmd, env, timeout, annotation)
+
+
[email protected]
+class CmdRunner(EngineRunner):
+  """A simple command runner. Depending on whether it's running in full corpus
+  mode or testcases mode, the corpus directory or the testcases are appended
+  to a copy of the provided command, which is itself never modified.
+  """
+  cmd: Sequence[str]
+
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    # Build a fresh list: `+=` on self.cmd would extend the shared command in
+    # place, so retries and per-testcase runs would inherit the corpus dir.
+    run_cmd = [*self.cmd, corpus_dir] if corpus_dir else list(self.cmd)
+    return self._run_command(run_cmd, env, timeout, annotation)
+
+  def run_testcases(self, env: Mapping[str, str], timeout: float,
+                    annotation: str, testcases: Sequence[str]) -> bool:
+    return self._run_command([*self.cmd, *testcases], env, timeout, annotation)
+
+
[email protected]
+class CentipedeRunner(EngineRunner):
+  """Runs a given target with the centipede fuzzing engine."""
+  centipede_path: str
+  fuzz_target_path: str
+
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    # Scoped so the scratch workdir is removed as soon as the run finishes.
+    with tempfile.TemporaryDirectory() as workdir:
+      cmd = [
+          self.centipede_path, f'-binary={self.fuzz_target_path}',
+          '-shmem_size_mb=4096', '-address_space_limit_mb=0',
+          '-rss_limit_mb=0', '-symbolizer_path=/dev/null', '-num_runs=0',
+          '-require_pc_table=false', f'-workdir={workdir}',
+          '-populate_binary_info=false', '-ignore_timeout_reports=true'
+      ]
+      if corpus_dir:
+        cmd.append(f'-corpus_dir={corpus_dir}')
+      return self._run_command(cmd, env, timeout, annotation)
+
+  def run_testcases(self, env: Mapping[str, str], timeout: float,
+                    annotation: str, testcases: Sequence[str]) -> bool:
+    return self._run_command([self.fuzz_target_path, *testcases], env,
+                             timeout, annotation)
+
+
[email protected]
+class FuzzilliRunner(CmdRunner):
+  """Runs a given target with Fuzzilli. The full-corpus run only passes the
+  files listed in corpus_files, each joined onto the corpus directory.
+  """
+  corpus_files: Sequence[str]
+
+  def run_full_corpus(self, env: Mapping[str, str], timeout: float,
+                      annotation: str, corpus_dir: Optional[str]) -> bool:
+    # We are not reading the whole directory, since this might generate too
+    # long command lines. Instead, only the corpus_files this runner was
+    # constructed with are passed to the target.
+    # Without a corpus directory, the file names are used as they are.
+    base_dir = corpus_dir or ""
+    testcases = [os.path.join(base_dir, name) for name in self.corpus_files]
+    return self._run_command([*self.cmd, *testcases], env, timeout,
+                             annotation)
+
+
def _profdata_merge(inputs: Sequence[str], output: str) -> bool:
"""Merges the given profraw files into a single file.
@@ -140,7 +255,7 @@
failed_targets = args[2]
num_targets = args[3]
target = target_details['name']
- cmd = target_details['cmd']
+ cmd_runner = target_details['cmd_runner']
env = target_details['env']
corpus_dir = target_details['corpus']
corpus_files = target_details['files']
@@ -153,15 +268,11 @@
fullcorpus_profraw = os.path.join(profraw_dir, target + "_%p.profraw")
env['LLVM_PROFILE_FILE'] = fullcorpus_profraw
- fullcorpus_cmd = cmd.copy()
- if corpus_files not in [None, '*']:
- # Fuzzilli's case
- jsfiles = corpus_files.split()
- fullcorpus_cmd.extend([os.path.join(corpus_dir, file) for file in jsfiles])
+
_erase_profraws(fullcorpus_profraw)
for i in range(WHOLE_CORPUS_RETRIES):
- ok = _run_and_log(fullcorpus_cmd, env, WHOLE_CORPUS_TIMEOUT_SECS,
- f"full corpus attempt {i}")
+ ok = cmd_runner.run_full_corpus(env, WHOLE_CORPUS_TIMEOUT_SECS,
+ f"full corpus attempt {i}", corpus_dir)
if ok:
break
@@ -188,12 +299,10 @@
specific_test_case_profraw = os.path.join(
profraw_dir, target + "_" + str(count) + "_%p.profraw")
test_case = os.path.join(corpus_dir, corpus_entry)
- specific_test_case_cmd = cmd + [test_case]
env['LLVM_PROFILE_FILE'] = specific_test_case_profraw
_erase_profraws(specific_test_case_profraw)
- _run_and_log(specific_test_case_cmd, env,
- INDIVIDUAL_TESTCASE_TIMEOUT_SECS,
- f"specific test case {count}")
+ cmd_runner.run_testcases(env, INDIVIDUAL_TESTCASE_TIMEOUT_SECS,
+ f"specific test case {count}", [test_case])
resulting_profraws = list(_matching_profraws(specific_test_case_profraw))
if resulting_profraws:
# We recorded valid profraws, let's merge them into
@@ -295,23 +404,15 @@
if 'DISPLAY' in os.environ:
# Inherit X settings from the real environment
env['DISPLAY'] = os.environ['DISPLAY']
+ # Some of our fuzzers pull in dependencies that redefine symbols, so ASan's
+ # ODR violation detection must be disabled for them to run.
+ env['ASAN_OPTIONS'] = 'detect_odr_violation=0'
if args.fuzzer == CENTIPEDE:
- # Centipede RunnerMain will by default set the watchdog thread to all
- # zeros, which means we don't need to worry about rss_limit_mb or
- # timeouts.
- cmd = [fuzzer_target_binpath]
- # The centipede fuzzing target needs to have all the files listed as
- # inputs. Unfortunately, this means that if any of the testcases fails,
- # we won't have coverage for any files in the corpus. For that reason,
- # we prefer listing the files and fallback on gathering profiles per
- # testcase if that happens.
- files = ' '.join(os.listdir(fuzzer_target_corporadir))
+ cmd = CentipedeRunner(centipede_path=centipede_target_binpath,
+ fuzz_target_path=fuzzer_target_binpath)
else: # libfuzzer
- cmd = [
- fuzzer_target_binpath, '-runs=0', '-rss_limit_mb=8192',
- fuzzer_target_corporadir
- ]
- files = '*'
+ cmd = CmdRunner(
+ [fuzzer_target_binpath, '-runs=0', '-rss_limit_mb=8192'])
all_target_details.append({
'name':
fuzzer_target,
@@ -323,12 +424,12 @@
env,
# RSS limit 8GB. Some of our fuzzers which involve running significant
# chunks of Chromium code require more than the 2GB default.
- 'cmd':
+ 'cmd_runner':
cmd,
'corpus':
fuzzer_target_corporadir,
'files':
- files
+ '*'
})
# We also want to run ./chrome without a valid X server.
@@ -353,7 +454,8 @@
os.path.join(REPORT_DIR, "chrome.profdata"),
'env':
env,
- 'cmd': [chrome_target_binpath],
+ 'cmd_runner':
+ CmdRunner([chrome_target_binpath]),
'corpus':
None,
'files':
@@ -399,8 +501,8 @@
os.path.join(REPORT_DIR, f'{corpora_dir}_{i}.profdata'),
'env':
dict(),
- 'cmd':
- cmd,
+ 'cmd_runner':
+ FuzzilliRunner(cmd=cmd, corpus_files=chunk),
'corpus':
path_to_js_dir,
'files':