| # |
| # Copyright 2024 WebAssembly Community Group participants |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| ''' |
| ClusterFuzz run.py script: when run by ClusterFuzz, it uses wasm-opt to generate |
| a fixed number of testcases. This is a "blackbox fuzzer", see |
| |
| https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/ |
| |
| This file should be bundled up together with the other files it needs, see |
| bundle_clusterfuzz.py. |
| ''' |
| |
| import os |
| import getopt |
| import math |
| import random |
| import subprocess |
| import sys |
| |
| |
# Contents of every "fuzzer flags" file, i.e. the V8 flags that ClusterFuzz is
# told to run each testcase with. By default all staging flags are applied.
FUZZER_FLAGS_FILE_CONTENTS = '--wasm-staging'

# Upper bound on the number of random bytes handed to wasm-opt -ttf.
# fuzz_opt.py's INPUT_SIZE_MAX is larger, because that script is tuned for
# fuzzing large wasm files (to reduce the overhead of launching many processes
# per file), and that is less of a concern on ClusterFuzz.
MAX_RANDOM_SIZE = 15 * 1024

# Extra JS operations (such as additional compiles or runs of the wasm) that
# get appended to a testcase: the maximum is high, but the median is far
# lower, so that a typical testcase is not long-running.
MAX_EXTRA_JS_OPERATIONS = 40
MEDIAN_EXTRA_JS_OPERATIONS = 2

# File name prefix of fuzz (testcase) files.
FUZZ_FILENAME_PREFIX = 'fuzz-'

# File name prefix of flags files.
FLAGS_FILENAME_PREFIX = 'flags-'

# Name of this fuzzer; it follows FUZZ_FILENAME_PREFIX / FLAGS_FILENAME_PREFIX
# in file names.
FUZZER_NAME_PREFIX = 'binaryen-'

# Root directory of the bundle, which is simply the directory containing this
# very file.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# Location of the wasm-opt binary that is run to generate testcases.
FUZZER_BINARY_PATH = os.path.join(ROOT_DIR, 'bin', 'wasm-opt')

# Location of the fuzz_shell.js script that executes the wasm in each
# testcase.
JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')

# Arguments passed to wasm-opt in order to generate a wasm file.
FUZZER_ARGS = [
    # Turn the random input data into a wasm module.
    '--translate-to-fuzz',
    # Also run some random passes, which further shapes the emitted wasm.
    '--fuzz-passes',
    # Enable every feature, then switch off those not yet ready for fuzzing.
    # This may be a smaller set than fuzz_opt.py uses, as that script enables
    # a few experimental flags, while here we only fuzz with d8's
    # --wasm-staging.
    '-all',
    '--disable-shared-everything',
    '--disable-fp16',
]
| |
| |
# Builds the name of a fuzz or flags file: the given prefix, followed by the
# fuzzer name and the testcase index, with a .js suffix.
def get_file_name(prefix, index):
    return '{}{}{}.js'.format(prefix, FUZZER_NAME_PREFIX, index)
| |
| |
# The single source of random numbers for this script. It is backed by the
# operating system's generator, which is the best available (os.urandom, used
# below for the raw input bytes, draws on the same source).
system_random = random.SystemRandom()
| |
| |
# Generate a random wasm file, and return a string that creates a typed array of
# those bytes, suitable for use in a JS file, in the form
#
#   new Uint8Array([..wasm_contents..])
#
# Receives the testcase index and the output dir. Two temp files, named after
# the index, are written into the output dir; both are removed again before we
# return, whether we succeed or fail.
#
# max_attempts is how many times we are willing to run wasm-opt. If every
# attempt fails then the subprocess.CalledProcessError of the last one is
# raised. A ValueError is raised if max_attempts is less than 1.
def get_wasm_contents(i, output_dir, max_attempts=100):
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    input_data_file_path = os.path.join(output_dir, f'{i}.input')
    wasm_file_path = os.path.join(output_dir, f'{i}.wasm')

    # The command is the same on every attempt (only the contents of the input
    # file change), so build it once.
    cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
    cmd += ['-o', wasm_file_path, input_data_file_path]

    try:
        # wasm-opt may fail to run in rare cases (when the fuzzer emits code it
        # detects as invalid). Just try again in such a case, with new random
        # data.
        for attempt in range(1, max_attempts + 1):
            # Generate random data.
            random_size = system_random.randint(1, MAX_RANDOM_SIZE)
            with open(input_data_file_path, 'wb') as file:
                file.write(os.urandom(random_size))

            # Generate wasm from the random data.
            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError:
                if attempt == max_attempts:
                    # Something is very wrong!
                    raise
                print('(oops, retrying wasm-opt)')
                continue

            # Success, leave the loop.
            break

        # Read back the wasm that was generated.
        with open(wasm_file_path, 'rb') as file:
            wasm_contents = file.read()
    finally:
        # Clean up temp files. This also runs when we give up, so that a
        # failure does not leave stray files in the output dir. Either file may
        # be missing at that point, so check before removing.
        for path in (wasm_file_path, input_data_file_path):
            if os.path.exists(path):
                os.remove(path)

    # Convert to a string (iterating over bytes yields ints), and wrap into a
    # typed array.
    wasm_contents = ','.join(map(str, wasm_contents))
    return f'new Uint8Array([{wasm_contents}])'
| |
| |
# Returns how many bytes are listed in a 'new Uint8Array([..])' string, as
# returned by get_wasm_contents. The elements are separated by commas, so a
# non-empty list has one more element than it has commas.
def _count_typed_array_bytes(typed_array_js):
    _, _, after_open = typed_array_js.partition('[')
    elements, _, _ = after_open.rpartition(']')
    if not elements:
        return 0
    return elements.count(',') + 1


# Returns the contents of a .js fuzz file, given the index of the testcase and
# the output dir (which get_wasm_contents uses for its temp files). Also prints
# the total number of wasm bytes embedded in the testcase.
def get_js_file_contents(i, output_dir):
    # Start with the standard JS shell.
    with open(JS_SHELL_PATH) as file:
        js = file.read()

    # Prepend the wasm contents, so they are used (rather than the normal
    # mechanism where the wasm file's name is provided in argv).
    wasm_contents = get_wasm_contents(i, output_dir)
    pre = f'var binary = {wasm_contents};\n'
    wasm_bytes = _count_typed_array_bytes(wasm_contents)

    # Sometimes add a second wasm file as well.
    has_second = system_random.random() < 0.333
    if has_second:
        wasm_contents = get_wasm_contents(i, output_dir)
        pre += f'var secondBinary = {wasm_contents};\n'
        wasm_bytes += _count_typed_array_bytes(wasm_contents)

    js = pre + '\n' + js

    # The default JS builds and runs the wasm. Append some random additional
    # operations as well, as more compiles and executions can find things. To
    # approximate a number in the range [0, MAX_EXTRA_JS_OPERATIONS) but with a
    # median of MEDIAN_EXTRA_JS_OPERATIONS, start in the range [0, 1) and then
    # raise it to the proper power, as multiplying by itself keeps the range
    # unchanged, but lowers the median. Specifically, the median begins at 0.5,
    # so
    #
    #   0.5^power = MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS
    #
    # is what we want, and if we take log2 of each side, gives us
    #
    #   power = log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS) / log2(0.5)
    #         = -log2(MEDIAN_EXTRA_JS_OPERATIONS / MAX_EXTRA_JS_OPERATIONS)
    power = -math.log2(float(MEDIAN_EXTRA_JS_OPERATIONS) / MAX_EXTRA_JS_OPERATIONS)
    x = system_random.random()
    x = math.pow(x, power)
    num = math.floor(x * MAX_EXTRA_JS_OPERATIONS)
    assert 0 <= num <= MAX_EXTRA_JS_OPERATIONS

    extra_js_operations = [
        # Compile and link the wasm again. Each link adds more to the total
        # exports that we can call.
        'build(binary);\n',
        # Run all the exports we've accumulated.
        'callExports();\n',
    ]
    if has_second:
        # Building the second wasm is only possible when we emitted one.
        extra_js_operations.append('build(secondBinary);\n')

    # Pick each extra operation independently at random, and append them all.
    js += ''.join(system_random.choice(extra_js_operations)
                  for _ in range(num))

    print(f'Created {wasm_bytes} wasm bytes')

    return js
| |
| |
# Entry point of the script. argv is the full argument list, with the program
# name first. The options are those that ClusterFuzz passes to a blackbox
# fuzzer, see
# https://google.github.io/clusterfuzz/setting-up-fuzzing/blackbox-fuzzing/#uploading-a-fuzzer
#
#   --output_dir   where the testcases are written (default: '.')
#   --no_of_files  how many testcases to create (default: 100)
#   --input_dir    accepted, but not used
#
# Each testcase is a JS file together with a matching flags file.
def main(argv):
    out_dir = '.'
    total = 100

    known_flags = ['input_dir=', 'output_dir=', 'no_of_files=']
    parsed, _ = getopt.getopt(argv[1:], '', known_flags)
    for flag, arg in parsed:
        if flag == '--no_of_files':
            total = int(arg)
        elif flag == '--output_dir':
            out_dir = arg

    # Testcase indexes begin at 1.
    for index in range(1, total + 1):
        js_name = get_file_name(FUZZ_FILENAME_PREFIX, index)
        js_path = os.path.join(out_dir, js_name)

        # Write out the JS file.
        contents = get_js_file_contents(index, out_dir)
        with open(js_path, 'w') as out:
            out.write(contents)

        # Write out the flags file that goes with it.
        flags_name = get_file_name(FLAGS_FILENAME_PREFIX, index)
        flags_path = os.path.join(out_dir, flags_name)
        with open(flags_path, 'w') as out:
            out.write(FUZZER_FLAGS_FILE_CONTENTS)

        print(f'Created testcase: {js_path}')

    print(f'Created {total} testcases.')


if __name__ == '__main__':
    main(sys.argv)