/*
* Copyright 2017 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Loads a wasm module plus a list of functions that are global ctors, i.e.,
// that are to be executed at startup. It then executes as many of them as it
// can, applying their changes to memory etc. as needed, and then writes out
// the updated module. In other words, this executes code at compile time in
// order to speed up startup later.
//
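// A typical invocation (the file names here are just examples) might look
// like:
//
//   wasm-ctor-eval input.wasm --ctors __wasm_call_ctors -o output.wasm
//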
#include <memory>
#include "asmjs/shared-constants.h"
#include "ir/global-utils.h"
#include "ir/import-utils.h"
#include "ir/literal-utils.h"
#include "ir/memory-utils.h"
#include "ir/names.h"
#include "pass.h"
#include "support/colors.h"
#include "support/file.h"
#include "support/small_set.h"
#include "support/string.h"
#include "tool-options.h"
#include "wasm-builder.h"
#include "wasm-interpreter.h"
#include "wasm-io.h"
#include "wasm-validator.h"
using namespace wasm;
namespace {
struct FailToEvalException {
std::string why;
FailToEvalException(std::string why) : why(why) {}
};
// The prefix for a recommendation, so it is aligned properly with the rest of
// the output.
#define RECOMMENDATION "\n recommendation: "
class EvallingModuleRunner : public ModuleRunnerBase<EvallingModuleRunner> {
public:
EvallingModuleRunner(
Module& wasm,
ExternalInterface* externalInterface,
std::map<Name, std::shared_ptr<EvallingModuleRunner>> linkedInstances_ = {})
: ModuleRunnerBase(wasm, externalInterface, linkedInstances_) {}
Flow visitGlobalGet(GlobalGet* curr) {
// Error on reads of imported globals.
auto* global = wasm.getGlobal(curr->name);
if (global->imported()) {
throw FailToEvalException(std::string("read from imported global ") +
global->module.toString() + "." +
global->base.toString());
}
return ModuleRunnerBase<EvallingModuleRunner>::visitGlobalGet(curr);
}
};
// Build an artificial `env` module based on a module's imports, so that the
// interpreter can use correct object instances. It initializes usable global
// imports, and fills the rest with fake values, since those are dangerous to
// use; we will fail if such dangerous globals are actually used.
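//
// For example (illustrative only), an import such as
//
//   (import "env" "foo" (func $foo (param i32)))
//
// is modeled in the artificial env module as a function with the same
// signature, exported as "foo", whose body is just (unreachable), so that
// actually calling it during evalling fails cleanly.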
std::unique_ptr<Module> buildEnvModule(Module& wasm) {
auto env = std::make_unique<Module>();
env->name = "env";
// create empty functions with the same signatures
ModuleUtils::iterImportedFunctions(wasm, [&](Function* func) {
if (func->module == env->name) {
Builder builder(*env);
auto* copied = ModuleUtils::copyFunction(func, *env);
copied->module = Name();
copied->base = Name();
copied->body = builder.makeUnreachable();
env->addExport(
builder.makeExport(func->base, copied->name, ExternalKind::Function));
}
});
// create tables with similar initial and max values
ModuleUtils::iterImportedTables(wasm, [&](Table* table) {
if (table->module == env->name) {
auto* copied = ModuleUtils::copyTable(table, *env);
copied->module = Name();
copied->base = Name();
env->addExport(Builder(*env).makeExport(
table->base, copied->name, ExternalKind::Table));
}
});
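// create globals of the same types, initialized to zero (a fake value; see
// the comment above about dangerous globals)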
ModuleUtils::iterImportedGlobals(wasm, [&](Global* global) {
if (global->module == env->name) {
auto* copied = ModuleUtils::copyGlobal(global, *env);
copied->module = Name();
copied->base = Name();
Builder builder(*env);
copied->init = builder.makeConst(Literal::makeZero(global->type));
env->addExport(
builder.makeExport(global->base, copied->name, ExternalKind::Global));
}
});
// create an exported memory with the same initial and max size
ModuleUtils::iterImportedMemories(wasm, [&](Memory* memory) {
if (memory->module == env->name) {
auto* copied = ModuleUtils::copyMemory(memory, *env);
copied->module = Name();
copied->base = Name();
env->addExport(Builder(*env).makeExport(
memory->base, copied->name, ExternalKind::Memory));
}
});
return env;
}
// Whether to ignore external input to the program as it runs. If set, we will
// assume that stdin is empty, that any env vars we try to read are not set,
// that there are no arguments passed to main, etc.
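// (For example, when this is set, WASI imports like environ_sizes_get and
// args_sizes_get are modeled in callImport below as reporting zero counts.)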
static bool ignoreExternalInput = false;
struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface {
Module* wasm;
EvallingModuleRunner* instance;
std::map<Name, std::shared_ptr<EvallingModuleRunner>> linkedInstances;
// A representation of the contents of wasm memory as we execute.
std::unordered_map<Name, std::vector<char>> memories;
CtorEvalExternalInterface(
std::map<Name, std::shared_ptr<EvallingModuleRunner>> linkedInstances_ =
{}) {
linkedInstances.swap(linkedInstances_);
}
// Called when we want to apply the current state of execution to the Module.
// Until this is called the Module is never changed.
void applyToModule() {
clearApplyState();
// If nothing was ever written to memories then there is nothing to update.
if (!memories.empty()) {
applyMemoryToModule();
}
applyGlobalsToModule();
}
void init(Module& wasm_, EvallingModuleRunner& instance_) override {
wasm = &wasm_;
instance = &instance_;
for (auto& memory : wasm->memories) {
if (!memory->imported()) {
std::vector<char> data;
memories[memory->name] = data;
}
}
}
void importGlobals(GlobalValueSet& globals, Module& wasm_) override {
ModuleUtils::iterImportedGlobals(wasm_, [&](Global* global) {
auto it = linkedInstances.find(global->module);
if (it != linkedInstances.end()) {
auto* inst = it->second.get();
auto* globalExport = inst->wasm.getExportOrNull(global->base);
if (!globalExport) {
throw FailToEvalException(std::string("importGlobals: ") +
global->module.toString() + "." +
global->base.toString());
}
globals[global->name] = inst->globals[globalExport->value];
} else {
throw FailToEvalException(std::string("importGlobals: ") +
global->module.toString() + "." +
global->base.toString());
}
});
}
Literals callImport(Function* import, Literals& arguments) override {
Name WASI("wasi_snapshot_preview1");
if (ignoreExternalInput) {
if (import->module == WASI) {
if (import->base == "environ_sizes_get") {
if (arguments.size() != 2 || arguments[0].type != Type::i32 ||
import->getResults() != Type::i32) {
throw FailToEvalException("wasi environ_sizes_get has wrong sig");
}
// Write out a count of i32(0) and return __WASI_ERRNO_SUCCESS (0).
store32(arguments[0].geti32(), 0, wasm->memories[0]->name);
return {Literal(int32_t(0))};
}
if (import->base == "environ_get") {
if (arguments.size() != 2 || arguments[0].type != Type::i32 ||
import->getResults() != Type::i32) {
throw FailToEvalException("wasi environ_get has wrong sig");
}
// Just return __WASI_ERRNO_SUCCESS (0).
return {Literal(int32_t(0))};
}
if (import->base == "args_sizes_get") {
if (arguments.size() != 2 || arguments[0].type != Type::i32 ||
import->getResults() != Type::i32) {
throw FailToEvalException("wasi args_sizes_get has wrong sig");
}
// Write out an argc of i32(0) and return __WASI_ERRNO_SUCCESS (0).
store32(arguments[0].geti32(), 0, wasm->memories[0]->name);
return {Literal(int32_t(0))};
}
if (import->base == "args_get") {
if (arguments.size() != 2 || arguments[0].type != Type::i32 ||
import->getResults() != Type::i32) {
throw FailToEvalException("wasi args_get has wrong sig");
}
// Just return __WASI_ERRNO_SUCCESS (0).
return {Literal(int32_t(0))};
}
// Otherwise, we don't recognize this import; continue normally to
// error.
}
}
std::string extra;
if (import->module == ENV && import->base == "___cxa_atexit") {
extra = RECOMMENDATION "build with -s NO_EXIT_RUNTIME=1 so that calls "
"to atexit are not emitted";
} else if (import->module == WASI && !ignoreExternalInput) {
extra = RECOMMENDATION "consider --ignore-external-input";
}
throw FailToEvalException(std::string("call import: ") +
import->module.toString() + "." +
import->base.toString() + extra);
}
// We assume the table is not modified (FIXME).
Literals callTable(Name tableName,
Index index,
HeapType sig,
Literals& arguments,
Type result,
EvallingModuleRunner& instance) override {
auto* table = wasm->getTableOrNull(tableName);
if (!table) {
throw FailToEvalException("callTable on non-existing table");
}
// Look through the segments and find the function. Segments can overlap,
// so we want the last one.
Name targetFunc;
for (auto& segment : wasm->elementSegments) {
if (segment->table != tableName) {
continue;
}
Index start;
// Look for the index in this segment. If it has a constant offset, we
// look in the proper range. If it instead uses a global, we rely on the
// fact that when not dynamically linking the table is loaded at
// offset 0.
if (auto* c = segment->offset->dynCast<Const>()) {
start = c->value.getInteger();
} else if (segment->offset->is<GlobalGet>()) {
start = 0;
} else {
// wasm spec only allows const and global.get there
WASM_UNREACHABLE("invalid expr type");
}
auto end = start + segment->data.size();
if (start <= index && index < end) {
auto entry = segment->data[index - start];
if (auto* get = entry->dynCast<RefFunc>()) {
targetFunc = get->func;
} else {
throw FailToEvalException(
std::string("callTable on uninitialized entry"));
}
}
}
if (!targetFunc.is()) {
throw FailToEvalException(
std::string("callTable on index not found in static segments: ") +
std::to_string(index));
}
// If this is one of our functions, we can call it; if it was
// imported, fail.
auto* func = wasm->getFunction(targetFunc);
if (func->type != sig) {
throw FailToEvalException(std::string("callTable signature mismatch: ") +
targetFunc.toString());
}
if (!func->imported()) {
return instance.callFunctionInternal(targetFunc, arguments);
} else {
throw FailToEvalException(
std::string("callTable on imported function: ") +
targetFunc.toString());
}
}
Index tableSize(Name tableName) override {
throw FailToEvalException("table size");
}
Literal tableLoad(Name tableName, Index index) override {
throw FailToEvalException("table.get: TODO");
}
// called during initialization
void tableStore(Name tableName, Index index, const Literal& value) override {}
int8_t load8s(Address addr, Name memoryName) override {
return doLoad<int8_t>(addr, memoryName);
}
uint8_t load8u(Address addr, Name memoryName) override {
return doLoad<uint8_t>(addr, memoryName);
}
int16_t load16s(Address addr, Name memoryName) override {
return doLoad<int16_t>(addr, memoryName);
}
uint16_t load16u(Address addr, Name memoryName) override {
return doLoad<uint16_t>(addr, memoryName);
}
int32_t load32s(Address addr, Name memoryName) override {
return doLoad<int32_t>(addr, memoryName);
}
uint32_t load32u(Address addr, Name memoryName) override {
return doLoad<uint32_t>(addr, memoryName);
}
int64_t load64s(Address addr, Name memoryName) override {
return doLoad<int64_t>(addr, memoryName);
}
uint64_t load64u(Address addr, Name memoryName) override {
return doLoad<uint64_t>(addr, memoryName);
}
void store8(Address addr, int8_t value, Name memoryName) override {
doStore<int8_t>(addr, value, memoryName);
}
void store16(Address addr, int16_t value, Name memoryName) override {
doStore<int16_t>(addr, value, memoryName);
}
void store32(Address addr, int32_t value, Name memoryName) override {
doStore<int32_t>(addr, value, memoryName);
}
void store64(Address addr, int64_t value, Name memoryName) override {
doStore<int64_t>(addr, value, memoryName);
}
bool growMemory(Name memoryName,
Address /*oldSize*/,
Address /*newSize*/) override {
throw FailToEvalException("grow memory");
}
bool growTable(Name /*name*/,
const Literal& /*value*/,
Index /*oldSize*/,
Index /*newSize*/) override {
throw FailToEvalException("grow table");
}
void trap(const char* why) override {
throw FailToEvalException(std::string("trap: ") + why);
}
void hostLimit(const char* why) override {
throw FailToEvalException(std::string("trap: ") + why);
}
void throwException(const WasmException& exn) override {
std::stringstream ss;
ss << "exception thrown: " << exn;
throw FailToEvalException(ss.str());
}
private:
// TODO: handle unaligned too, see shell-interface
template<typename T> T* getMemory(Address address, Name memoryName) {
auto it = memories.find(memoryName);
assert(it != memories.end());
auto& memory = it->second;
// resize the memory buffer as needed.
auto max = address + sizeof(T);
if (max > memory.size()) {
memory.resize(max);
}
return (T*)(&memory[address]);
}
template<typename T> void doStore(Address address, T value, Name memoryName) {
// do a memcpy to avoid undefined behavior if unaligned
memcpy(getMemory<T>(address, memoryName), &value, sizeof(T));
}
template<typename T> T doLoad(Address address, Name memoryName) {
// do a memcpy to avoid undefined behavior if unaligned
T ret;
memcpy(&ret, getMemory<T>(address, memoryName), sizeof(T));
return ret;
}
// Clear the state of the operation of applying the interpreter's runtime
// information into the module.
//
// This happens each time we apply contents to the module, which is basically
// once per ctor function, but can be more fine-grained also if we execute a
// line at a time.
void clearApplyState() {
// The process of allocating "defining globals" begins here, from scratch
// each time (things that were live before may no longer be live now).
definingGlobals.clear();
// When we start to apply the state there should be no previous state left
// over.
assert(seenDataStack.empty());
}
void applyMemoryToModule() {
// Memory must have already been flattened into the standard form: one
// segment at offset 0, or none.
if (wasm->dataSegments.empty()) {
Builder builder(*wasm);
auto curr = builder.makeDataSegment();
curr->offset = builder.makeConst(int32_t(0));
curr->setName(Name::fromInt(0), false);
curr->memory = wasm->memories[0]->name;
wasm->addDataSegment(std::move(curr));
}
auto& segment = wasm->dataSegments[0];
assert(segment->offset->cast<Const>()->value.getInteger() == 0);
// Copy the current memory contents after execution into the Module's
// memory.
segment->data = memories[wasm->memories[0]->name];
}
// Serializing GC data requires more work than linear memory, because
// allocations have an identity, and they are created using struct.new /
// array.new, which we must emit in a proper location in the wasm. This
// affects how we serialize globals, which can contain GC data, and also, we
// use globals to store GC data, so overall the process of computing the
// globals is where most of the GC logic ends up.
//
// The general idea for handling GC data is as follows: After evaluating the
// code, we end up with some live allocations in the interpreter, which we
// need to somehow serialize into the wasm module. We will put each such live
// GC data item into its own "defining global", a global whose purpose is to
// create and store that data. Each such global is immutable, and has the
// exact type of the data, for simplicity. Every other reference to that GC
// data in the interpreter's memory can then be serialized by simply emitting
// a global.get of that defining global.
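//
// For example (illustrative wat, not necessarily the exact output), a live
// struct allocation may end up serialized as a defining global such as
//
//   (global $ctor-eval$global (ref $T) (struct.new $T ...))
//
// and every other place that refers to that same allocation then simply does
//
//   (global.get $ctor-eval$global)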
void applyGlobalsToModule() {
Builder builder(*wasm);
if (!wasm->features.hasGC()) {
// Without GC, we can simply serialize the globals in place as they are.
for (const auto& [name, values] : instance->globals) {
wasm->getGlobal(name)->init = getSerialization(values);
}
return;
}
// We need to emit the "defining globals" of GC data before the existing
// globals, as the normal ones may refer to them. We do this by removing all
// the existing globals, and then adding them one by one, during which time
// we call getSerialization() for their init expressions. If their init
// refers to GC data, then we will allocate a defining global for that data,
// and refer to it. Put another way, we place the existing globals back into
// the module one at a time, adding their dependencies as we go.
auto oldGlobals = std::move(wasm->globals);
wasm->updateMaps();
for (auto& oldGlobal : oldGlobals) {
// Serialize the global's value. While doing so, pass in the name of this
// global, as we may be able to reuse the global as the defining global
// for the value. See getSerialization() for more details.
Name name;
if (!oldGlobal->mutable_ && oldGlobal->type == oldGlobal->init->type) {
// This has the properties we need of a defining global - immutable and
// of the precise type - so use it.
name = oldGlobal->name;
}
// If there is a value here to serialize, do so. (If there is no value,
// then this global was added after the interpreter initialized the
// module, which means it is a new global we've added since; we don't need
// to do anything for such a global - if it is needed it will show up as a
// dependency of something, and be emitted at the right time and place.)
auto iter = instance->globals.find(oldGlobal->name);
if (iter != instance->globals.end()) {
oldGlobal->init = getSerialization(iter->second, name);
wasm->addGlobal(std::move(oldGlobal));
}
}
}
public:
// Maps each GC data item in the interpreter to its defining global: the
// global in which it is created. All other users of it can then simply
// global.get that global.
std::unordered_map<GCData*, Name> definingGlobals;
// The data we have seen so far on the stack. This is used to guard against
// infinite recursion, which would otherwise happen if there is a cycle among
// the live objects, which we don't handle yet.
//
// Pick a constant of 2 here to handle the common case of an object with a
// reference to another object that is already in a defining global.
SmallSet<GCData*, 2> seenDataStack;
// If |possibleDefiningGlobal| is provided, it is the name of a global that we
// are in the init expression of, and which can be reused as defining global,
// if the other conditions are suitable.
Expression* getSerialization(const Literal& value,
Name possibleDefiningGlobal = Name()) {
Builder builder(*wasm);
if (!value.isData()) {
// This can be handled normally.
return builder.makeConstantExpression(value);
}
// This is GC data, which we must handle in a more careful way.
auto* data = value.getGCData().get();
assert(data);
// There was actual GC data allocated here.
auto type = value.type;
auto& definingGlobal = definingGlobals[data];
if (!definingGlobal.is()) {
// This is the first usage of this allocation. Generate a struct.new /
// array.new for it.
auto& values = value.getGCData()->values;
std::vector<Expression*> args;
// The initial values for this allocation may themselves be GC
// allocations. Recurse and add globals as necessary.
// TODO: Handle cycles. That will require code in the start function. For
// now, just error if we detect an infinite recursion.
if (seenDataStack.count(data)) {
Fatal() << "Cycle in live GC data, which we cannot serialize yet.";
}
seenDataStack.insert(data);
for (auto& value : values) {
args.push_back(getSerialization(value));
}
seenDataStack.erase(data);
Expression* init;
auto heapType = type.getHeapType();
if (heapType.isStruct()) {
init = builder.makeStructNew(heapType, args);
} else if (heapType.isArray()) {
// TODO: for repeated identical values, can use ArrayNew
init = builder.makeArrayInit(heapType, args);
} else {
WASM_UNREACHABLE("bad gc type");
}
if (possibleDefiningGlobal.is()) {
// No need to allocate a new global, as we are in the definition of
// one. Just return the initialization expression, which will be
// placed in that global's |init| field, and first note this as the
// defining global.
definingGlobal = possibleDefiningGlobal;
return init;
}
// Allocate a new defining global.
auto name = Names::getValidGlobalName(*wasm, "ctor-eval$global");
wasm->addGlobal(builder.makeGlobal(name, type, init, Builder::Immutable));
definingGlobal = name;
}
// Refer to this GC allocation by reading from the global that is
// designated to contain it.
return builder.makeGlobalGet(definingGlobal, value.type);
}
Expression* getSerialization(const Literals& values,
Name possibleDefiningGlobal = Name()) {
assert(values.size() == 1);
return getSerialization(values[0], possibleDefiningGlobal);
}
};
// The outcome of evalling a ctor is one of three states:
//
// 1. We failed to eval it completely (but perhaps we succeeded partially). In
// that case the std::optional here contains nothing.
// 2. We evalled it completely, and it is a function with no return value, so
// it contains an empty Literals.
// 3. We evalled it completely, and it is a function with a return value, so
// it contains Literals with those results.
using EvalCtorOutcome = std::optional<Literals>;
// Eval a single ctor function. Returns whether we succeeded in completely
// evaluating the ctor (which means that the caller can proceed to try to eval
// further ctors if there are any), and if we did, the results if the function
// returns any.
EvalCtorOutcome evalCtor(EvallingModuleRunner& instance,
CtorEvalExternalInterface& interface,
Name funcName,
Name exportName) {
auto& wasm = instance.wasm;
auto* func = wasm.getFunction(funcName);
// We don't know the values of parameters, so give up if there are any, unless
// we are ignoring them.
if (func->getNumParams() > 0 && !ignoreExternalInput) {
std::cout << " ...stopping due to params\n";
std::cout << RECOMMENDATION "consider --ignore-external-input";
return EvalCtorOutcome();
}
// If there are params, we are ignoring them (or we would have quit earlier);
// set those up with zeros.
// TODO: Have a safer option here, either
// 1. Statically or dynamically stop evalling when a param is actually
// used, or
// 2. Split out --ignore-external-input into separate flags.
Literals params;
for (Index i = 0; i < func->getNumParams(); i++) {
auto type = func->getLocalType(i);
if (!LiteralUtils::canMakeZero(type)) {
std::cout << " ...stopping due to non-zeroable param\n";
return EvalCtorOutcome();
}
params.push_back(Literal::makeZero(type));
}
// We want to handle the form of the global constructor function in LLVM. That
// looks like this:
//
// (func $__wasm_call_ctors
// (call $ctor.1)
// (call $ctor.2)
// (call $ctor.3)
// )
//
// Some of those ctors may be inlined, however, which would mean that the
// function could have locals, control flow, etc. However, we assume for now
// that it does not have parameters at least (whose values we can't tell).
// And for now we look for a toplevel block and process its children one at a
// time. This allows us to eval some of the $ctor.* functions (or their
// inlined contents) even if not all.
//
// TODO: Support complete partial evalling, that is, evaluate parts of an
// arbitrary function, and not just a sequence in a single toplevel
// block.
if (auto* block = func->body->dynCast<Block>()) {
// Go through the items in the block and try to execute them. We do all this
// in a single function scope for all the executions.
EvallingModuleRunner::FunctionScope scope(func, params, instance);
// After we successfully eval a line we will apply the changes here. This is
// the same idea as applyToModule() - we must only do it after an entire
// atomic "chunk" has been processed, we do not want partial updates from
// an item in the block that we only partially evalled.
std::vector<Literals> appliedLocals;
Literals results;
Index successes = 0;
for (auto* curr : block->list) {
Flow flow;
try {
flow = instance.visit(curr);
} catch (FailToEvalException& fail) {
if (successes == 0) {
std::cout << " ...stopping (in block) since could not eval: "
<< fail.why << "\n";
} else {
std::cout << " ...partial evalling successful, but stopping since "
"could not eval: "
<< fail.why << "\n";
}
break;
}
// So far so good! Apply the results.
interface.applyToModule();
appliedLocals = scope.locals;
successes++;
// Note the values here, if any. If we are exiting the function now then
// these will be returned.
results = flow.values;
if (flow.breaking()) {
// We are returning out of the function (either via a return, or via a
// break to |block|, which has the same outcome). That means we don't
// need to execute any more lines, and can consider them to be executed.
std::cout << " ...stopping in block due to break\n";
// Mark us as having succeeded on the entire block, since we have: we
// are skipping the rest, which means there is no problem there. We must
// set this here so that lower down we realize that we've evalled
// everything.
successes = block->list.size();
break;
}
}
if (successes > 0 && successes < block->list.size()) {
// We managed to eval some but not all. That means we can't just remove
// the entire function, but need to keep parts of it - the parts we have
// not evalled - around. To do so, we create a copy of the function with
// the partially-evalled contents and make the export use that (as the
// function may be used in other places than the export, which we do not
// want to affect).
auto copyName = Names::getValidFunctionName(wasm, funcName);
auto* copyFunc = ModuleUtils::copyFunction(func, wasm, copyName);
wasm.getExport(exportName)->value = copyName;
// Remove the items we've evalled.
Builder builder(wasm);
auto* copyBlock = copyFunc->body->cast<Block>();
for (Index i = 0; i < successes; i++) {
copyBlock->list[i] = builder.makeNop();
}
// Write out the values of locals, that is the local state after evalling
// the things we've just nopped. For simplicity we just write out all of the
// locals, and leave it to the optimizer to remove redundant or
// unnecessary operations.
std::vector<Expression*> localSets;
for (Index i = 0; i < copyFunc->getNumLocals(); i++) {
auto value = appliedLocals[i];
localSets.push_back(
builder.makeLocalSet(i, interface.getSerialization(value)));
}
// Put the local sets at the front of the block. We know there must be a
// nop in that position (since we've evalled at least one item in the
// block, and replaced it with a nop), so we can overwrite it.
copyBlock->list[0] = builder.makeBlock(localSets);
// Interesting optimizations may be possible both due to removing some but
// not all of the code, and due to the locals we just added.
PassRunner passRunner(&wasm,
PassOptions::getWithDefaultOptimizationOptions());
passRunner.addDefaultFunctionOptimizationPasses();
passRunner.runOnFunction(copyFunc);
}
// Return true if we evalled the entire block. Otherwise, even if we evalled
// some of it, the caller must stop trying to eval further things.
if (successes == block->list.size()) {
return EvalCtorOutcome(results);
} else {
return EvalCtorOutcome();
}
}
// Otherwise, we don't recognize a pattern that allows us to do partial
// evalling. So simply call the entire function at once and see if we can
// optimize that.
Literals results;
try {
results = instance.callFunction(funcName, params);
} catch (FailToEvalException& fail) {
std::cout << " ...stopping since could not eval: " << fail.why << "\n";
return EvalCtorOutcome();
}
// Success! Apply the results.
interface.applyToModule();
return EvalCtorOutcome(results);
}
// Eval all ctors in a module.
void evalCtors(Module& wasm,
std::vector<std::string>& ctors,
std::vector<std::string>& keptExports) {
std::unordered_set<std::string> keptExportsSet(keptExports.begin(),
keptExports.end());
std::map<Name, std::shared_ptr<EvallingModuleRunner>> linkedInstances;
// build and link the env module
auto envModule = buildEnvModule(wasm);
CtorEvalExternalInterface envInterface;
auto envInstance =
std::make_shared<EvallingModuleRunner>(*envModule, &envInterface);
linkedInstances[envModule->name] = envInstance;
CtorEvalExternalInterface interface(linkedInstances);
try {
// create an instance for evalling
EvallingModuleRunner instance(wasm, &interface, linkedInstances);
// go one by one, in order, until we fail
// TODO: if we knew priorities, we could reorder?
for (auto& ctor : ctors) {
std::cout << "trying to eval " << ctor << '\n';
Export* ex = wasm.getExportOrNull(ctor);
if (!ex) {
Fatal() << "export not found: " << ctor;
}
auto funcName = ex->value;
auto outcome = evalCtor(instance, interface, funcName, ctor);
if (!outcome) {
std::cout << " ...stopping\n";
return;
}
// Success! And we can continue to try more.
std::cout << " ...success on " << ctor << ".\n";
// Remove the export if we should.
auto* exp = wasm.getExport(ctor);
if (!keptExportsSet.count(ctor)) {
wasm.removeExport(exp->name);
} else {
// We are keeping the export around; it should now refer to a trivial
// function, since calling the export should do nothing (beyond returning
// the already-evalled results, if there are any).
auto* func = wasm.getFunction(exp->value);
auto copyName = Names::getValidFunctionName(wasm, func->name);
auto* copyFunc = ModuleUtils::copyFunction(func, wasm, copyName);
if (func->getResults() == Type::none) {
copyFunc->body = Builder(wasm).makeNop();
} else {
copyFunc->body = interface.getSerialization(*outcome);
}
wasm.getExport(exp->name)->value = copyName;
}
}
} catch (FailToEvalException& fail) {
// that's it, we failed to even create the instance
std::cout << " ...stopping since could not create module instance: "
<< fail.why << "\n";
return;
}
}
static bool canEval(Module& wasm) {
// Check if we can flatten memory. We currently need to do so because we
// assume memory is simple and flat. TODO
if (!MemoryUtils::flatten(wasm)) {
std::cout << " ...stopping since could not flatten memory\n";
return false;
}
return true;
}
} // anonymous namespace
//
// main
//
int main(int argc, const char* argv[]) {
Name entry;
std::vector<std::string> passes;
bool emitBinary = true;
bool debugInfo = false;
String::Split ctors;
String::Split keptExports;
const std::string WasmCtorEvalOption = "wasm-ctor-eval options";
ToolOptions options("wasm-ctor-eval", "Execute code at compile time");
options
.add("--output",
"-o",
"Output file (stdout if not specified)",
WasmCtorEvalOption,
Options::Arguments::One,
[](Options* o, const std::string& argument) {
o->extra["output"] = argument;
Colors::setEnabled(false);
})
.add("--emit-text",
"-S",
"Emit text instead of binary for the output file",
WasmCtorEvalOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { emitBinary = false; })
.add("--debuginfo",
"-g",
"Emit names section and debug info",
WasmCtorEvalOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& arguments) { debugInfo = true; })
.add("--ctors",
"-c",
"Comma-separated list of global constructor functions to evaluate",
WasmCtorEvalOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) {
ctors = String::Split(argument, ",");
})
.add(
"--kept-exports",
"-ke",
"Comma-separated list of ctors whose exports we keep around even if we "
"eval those ctors",
WasmCtorEvalOption,
Options::Arguments::One,
[&](Options* o, const std::string& argument) {
keptExports = String::Split(argument, ",");
})
.add("--ignore-external-input",
"-ipi",
"Assumes no env vars are to be read, stdin is empty, etc.",
WasmCtorEvalOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) {
ignoreExternalInput = true;
})
.add_positional("INFILE",
Options::Arguments::One,
[](Options* o, const std::string& argument) {
o->extra["infile"] = argument;
});
options.parse(argc, argv);
auto input(read_file<std::string>(options.extra["infile"], Flags::Text));
Module wasm;
options.applyFeatures(wasm);
{
if (options.debug) {
std::cout << "reading...\n";
}
ModuleReader reader;
try {
reader.read(options.extra["infile"], wasm);
} catch (ParseException& p) {
p.dump(std::cout);
Fatal() << "error in parsing input";
}
}
if (!WasmValidator().validate(wasm)) {
std::cout << wasm << '\n';
Fatal() << "error in validating input";
}
if (canEval(wasm)) {
evalCtors(wasm, ctors, keptExports);
// Do some useful optimizations after the evalling
{
PassRunner passRunner(&wasm);
passRunner.add("memory-packing"); // we flattened it, so re-optimize
// TODO: just do -Os for the one function
passRunner.add("remove-unused-names");
passRunner.add("dce");
passRunner.add("merge-blocks");
passRunner.add("vacuum");
passRunner.add("remove-unused-module-elements");
passRunner.run();
}
}
if (options.extra.count("output") > 0) {
if (options.debug) {
std::cout << "writing..." << std::endl;
}
ModuleWriter writer;
writer.setBinary(emitBinary);
writer.setDebugInfo(debugInfo);
writer.write(wasm, options.extra["output"]);
}
}