/*
 * Copyright 2022 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// When we see a call foo(arg1, arg2) in which at least one of the arguments
// has a more refined type than is declared in the function being called, we
// create a copy of the function with the refined types. That copy can then
// potentially be optimized in useful ways later.
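//
// For example, given hypothetical types $A and $B, where $B is a subtype of
// $A, consider a call like
//
//   (call $foo             ;; $foo declares a param of type (ref $A)
//     (struct.new $B ..))  ;; the argument has the more refined type (ref $B)
//
// Here we would copy $foo into a new function whose param is typed (ref $B),
// and retarget this call to that copy.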
//
// Inlining also monomorphizes, in effect; this pass handles the cases where
// inlining cannot be done.
//
// To see when monomorphizing makes sense, this pass optimizes the target
// function both with and without the more refined types. If the refined types
// help then the version with them might remove a cast, for example. Note that
// while doing so we keep the optimization results of the version without them
// - there is no reason to discard that work since we've gone to the trouble
// anyhow. So this pass may have the side effect of performing minor
// optimizations on functions. There is also a variant of the pass that always
// monomorphizes, even when it does not seem helpful, which is useful for
// testing, and possibly in cases where we need more than just local
// optimizations to see the benefit - for example, perhaps GUFA ends up being
// more powerful later on.
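//
// Continuing the hypothetical example above: if $foo's body casts its param
// down to (ref $B), then in the copy whose param is already (ref $B) that
// cast becomes a no-op the optimizer can remove, which we observe as a
// reduction in cost.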
//
// TODO: When we optimize we could run multiple cycles: A calls B calls C might
//       end up with the refined+optimized B now having refined types in its
//       call to C, which it did not have before. This is in fact the expected
//       pattern of incremental monomorphization. Doing it in the pass could be
//       more efficient as later cycles can focus only on what was just
//       optimized and changed. Also, operating on just-modified functions
//       would help the case where A calls B: we end up optimizing A after we
//       consider A->B, and the optimized A may send more refined types to B,
//       which could unlock more potential.
// TODO: We could sort the functions so that we start from root functions and
//       end on leaves. That would make it more likely for a single iteration
//       to do more work: if A->B->C then we'd handle A->B and optimize B, and
//       only then look at B->C.
// TODO: Also run the result-refining part of SignatureRefining, since if we
//       refine the result then callers of the function may benefit, even if
//       there is no benefit in the function itself.
// TODO: If this is too slow, we could "group" things: for example, we could
//       compute the least upper bound (LUB) of the types sent in a set of
//       calls to a target, then investigate that single case and use it in
//       all those callers.
// TODO: Not just direct calls? But updating vtables is complex.
// TODO: Not just types? We could monomorphize using Literal values. E.g. for
//       function references, if we monomorphized we'd end up specializing
//       qsort for the particular functions it is given.
//

#include "ir/cost.h"
#include "ir/find_all.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/type-updating.h"
#include "ir/utils.h"
#include "pass.h"
#include "wasm-type.h"
#include "wasm.h"

namespace wasm {

namespace {

struct Monomorphize : public Pass {
  // If set, we run some opts to see if monomorphization helps, and skip it if
  // not.
  bool onlyWhenHelpful;

  Monomorphize(bool onlyWhenHelpful) : onlyWhenHelpful(onlyWhenHelpful) {}

  void run(Module* module) override {
    if (!module->features.hasGC()) {
      return;
    }

    // TODO: parallelize, see comments below

    // Note the list of all functions before we begin. We'll be adding more as
    // we go, and do not want to operate on those new ones.
    std::vector<Name> funcNames;
    ModuleUtils::iterDefinedFunctions(
      *module, [&](Function* func) { funcNames.push_back(func->name); });

    // Find the calls in each function and optimize where we can, changing them
    // to call more refined targets.
    for (auto name : funcNames) {
      auto* func = module->getFunction(name);
      for (auto* call : FindAll<Call>(func->body).list) {
        if (call->type == Type::unreachable) {
          // Ignore unreachable code. (Note that this also skips return_calls,
          // whose type is unreachable in Binaryen IR.)
          // TODO: return_call?
          continue;
        }

        if (call->target == name) {
          // Avoid recursion, which adds some complexity (as we'd be modifying
          // ourselves if we apply optimizations).
          continue;
        }

        call->target = getRefinedTarget(call, module);
      }
    }
  }

  // Given a call, potentially make a copy of the function it is calling whose
  // parameter types exactly match the types of the arguments being passed.
  // Returns the name of the function the call should target: the refined copy
  // if we made one and chose to use it, or else the original target.
  Name getRefinedTarget(Call* call, Module* module) {
    auto target = call->target;
    auto* func = module->getFunction(target);
    if (func->imported()) {
      // Nothing to do since this calls outside of the module.
      return target;
    }
    auto params = func->getParams();
    bool hasRefinedParam = false;
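    // Since the call is valid, each operand's type must be a subtype of the
    // corresponding declared param type, so any difference below means the
    // operand is strictly more refined than the param.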
    for (Index i = 0; i < call->operands.size(); i++) {
      if (call->operands[i]->type != params[i]) {
        hasRefinedParam = true;
        break;
      }
    }
    if (!hasRefinedParam) {
      // Nothing to do since no argument is more refined than its declared
      // param.
      return target;
    }

    std::vector<Type> refinedTypes;
    for (auto* operand : call->operands) {
      refinedTypes.push_back(operand->type);
    }
    auto refinedParams = Type(refinedTypes);
    auto iter = funcParamMap.find({target, refinedParams});
    if (iter != funcParamMap.end()) {
      return iter->second;
    }

    // This is the first time we see this situation. Let's see if it is worth
    // monomorphizing.

    // Create a new function with refined parameters as a copy of the original.
    // (Note we must clear stack IR on the original: at the moment we do not
    // have the ability to copy stack IR, so we'd hit an internal error. But
    // since we will be optimizing the function anyhow, its stack IR would be
    // discarded later in any case, so this isn't a problem.)
    func->stackIR.reset();
    auto refinedTarget = Names::getValidFunctionName(*module, target);
    auto* refinedFunc = ModuleUtils::copyFunction(func, *module, refinedTarget);
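    // Rewrite the copy's params to the refined types, which also updates uses
    // of those params in the body as needed, and then update its signature.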
    TypeUpdating::updateParamTypes(refinedFunc, refinedTypes, *module);
    refinedFunc->type = HeapType(Signature(refinedParams, func->getResults()));

    // Assume we'll choose to use the refined target, but if we are being
    // careful then we might change our mind.
    auto chosenTarget = refinedTarget;
    if (onlyWhenHelpful) {
      // Optimize both functions using minimal opts, hopefully enough to see if
      // there is a benefit to the refined types (such as the new types allowing
      // a cast to be removed).
      // TODO: At the moment this can run many times per function, as it runs
      //       once per pair of function and set of types sent to it. Perhaps
      //       have some total limit to avoid slow runtimes.
      // TODO: We can end up optimizing |func| more than once. It may be
      //       different each time if the previous optimization helped, but
      //       often it will be identical. We could save the original version
      //       and use that as the starting point here (and cache the optimized
      //       version), but then we'd be throwing away optimization results. Or
      //       we could see if later optimizations do not further decrease the
      //       cost, and if so, use a cached value for the cost on such
      //       "already maximally optimized" functions. The former approach is
      //       more amenable to parallelization, as it avoids path dependence -
      //       the other approaches are deterministic but they depend on the
      //       order in which we see things. But it does require saving a copy
      //       of the function, which uses memory, which is avoided if we just
      //       keep optimizing from the current contents as we go. It's not
      //       obvious which approach is best here.
      doMinimalOpts(func);
      doMinimalOpts(refinedFunc);

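      // Compare a static estimate of execution cost on the two optimized
      // bodies; the refined copy must strictly improve to be kept.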
      auto costBefore = CostAnalyzer(func->body).cost;
      auto costAfter = CostAnalyzer(refinedFunc->body).cost;
      if (costAfter >= costBefore) {
        // We failed to improve. Remove the new function and return the old
        // target.
        module->removeFunction(refinedTarget);
        chosenTarget = target;
      }
    }

    // Mark the chosen target in the map, so we don't do this work again: every
    // pair of target and refinedParams is only considered once.
    funcParamMap[{target, refinedParams}] = chosenTarget;

    return chosenTarget;
  }

  // Run minimal function-level optimizations on a function. This optimizes at
  // -O1, which is very fast and runs in essentially linear time, so it should
  // be reasonable to run as part of this pass: -O1 is several times faster
  // than a full -O2 in particular, so if we run this as part of -O2 we should
  // not be making it much slower overall.
  // TODO: Perhaps we don't need all of -O1, and can focus on specific things
  //       we expect to help. That would be faster, but we'd always run the
  //       risk of missing things, especially as new passes are added later and
  //       we don't think to add them here.
  //       Alternatively, perhaps we should have a mode that does use -O1 or
  //       even -O2 or above, as in theory any optimization could end up
  //       mattering a lot here.
  void doMinimalOpts(Function* func) {
    PassRunner runner(getPassRunner());
    runner.options.optimizeLevel = 1;
    // Local subtyping is not run in -O1, but we really do want it here since
    // the entire point is that parameters now have more refined types, which
    // can lead to locals reading them being refinable as well.
    runner.add("local-subtyping");
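    // Add the standard function-level optimization pipeline, which will run
    // at the -O1 level set above.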
    runner.addDefaultFunctionOptimizationPasses();
    runner.setIsNested(true);
    runner.runOnFunction(func);
  }

  // Maps [func name, param types] to the name of a new function whose params
  // have those types.
  //
  // Note that this can contain entries like funcParamMap[{A, types}] = A,
  // that is, entries that map a function name to itself. Such an entry
  // indicates that we found no benefit from refining with those particular
  // types, and saves us from computing that again later on.
  std::unordered_map<std::pair<Name, Type>, Name> funcParamMap;
};

} // anonymous namespace

Pass* createMonomorphizePass() { return new Monomorphize(true); }

Pass* createMonomorphizeAlwaysPass() { return new Monomorphize(false); }

} // namespace wasm