blob: 51cc98547e1d7bdb0b3f243d52bb34f22bbc93fc [file] [log] [blame] [edit]
/*
* Copyright 2021 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Analyzes stores and loads of non-local state, and optimizes them in various
// ways. For example, a store that is never read, because the global state will
// be trampled anyhow, can be removed as dead.
//
// "Store" is used generically here to mean a write to a non-local location,
// which includes:
//
// * Stores to linear memory (Store).
// * Stores to globals (GlobalSet).
// * Stores to GC data (StructSet, ArraySet)
//
// This pass optimizes all of the above. It does so using a generic framework in
// order to share as much code as possible between them. This has downsides for
// globals, in particular, as they could be optimized with an IR that is tailor-
// made for scanning of global indexes (much as we do in our analyses of locals
// in other places). However, global operations are also less common than memory
// and GC operations, so hopefully the tradeoff is reasonable.
//
// The generic framework here can handle both "statically" connected loads and
// stores - for example, a load of a global of index N, after a store to that
// index - and "dynamically" connected loads and stores - for example, a load of
// a GC struct field N from a pointer P, after a store to that same pointer and
// field. "Dynamic" here is used in the sense that we don't have a simple static
// indexing of all the things we care about, sometimes called "lanes" in other
// implementations. Instead we need to care about pointer identity, aliasing,
// etc., which means we need to "dynamically" compare loads and stores and not
// just a "lane" index computed for them. Note that in theory an index could
// still be computed for such things, but probably at the cost of greater
// complexity; if speed becomes an issue, then a refactoring in that direction
// may be necessary. Such a refactoring might have downsides, however: While
// having a single index for each operation is efficient, we would also need a
// a way to represent "wildcards" - things that affect multiple indexes. For
// example, an indirect call must be assumed to affect anything. But more
// complex cases include a GC store of a certain type, which we can infer may
// affect anything with a relevant subtype (but others cannot alias). To handle
// that, we would need to store a set of indexes, already losing much of the
// benefit of the indexed approach. Instead, the current code keeps things very
// simple by just asking "may these two things interact", in which we can just
// check the subtyping, etc.
//
#include <cfg/cfg-traversal.h>
#include <ir/effects.h>
#include <ir/local-graph.h>
#include <ir/properties.h>
#include <ir/replacer.h>
#include <pass.h>
#include <support/unique_deferring_queue.h>
#include <wasm-builder.h>
#include <wasm.h>
namespace wasm {
namespace {
// A variation of LocalGraph that can also compare expressions to check for
// their equivalence. Basic LocalGraph just looks at locals, while this class
// goes further and looks at the structure of the expression, taking into
// account fallthrough values and other factors, in order to handle common
// cases of obviously-identical things. To achieve that, it needs to know the
// pass options and features used, which we avoid adding to the basic
// LocalGraph.
struct ComparingLocalGraph : public LocalGraph {
PassOptions& passOptions;
Module& wasm;
ComparingLocalGraph(Function* func, PassOptions& passOptions, Module& wasm)
: LocalGraph(func), passOptions(passOptions), wasm(wasm) {}
// Check whether the values of two expressions will definitely be equal at
// runtime.
// TODO: move to LocalGraph if we find more users?
bool equalValues(Expression* a, Expression* b) {
a = Properties::getFallthrough(a, passOptions, wasm);
b = Properties::getFallthrough(b, passOptions, wasm);
if (auto* aGet = a->dynCast<LocalGet>()) {
if (auto* bGet = b->dynCast<LocalGet>()) {
if (LocalGraph::equivalent(aGet, bGet)) {
return true;
}
}
}
if (auto* aConst = a->dynCast<Const>()) {
if (auto* bConst = b->dynCast<Const>()) {
return aConst->value == bConst->value;
}
}
return false;
}
};
// Parent class of all implementations of the logic of identifying stores etc.
// One implementation of Logic can handle globals, another memory, and another
// GC, etc., implementing the various hooks appropriately.
struct Logic {
Function* func;
Logic(Function* func, PassOptions& passOptions, Module& wasm) : func(func) {}
//============================================================================
// Hooks to identify relevant things to include in the analysis.
//============================================================================
// Returns whether an expression is a store.
//
// The main code will automatically ignore unreachable (irrelevant) stores.
bool isStore(Expression* curr) { WASM_UNREACHABLE("unimp"); };
// Returns whether an expression is a load.
//
// The main code will automatically ignore unreachable (irrelevant) loads.
bool isLoad(Expression* curr) { WASM_UNREACHABLE("unimp"); };
// Returns whether the expression is a barrier to our analysis: something that
// we should stop when we see it, because it could do things that we cannot
// analyze. A barrier will definitely pose a problem for us, as opposed to
// something mayInteract() returns true for - we will check for interactions
// later for mayInteract()s, but with barriers we don't need to.
//
// The default behavior here considers all calls to be barriers. Subclasses
// can use whole-program information to do better.
bool isBarrier(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
// TODO: ignore throws of an exception that is definitely caught in this
// function
// TODO: if we add an "ignore after trap mode" (to assume nothing happens
// after a trap) then we could stop assuming any trap can lead to
// access of global data, likely greatly reducing the number of
// barriers.
return currEffects.calls || currEffects.throws() || currEffects.trap ||
currEffects.branchesOut;
};
// Returns whether an expression may interact with loads and stores in
// interesting ways. This is only called if isStore(), isLoad(), and
// isBarrier() all return false; that is, if we cannot identify the expression
// as one of those simple categories, this allows us to still care about it in
// our analysis.
bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
WASM_UNREACHABLE("unimp");
}
//============================================================================
// Hooks that run during the analysis
//============================================================================
// Returns whether an expression is a load that corresponds to a store, that
// is, that loads the exact data that the store writes.
bool isLoadFrom(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store) {
WASM_UNREACHABLE("unimp");
};
// Returns whether an expression tramples a store completely, overwriting all
// the store's written data.
//
// This is only called if isLoadFrom() returns false, as we assume there is no
// single instruction of interest to us that can do both.
bool isTrample(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store) {
WASM_UNREACHABLE("unimp");
};
// Returns whether an expression may interact with another in a way that we
// cannot fully analyze, and so we must give up and assume the very worst.
// This is only called if isLoadFrom() and isTrample() both return false.
//
// This is similar to mayInteract(), but considers a specific interaction with
// another particular expression. mayInteract() leads to it being included in
// the analysis, during which mayInteractWith() will be called.
bool mayInteractWith(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store) {
WASM_UNREACHABLE("unimp");
};
//============================================================================
// Hooks used when applying optimizations after the analysis.
//============================================================================
// Given a store that is not needed, get drops of its children to replace it
// with. This effectively removes the store without removing its children.
Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
WASM_UNREACHABLE("unimp");
};
};
// Represent all barriers in a simple way.
static Nop nop;
static Expression* const barrier = &nop;
// Information in a basic block in the main analysis. All we use is a simple
// list of relevant expressions (stores, loads, and things that interact with
// them).
struct BasicBlockInfo {
std::vector<Expression*> exprs;
};
// Core code to generate the relevant CFG, analyze it, and optimize it.
//
// This is as generic as possible over what a "store" actually is; all the
// specific logic of handling globals vs memory vs the GC heap is all left to a
// to a LogicType that this is templated on, which subclasses from Logic.
template<typename LogicType>
struct DeadStoreCFG
: public CFGWalker<DeadStoreCFG<LogicType>,
UnifiedExpressionVisitor<DeadStoreCFG<LogicType>>,
BasicBlockInfo> {
Function* func;
PassOptions& passOptions;
LogicType logic;
DeadStoreCFG(Module& wasm, Function* func, PassOptions& passOptions)
: func(func), passOptions(passOptions), logic(func, passOptions, wasm) {
this->setModule(&wasm);
}
~DeadStoreCFG() {}
void visitExpression(Expression* curr) {
if (!this->currBasicBlock) {
return;
}
ShallowEffectAnalyzer currEffects(passOptions, *this->getModule(), curr);
auto& exprs = this->currBasicBlock->contents.exprs;
// Add all relevant things to the list of exprs for the current basic block.
if (isStore(curr) || isLoad(curr)) {
exprs.push_back(curr);
} else if (logic.isBarrier(curr, currEffects)) {
// Barriers can be very common, so as a minor optimization avoid having
// consecutive ones; a single barrier will stop us.
if (exprs.empty() || exprs.back() != barrier) {
exprs.push_back(barrier);
}
} else if (logic.mayInteract(curr, currEffects)) {
exprs.push_back(curr);
}
}
// Filter out unreachable (irrelevant) loads and stores.
bool isStore(Expression* curr) {
return curr->type != Type::unreachable && logic.isStore(curr);
}
bool isLoad(Expression* curr) {
return curr->type != Type::unreachable && logic.isLoad(curr);
}
// All the stores we can optimize, that is, stores that write to a non-local
// place from which we have a full understanding of all the loads. This data
// structure maps such an understood store to the list of loads for it. In
// particular, if that list is empty then the store is dead (since we have
// a full understanding of all the loads, and there are none), and if the list
// is non-empty then only those loads read that store's value, and nothing
// else.
std::unordered_map<Expression*, std::vector<Expression*>> understoodStores;
using Self = DeadStoreCFG<LogicType>;
using BasicBlock = typename CFGWalker<Self,
UnifiedExpressionVisitor<Self>,
BasicBlockInfo>::BasicBlock;
void analyze() {
// create the CFG by walking the IR
this->walkFunction(func);
// Flow the values and conduct the analysis. This finds each relevant store
// and then flows from it to all possible uses through the CFG.
//
// TODO: Optimize. This is a pretty naive way to flow the values, but it
// should be reasonable assuming most stores are quickly seen as
// having possible interactions (e.g., when we encounter a barrier),
// and so most flows are halted very quickly.
for (auto& block : this->basicBlocks) {
for (size_t i = 0; i < block->contents.exprs.size(); i++) {
auto* store = block->contents.exprs[i];
if (!isStore(store)) {
continue;
}
// The store is assumed to be understood (and hence present on the map)
// until we see a problem.
understoodStores[store];
// Flow this store forward through basic blocks, looking for
// interactions.
UniqueNonrepeatingDeferredQueue<BasicBlock*> work;
// When we find something we cannot optimize, stop flowing and mark the
// store as unoptimizable.
auto halt = [&]() {
work.clear();
understoodStores.erase(store);
};
// Scan through a block, starting from a certain position, looking for
// interactions.
auto scanBlock = [&](BasicBlock* block, size_t from) {
for (size_t i = from; i < block->contents.exprs.size(); i++) {
auto* curr = block->contents.exprs[i];
if (curr == barrier) {
halt();
return;
}
ShallowEffectAnalyzer currEffects(
passOptions, *this->getModule(), curr);
if (logic.isLoadFrom(curr, currEffects, store)) {
// We found a definite load of this store, note it.
understoodStores[store].push_back(curr);
} else if (logic.isTrample(curr, currEffects, store)) {
// We do not need to look any further along this block, or in
// anything it can reach, as this store has been trampled.
return;
} else if (logic.mayInteractWith(curr, currEffects, store)) {
// Stop: we cannot fully analyze the uses of this store.
halt();
return;
}
}
// We reached the end of the block, prepare to flow onward.
for (auto* out : block->out) {
work.push(out);
}
if (block == this->exit) {
// Any value flowing out can be reached by global code outside the
// function after we leave.
halt();
}
};
// Start the flow in the current location in the block, right after the
// store itself.
scanBlock(block.get(), i + 1);
// Next, continue flowing through other blocks.
while (!work.empty()) {
auto* curr = work.pop();
scanBlock(curr, 0);
}
}
}
}
// Optimizes the function, and returns whether we made any changes.
bool optimize() {
analyze();
Builder builder(*this->getModule());
ExpressionReplacer replacer;
// Optimize the stores that have no unknown interactions.
for (auto& kv : understoodStores) {
auto* store = kv.first;
const auto& loads = kv.second;
if (loads.empty()) {
// This store has no loads, which means it is trampled by other stores
// before it is read, and so it can just be dropped.
//
// Note that this is valid even if we care about implicit traps, such as
// a trap from a store that is out of bounds. We are removing one store,
// but it was trampled later, which means that a trap will still occur
// at that time, if the store is out of bounds; furthermore, we do not
// delay the trap in a noticeable way since if the path between the
// stores crosses anything that affects global state then we would not
// have considered the store to be trampled (it could have been
// interacted with, which would have stopped the analysis).
replacer.replacements[store] =
logic.replaceStoreWithDrops(store, builder);
}
// TODO: When there are loads, we can replace the loads as well (by saving
// the value to a local for that global, etc.).
// Note that we may need to leave the loads if they have side
// effects, like a possible trap on memory loads, but they can be
// left as dropped, the same as with store inputs.
}
if (replacer.replacements.empty()) {
return false;
}
replacer.walk(this->func->body);
return true;
}
};
// A logic that uses a local graph, as it needs to compare pointers.
// TODO: run the LocalGraph only on relevant locals (only i32s can be pointers
// for loads and stores; only references can be pointers for GC)
struct ComparingLogic : public Logic {
ComparingLocalGraph localGraph;
ComparingLogic(Function* func, PassOptions& passOptions, Module& wasm)
: Logic(func, passOptions, wasm), localGraph(func, passOptions, wasm) {}
};
// Optimize module globals: GlobalSet/GlobalGet.
struct GlobalLogic : public Logic {
GlobalLogic(Function* func, PassOptions& passOptions, Module& wasm)
: Logic(func, passOptions, wasm) {}
bool isStore(Expression* curr) { return curr->is<GlobalSet>(); }
bool isLoad(Expression* curr) { return curr->is<GlobalGet>(); }
bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
// Globals are easy to statically analyze: there are no interactions we
// cannot be sure about.
return false;
}
bool isLoadFrom(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
if (auto* load = curr->dynCast<GlobalGet>()) {
auto* store = store_->cast<GlobalSet>();
return load->name == store->name;
}
return false;
}
bool isTrample(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
if (auto* otherStore = curr->dynCast<GlobalSet>()) {
auto* store = store_->cast<GlobalSet>();
return otherStore->name == store->name;
}
return false;
}
bool mayInteractWith(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store) {
return false;
}
Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
return builder.makeDrop(store->cast<GlobalSet>()->value);
}
};
// Optimize memory stores/loads.
struct MemoryLogic : public ComparingLogic {
MemoryLogic(Function* func, PassOptions& passOptions, Module& wasm)
: ComparingLogic(func, passOptions, wasm) {}
bool isStore(Expression* curr) { return curr->is<Store>(); }
bool isLoad(Expression* curr) { return curr->is<Load>(); }
bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
return currEffects.readsMemory || currEffects.writesMemory;
}
bool isLoadFrom(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
if (curr->type == Type::unreachable) {
return false;
}
if (auto* load = curr->dynCast<Load>()) {
auto* store = store_->cast<Store>();
// Atomic stores are dangerous, since they have additional trapping
// behavior - they trap on unaligned addresses. For simplicity, only
// consider the case where atomicity is identical.
// TODO: use ignoreImplicitTraps
if (store->isAtomic != load->isAtomic) {
return false;
}
// TODO: For now, only handle the obvious case where the operations are
// identical in size and offset.
return load->bytes == store->bytes &&
load->bytes == load->type.getByteSize() &&
load->offset == store->offset &&
localGraph.equalValues(load->ptr, store->ptr);
}
return false;
}
bool isTrample(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
if (auto* otherStore = curr->dynCast<Store>()) {
auto* store = store_->cast<Store>();
// As in isLoadFrom, atomic stores are dangerous.
if (store->isAtomic != otherStore->isAtomic) {
return false;
}
// TODO: Compare in detail. For now, handle the obvious case where the
// stores are identical in size, offset, etc., so that identical
// repeat stores are handled. (An example of a case we do not handle
// yet is a store of 1 byte that is trampled by a store of 2 bytes.)
return otherStore->bytes == store->bytes &&
otherStore->offset == store->offset &&
localGraph.equalValues(otherStore->ptr, store->ptr);
}
return false;
}
bool mayInteractWith(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store) {
// Anything we did not identify so far is dangerous.
//
// Among other things, this includes compare-and-swap, which does both a
// read and a write, which our infrastructure is not build to optimize.
return currEffects.readsMemory || currEffects.writesMemory;
}
Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
auto* castStore = store->cast<Store>();
return builder.makeSequence(builder.makeDrop(castStore->ptr),
builder.makeDrop(castStore->value));
}
};
// Optimize GC data: StructGet/StructSet.
// TODO: Arrays.
struct GCLogic : public ComparingLogic {
GCLogic(Function* func, PassOptions& passOptions, Module& wasm)
: ComparingLogic(func, passOptions, wasm) {}
bool isStore(Expression* curr) { return curr->is<StructSet>(); }
bool isLoad(Expression* curr) { return curr->is<StructGet>(); }
bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
return currEffects.readsMutableStruct || currEffects.writesStruct;
}
bool isLoadFrom(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
if (auto* load = curr->dynCast<StructGet>()) {
auto* store = store_->cast<StructSet>();
// Note that we do not need to check the type: we check that the
// reference is identical, and if it is then the types must be compatible
// in addition to them pointing to the same memory.
return localGraph.equalValues(load->ref, store->ref) &&
load->index == store->index;
}
return false;
}
bool isTrample(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
if (auto* otherStore = curr->dynCast<StructSet>()) {
auto* store = store_->cast<StructSet>();
// See note in isLoadFrom about typing.
return localGraph.equalValues(otherStore->ref, store->ref) &&
otherStore->index == store->index;
}
return false;
}
// Check whether two GC operations may alias memory.
template<typename U, typename V> bool mayAlias(U* u, V* v) {
// If one of the inputs is unreachable, it does not execute, and so there
// cannot be aliasing.
auto uType = u->ref->type;
auto vType = v->ref->type;
if (uType == Type::unreachable || vType == Type::unreachable) {
return false;
}
// If the index does not match, no aliasing is possible.
if (u->index != v->index) {
return false;
}
// Even if the index is identical, aliasing still may be impossible. For
// aliasing to occur, the same data must be pointed to by both references,
// which means the actual data is a subtype of both the present types. For
// that to be possible, one of the present heap types must be a subtype of
// the other (note that we check heap types, in order to ignore
// nullability).
auto uHeapType = uType.getHeapType();
auto vHeapType = vType.getHeapType();
return HeapType::isSubType(uHeapType, vHeapType) ||
HeapType::isSubType(vHeapType, uHeapType);
}
bool mayInteractWith(Expression* curr,
const ShallowEffectAnalyzer& currEffects,
Expression* store_) {
auto* store = store_->cast<StructSet>();
// We already checked isLoadFrom and isTrample and it was neither of those,
// so just check if the memory can possibly alias.
if (auto* otherStore = curr->dynCast<StructSet>()) {
return mayAlias(otherStore, store);
}
if (auto* load = curr->dynCast<StructGet>()) {
return mayAlias(load, store);
}
// This is not a load or a store that we recognize; check for generic heap
// interactions.
return currEffects.readsMutableStruct || currEffects.writesStruct;
}
Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
auto* castStore = store->cast<StructSet>();
return builder.makeSequence(builder.makeDrop(castStore->ref),
builder.makeDrop(castStore->value));
}
};
// Perform dead store elimination 100% locally, that is, without any whole-
// program analysis. This is not very powerful, but can catch simple patterns of
// obviously dead stores, and is useful for testing.
//
// This does all the optimizations (globals, memory, GC) in sequence on each
// function, which is good for cache locality. That is the reason there are not
// separate passes for each one.
// TODO: the optimizations can perhaps share more things between them
struct LocalDeadStoreElimination
: public WalkerPass<PostWalker<LocalDeadStoreElimination>> {
bool isFunctionParallel() { return true; }
Pass* create() { return new LocalDeadStoreElimination; }
void doWalkFunction(Function* func) {
// Optimize globals.
DeadStoreCFG<GlobalLogic>(*getModule(), func, getPassOptions()).optimize();
// Optimize memory.
DeadStoreCFG<MemoryLogic>(*getModule(), func, getPassOptions()).optimize();
// Optimize GC heap.
if (getModule()->features.hasGC()) {
DeadStoreCFG<GCLogic>(*getModule(), func, getPassOptions()).optimize();
}
}
};
} // anonymous namespace
Pass* createLocalDeadStoreEliminationPass() {
return new LocalDeadStoreElimination();
}
} // namespace wasm