Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:16

0001 //===- ReleaseModeModelRunner.h - Fast, precompiled model runner  ---------===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file implements a model runner wrapping an AOT compiled ML model.
0010 // Only inference is supported.
0011 //
0012 //===----------------------------------------------------------------------===//
0013 
0014 #ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
0015 #define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
0016 
0017 #include "llvm/ADT/StringExtras.h"
0018 #include "llvm/Analysis/MLModelRunner.h"
0019 #include "llvm/Analysis/TensorSpec.h"
0020 #include "llvm/Support/ErrorHandling.h"
0021 #include "llvm/Support/MD5.h"
0022 
0023 #include <memory>
0024 
0025 namespace llvm {
0026 
0027 /// ReleaseModeModelRunner - production mode implementation of the
0028 /// MLModelRunner. It uses an AOT-compiled SavedModel for efficient execution.
0029 struct EmbeddedModelRunnerOptions {
0030   /// Feed and Fetch feature prefixes - i.e. a feature named "foo" will be
0031   /// looked up as {FeedPrefix}_foo; and the output named "bar" will be looked
0032   /// up as {FetchPrefix}_bar
0033   StringRef FeedPrefix = "feed_";
0034   StringRef FetchPrefix = "fetch_";
0035 
0036   /// ModelSelector is the name (recognized by the AOT-ed model) of a sub-model
0037   /// to use. "" is allowed if the model doesn't support sub-models.
0038   StringRef ModelSelector = "";
0039 
0040   EmbeddedModelRunnerOptions &setFeedPrefix(StringRef Value) {
0041     FeedPrefix = Value;
0042     return *this;
0043   }
0044   EmbeddedModelRunnerOptions &setFetchPrefix(StringRef Value) {
0045     FetchPrefix = Value;
0046     return *this;
0047   }
0048   EmbeddedModelRunnerOptions &setModelSelector(StringRef Value) {
0049     ModelSelector = Value;
0050     return *this;
0051   }
0052 };
0053 
/// Production-mode implementation of the MLModelRunner, wrapping an
/// AOT-compiled model. TGen is the generated model class; it must provide
/// LookupArgIndex, LookupResultIndex, arg_data, result_data and Run (see
/// NoopSavedModelImpl for the expected shape of that interface). Only
/// inference is supported.
template <class TGen>
class ReleaseModeModelRunner final : public MLModelRunner {
public:
  /// FeatureNames' type should be an indexed collection of std::string, like
  /// std::array or std::vector, that has a size() method.
  ///
  /// \param Ctx used for diagnostics (Ctx.emitError) if the model selector
  ///        configuration is inconsistent with the compiled model.
  /// \param InputSpec the TensorSpecs of the model's input features.
  /// \param DecisionName name of the output to fetch; resolved in the model
  ///        as {Options.FetchPrefix}{DecisionName}.
  /// \param Options feed/fetch name prefixes and optional sub-model selector.
  template <class FType>
  ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec,
                         StringRef DecisionName,
                         const EmbeddedModelRunnerOptions &Options = {})
      // The "+ 1" reserves one extra tensor slot, past all the InputSpecs,
      // for the model_selector value.
      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size() + 1),
        CompiledModel(std::make_unique<TGen>()) {
    assert(CompiledModel && "The CompiledModel should be valid");
    // Set up the model_selector past all the InputSpecs in all cases.
    //   - if the model doesn't have such a feature, but the user requested it,
    //   we report error. Same if the model supports it but the user didn't
    //   specify it
    //   - finally, we compute the MD5 hash of the user input and set the value
    //   of the model selector to {high, low}
    bool InputIsPresent = true;
    populateTensor(InputSpec.size(),
                   TensorSpec::createSpec<uint64_t>("model_selector", {2}),
                   Options.FeedPrefix, InputIsPresent);

    // If we hit the "report an error" cases outlined above, continue with the
    // set up in case there's some custom diagnostics handler installed and it
    // doesn't promptly exit.
    if (Options.ModelSelector.empty() && InputIsPresent)
      Ctx.emitError(
          "A model selector was not specified but the underlying model "
          "requires selecting one because it exposes a model_selector input");
    uint64_t High = 0;
    uint64_t Low = 0;
    if (!Options.ModelSelector.empty()) {
      if (!InputIsPresent)
        Ctx.emitError("A model selector was specified but the underlying model "
                      "does not expose a model_selector input");
      // The 128-bit MD5 digest of the selector string is passed to the model
      // as two uint64_t halves.
      const auto Hash = MD5::hash(arrayRefFromStringRef(Options.ModelSelector));
      High = Hash.high();
      Low = Hash.low();
    }
    getTensor<uint64_t>(InputSpec.size())[0] = High;
    getTensor<uint64_t>(InputSpec.size())[1] = Low;
    // At this point, the model selector is set up. If the user didn't provide
    // one, but the model has a model_selector, it'll be set to (0, 0) which
    // the composite model should treat as error as part of its implementation
    // (but that should only matter if there is a custom handler that doesn't
    // exit on error)
    for (size_t I = 0; I < InputSpec.size(); ++I)
      populateTensor(I, InputSpec[I], Options.FeedPrefix, InputIsPresent);

    ResultIndex = CompiledModel->LookupResultIndex(Options.FetchPrefix.str() +
                                                   DecisionName.str());
    assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
  }

  virtual ~ReleaseModeModelRunner() = default;

  /// Support for LLVM-style RTTI (isa<>/dyn_cast<>).
  static bool classof(const MLModelRunner *R) {
    return R->getKind() == MLModelRunner::Kind::Release;
  }

private:
  // fetch the model-provided buffer for the given Spec, or let MLModelRunner
  // create a scratch buffer. Indicate back to the caller if the model had that
  // input in the first place.
  void populateTensor(size_t Pos, const TensorSpec &Spec, StringRef Prefix,
                      bool &InputIsPresent) {
    const int Index =
        CompiledModel->LookupArgIndex((Prefix + Spec.name()).str());
    void *Buffer = nullptr;
    InputIsPresent = Index >= 0;
    if (InputIsPresent)
      Buffer = CompiledModel->arg_data(Index);
    setUpBufferForTensor(Pos, Spec, Buffer);
  }

  // Run one inference and return the model's buffer for the decision output
  // identified by ResultIndex.
  void *evaluateUntyped() override {
    CompiledModel->Run();
    return CompiledModel->result_data(ResultIndex);
  }

  // Index of the decision output within the compiled model; -1 until resolved
  // in the constructor.
  int32_t ResultIndex = -1;
  std::unique_ptr<TGen> CompiledModel;
};
0138 
0139 /// A mock class satisfying the interface expected by ReleaseModeModelRunner for
0140 /// its `TGen` parameter. Useful to avoid conditional compilation complexity, as
0141 /// a compile-time replacement for a real AOT-ed model.
0142 class NoopSavedModelImpl final {
0143 #define NOOP_MODEL_ERRMSG                                                      \
0144   "The mock AOT-ed saved model is a compile-time stub and should not be "      \
0145   "called."
0146 
0147 public:
0148   NoopSavedModelImpl() = default;
0149   int LookupArgIndex(const std::string &) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
0150   int LookupResultIndex(const std::string &) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
0151   void Run() { llvm_unreachable(NOOP_MODEL_ERRMSG); }
0152   void *result_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
0153   void *arg_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
0154 #undef NOOP_MODEL_ERRMSG
0155 };
0156 
/// True when TGen is a real AOT-ed model; the NoopSavedModelImpl
/// specialization below opts the compile-time stub out.
template <class T> bool isEmbeddedModelEvaluatorValid() {
  return true;
}
0158 
0159 template <> inline bool isEmbeddedModelEvaluatorValid<NoopSavedModelImpl>() {
0160   return false;
0161 }
0162 } // namespace llvm
0163 
0164 #endif // LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H