model_variant_handle.h Source File

model_variant_handle.h Source File

SDK qb Runtime Library: model_variant_handle.h Source File
SDK qb Runtime Library v1.0
MCS001-
model_variant_handle.h
Go to the documentation of this file.
1
4
5#ifndef QBRUNTIME_MODEL_VARIANT_HANDLE_H_
6#define QBRUNTIME_MODEL_VARIANT_HANDLE_H_
7
8#include <stdint.h>
9
10#include <vector>
11
12#include "qbruntime/export.h"
14#include "qbruntime/type.h"
15
16namespace mobilint {
17
22
// Forward declaration only: this header refers to ModelImpl solely by
// reference (constructor parameter and data member of ModelVariantHandle)
// and in a friend declaration, so the full definition is not needed here.
23class ModelImpl;
24
// Handle exposing per-variant queries and buffer operations for one variant
// of a model. Instances cannot be copied or moved, and the only constructor
// is private with ModelImpl declared as a friend, so handles are created by
// ModelImpl rather than directly by user code.
34class QBRUNTIME_EXPORT ModelVariantHandle {
35public:
   // Copy and move construction/assignment are all explicitly deleted: a
   // handle is neither copyable nor movable.
36 ModelVariantHandle(const ModelVariantHandle& other) = delete;
37 ModelVariantHandle(ModelVariantHandle&& other) = delete;
38 ModelVariantHandle& operator=(const ModelVariantHandle& rhs) = delete;
39 ModelVariantHandle& operator=(ModelVariantHandle&& rhs) noexcept = delete;
   // Destructor is defined out of line (definition not visible in this header).
40 ~ModelVariantHandle();
41
   // Returns the index of this model variant.
47 int getVariantIdx() const;
48
   // Returns the input shape for this model variant, as one vector of int64_t
   // dimensions per entry of the outer vector. Returned by const reference.
   // NOTE(review): presumably one outer entry per model input — confirm.
54 const std::vector<std::vector<int64_t>>& getModelInputShape() const;
55
   // Returns the output shape for this model variant, as one vector of int64_t
   // dimensions per entry of the outer vector. Returned by const reference.
   // NOTE(review): presumably one outer entry per model output — confirm.
61 const std::vector<std::vector<int64_t>>& getModelOutputShape() const;
62
   // Returns the input buffer information for this variant (const reference).
68 const std::vector<BufferInfo>& getInputBufferInfo() const;
69
   // Returns the output buffer information for this variant (const reference).
75 const std::vector<BufferInfo>& getOutputBufferInfo() const;
76
   // Returns the input quantization scale(s) for this variant, by value.
82 std::vector<Scale> getInputScale() const;
83
   // Returns the output quantization scale(s) for this variant, by value.
89 std::vector<Scale> getOutputScale() const;
90
113
114 // Acquire buffers.
    // The singular forms return one std::vector<Buffer>; the plural forms
    // take a batch_size and return a vector of such vectors.
    // NOTE(review): presumably one inner vector per batch element — confirm.
    // `seqlens` defaults to an empty vector in every overload.
    // NOTE(review): looks like per-input sequence lengths for variable-length
    // inputs — TODO confirm against the implementation.
    // Acquired buffers are expected to be handed back through
    // releaseBuffer()/releaseBuffers() declared below.
115 std::vector<Buffer> acquireInputBuffer(
116 const std::vector<std::vector<int>>& seqlens = {}) const;
117 std::vector<Buffer> acquireOutputBuffer(
118 const std::vector<std::vector<int>>& seqlens = {}) const;
119 std::vector<std::vector<Buffer>> acquireInputBuffers(
120 int batch_size, const std::vector<std::vector<int>>& seqlens = {}) const;
121 std::vector<std::vector<Buffer>> acquireOutputBuffers(
122 int batch_size, const std::vector<std::vector<int>>& seqlens = {}) const;
123
124 // Deallocate acquired input/output buffers.
    // releaseBuffer() takes a single buffer set, releaseBuffers() a batched
    // set; both take the container by non-const reference and report the
    // result as a StatusCode.
125 StatusCode releaseBuffer(std::vector<Buffer>& buffer) const;
126 StatusCode releaseBuffers(std::vector<std::vector<Buffer>>& buffers) const;
127
128 // Reposition a single batch.
    // repositionInputs: reads from caller-provided pointers (float* or
    // uint8_t* overloads) and writes into `input_buf`.
    // repositionOutputs: reads from `output_buf` and writes into `output`,
    // which is either a vector of float* or a vector of std::vector<float>.
    // NOTE(review): "reposition" presumably converts between the caller's
    // data layout and the layout of Buffer — TODO confirm.
    // All overloads return a StatusCode and accept an optional `seqlens`.
129 StatusCode repositionInputs(const std::vector<float*>& input,
130 std::vector<Buffer>& input_buf,
131 const std::vector<std::vector<int>>& seqlens = {}) const;
132 StatusCode repositionOutputs(const std::vector<Buffer>& output_buf,
133 std::vector<float*>& output,
134 const std::vector<std::vector<int>>& seqlens = {}) const;
135 StatusCode repositionOutputs(const std::vector<Buffer>& output_buf,
136 std::vector<std::vector<float>>& output,
137 const std::vector<std::vector<int>>& seqlens = {}) const;
138 StatusCode repositionInputs(const std::vector<uint8_t*>& input,
139 std::vector<Buffer>& input_buf,
140 const std::vector<std::vector<int>>& seqlens = {}) const;
141
142 // Reposition multiple batches.
    // Same overload set as the single-batch versions above, except that the
    // Buffer argument is a vector of per-batch Buffer vectors.
    // NOTE(review): how `input`/`output` entries map onto batches is not
    // visible from these declarations — confirm before relying on it.
143 StatusCode repositionInputs(const std::vector<float*>& input,
144 std::vector<std::vector<Buffer>>& input_buf,
145 const std::vector<std::vector<int>>& seqlens = {}) const;
146 StatusCode repositionOutputs(const std::vector<std::vector<Buffer>>& output_buf,
147 std::vector<float*>& output,
148 const std::vector<std::vector<int>>& seqlens = {}) const;
149 StatusCode repositionOutputs(const std::vector<std::vector<Buffer>>& output_buf,
150 std::vector<std::vector<float>>& output,
151 const std::vector<std::vector<int>>& seqlens = {}) const;
152 StatusCode repositionInputs(const std::vector<uint8_t*>& input,
153 std::vector<std::vector<Buffer>>& input_buf,
154 const std::vector<std::vector<int>>& seqlens = {}) const;
156
157private:
    // Private constructor: reachable only from this class and from the
    // friend ModelImpl declared below.
    // NOTE(review): presumably variant_idx initializes mIdx and model_impl
    // initializes mModelImpl; the definition is not visible here.
158 ModelVariantHandle(int variant_idx, const ModelImpl& model_impl);
159
    // Variant index; const, so it is fixed at construction.
160 const int mIdx;
    // Non-owning const reference to a ModelImpl; the referenced object must
    // remain valid for as long as this handle is used.
161 const ModelImpl& mModelImpl;
162
    // Grants ModelImpl access to the private constructor above.
163 friend class ModelImpl;
164};
165
166} // namespace mobilint
167
168#endif // QBRUNTIME_MODEL_VARIANT_HANDLE_H_
const std::vector< std::vector< int64_t > > & getModelOutputShape() const
Returns the output shape for this model variant.
const std::vector< BufferInfo > & getOutputBufferInfo() const
Returns the output buffer information for this variant.
std::vector< Scale > getInputScale() const
Returns the input quantization scale(s) for this variant.
StatusCode
Enumerates status codes for the qbruntime.
Definition status_code.h:26
std::vector< Scale > getOutputScale() const
Returns the output quantization scale(s) for this variant.
const std::vector< BufferInfo > & getInputBufferInfo() const
Returns the input buffer information for this variant.
const std::vector< std::vector< int64_t > > & getModelInputShape() const
Returns the input shape for this model variant.
int getVariantIdx() const
Returns the index of this model variant.