type.h Source File

type.h Source File#

SDK qb Runtime Library: type.h Source File
SDK qb Runtime Library v1.0
MCS001-
type.h
Go to the documentation of this file.
1// Copyright ⓒ 2019- Mobilint Inc. All rights reserved.
5
6#ifndef QBRUNTIME_TYPE_H_
7#define QBRUNTIME_TYPE_H_
8
9#include <cstdint>
10#include <string>
11#include <tuple>
12#include <vector>
13
14#include "qbruntime/export.h"
15
16namespace mobilint {
23
/// Retrieves the version of the qbruntime.
29QBRUNTIME_EXPORT std::string getQbRuntimeVersion();
30
/// Retrieves the Git commit hash of the qbruntime.
36QBRUNTIME_EXPORT std::string getQbRuntimeGitVersion();
37
/// Retrieves the vendor name of the qbruntime.
45QBRUNTIME_EXPORT std::string getQbRuntimeVendor();
46
/// Retrieves product information of the qbruntime.
55QBRUNTIME_EXPORT std::string getQbRuntimeProduct();
56
/// Enumerates clusters in the ARIES NPU.
///
/// The cluster enumerators are encoded at bit 16 and above (`1 << 16`,
/// `2 << 16`), so the low 16 bits of every value in this enum are zero.
enum class Cluster : int32_t {
    Cluster0 = 1 << 16,   ///< 0x0001'0000
    Cluster1 = 2 << 16,   ///< 0x0002'0000
    Error = 0x7FFF'0000,  ///< Error marker.
};
69
/// Enumerates cores within a cluster in the ARIES NPU.
///
/// Individual cores are numbered from 1; the remaining enumerators are
/// special values near the top of the 16-bit range.
enum class Core : int32_t {
    Core0 = 1,
    Core1 = 2,
    Core2 = 3,
    Core3 = 4,
    All = 0x0000'FFFC,
    GlobalCore = 0x0000'FFFE,
    Error = 0x0000'FFFF,  ///< Error marker.
};
86
94
/// Struct for scale values.
///
/// Holds either one scale shared by every index (`is_uniform == true`) or a
/// list of per-index scales (`is_uniform == false`).
struct Scale {
    std::vector<float> scale_list;  ///< Per-index values; read only when is_uniform is false.
    float scale = 0.0F;             ///< Value returned for every index when is_uniform is true.
    bool is_uniform = false;        ///< Selects between `scale` and `scale_list`.

    /// Returns the scale value at the specified index.
    ///
    /// @param i Index of the requested scale. Ignored when `is_uniform` is
    ///          true. Otherwise it must satisfy `0 <= i < scale_list.size()`;
    ///          the access is unchecked, exactly like `std::vector::operator[]`.
    /// @return `scale` when `is_uniform` is true, otherwise `scale_list[i]`.
    float operator[](int i) const {
        if (is_uniform) {
            return scale;
        }
        // Make the signed-to-unsigned index conversion explicit.
        return scale_list[static_cast<std::vector<float>::size_type>(i)];
    }
};
116
/// Represents a unique identifier for an NPU core.
///
/// NOTE(review): the data members compared below (`cluster`, `core`) are
/// declared on listing lines 124-125, which are not visible in this extract.
123struct CoreId {
126
    /// Checks if two CoreId objects are equal.
    /// Equal when both the `cluster` and the `core` members match.
133 bool operator==(const CoreId& rhs) const {
134 return std::tie(cluster, core) == std::tie(rhs.cluster, rhs.core);
135 }
136
    /// Compares two CoreId objects for ordering.
    /// Lexicographic: `cluster` is compared first, then `core`.
143 bool operator<(const CoreId& rhs) const {
144 return std::tie(cluster, core) < std::tie(rhs.cluster, rhs.core);
145 }
146};
147
/// A simple byte-sized buffer.
///
/// Plain aggregate: it declares no constructor or destructor, so it neither
/// allocates nor releases the memory that `data` points to.
struct Buffer {
    int8_t* data = nullptr;  ///< Start of the buffer; null by default.
    uint64_t size = 0;       ///< Buffer size (presumably in bytes — confirm); zero by default.
};
157
/// Defines the core mode for NPU execution.
///
/// NOTE(review): listing lines 173-174 (between Global and Error) are not
/// visible in this extract; confirm against the real header whether further
/// enumerators exist there.
169enum class CoreMode : uint8_t {
170 Single = 0,
171 Multi = 1,
172 Global = 2,
175 Error = 0xF,
176};
177
// Struct representing input/output buffer information.
// NOTE(review): the struct header (listing line 181) is not visible in this
// extract; only the members and the closing brace are shown.
182 // clang-format off
// Dimensions of the original input/output; multiplied together by original_size() per its description.
183 uint32_t original_height = 0;
184 uint32_t original_width = 0;
185 uint32_t original_channel = 0;
// Dimensions of the reshaped input/output; see reshaped_size().
186 uint32_t reshaped_height = 0;
187 uint32_t reshaped_width = 0;
188 uint32_t reshaped_channel = 0;
// Dimensions of the NPU input/output; multiplied together by size().
189 uint32_t height = 0;
190 uint32_t width = 0;
191 uint32_t channel = 0;
// NOTE(review): no description of the max_* fields is visible here — presumably upper bounds; confirm.
192 uint32_t max_height = 0;
193 uint32_t max_width = 0;
194 uint32_t max_channel = 0;
195 uint32_t max_cache_size = 0;
196 // clang-format on
197
// Returns the total size of the original input/output.
// NOTE(review): the function body (listing line 203) is not visible in this extract.
202 uint32_t original_size() const {
204 }
205
// Returns the total size of the reshaped input/output.
// NOTE(review): the function body (listing line 211) is not visible in this extract.
210 uint32_t reshaped_size() const {
212 }
213
// Returns the total size of the NPU input/output: height * width * channel.
// The product is computed in uint32_t, so it wraps modulo 2^32 for very large dimensions.
218 uint32_t size() const { return height * width * channel; }
219};
220
/// Configuration object describing how a model is mapped onto NPU cores.
///
/// NOTE(review): this extract elides several lines of the class: the default
/// constructor, setGlobal8CoreMode(), the continuation lines of the default
/// arguments of setMultiCoreMode()/setGlobal4CoreMode(), and the declaration
/// of mCoreAllocationPolicy (listing line 452). Consult the real header.
233class QBRUNTIME_EXPORT ModelConfig {
234public:
240
    /// Sets the model to use single-core mode for inference with a specified
    /// number of local cores.
254 bool setSingleCoreMode(int num_cores);
255
    /// Sets the model to use single-core mode for inference with a specific
    /// set of NPU local cores.
267 bool setSingleCoreMode(std::vector<CoreId> core_ids);
268
    /// Sets the model to use multi-core mode for batch inference.
    /// Defaults to {Cluster::Cluster0, Cluster::Cluster1} per the page index;
    /// the rest of the default argument is cut off in this extract.
281 bool setMultiCoreMode(std::vector<Cluster> clusters = {Cluster::Cluster0,
283
    /// Sets the model to use global4-core mode for inference with a specified
    /// set of NPU clusters. Same default argument (also cut off here).
297 bool setGlobal4CoreMode(std::vector<Cluster> clusters = {Cluster::Cluster0,
299
310
    /// Gets the core mode to be applied to the model.
318 CoreMode getCoreMode() const { return mCoreMode; }
319
    /// Gets the core allocation policy to be applied to the model.
328 CoreAllocationPolicy getCoreAllocationPolicy() const { return mCoreAllocationPolicy; }
329
    /// Gets the number of cores to be allocated for the model.
338 int getNumCores() const { return mNumCores; }
339
    /// Forces the use of a specific NPU bundle.
355 bool forceSingleNPUBundle(int npu_bundle_index);
356
    /// Retrieves the index of the forced NPU bundle (-1 when none is forced,
    /// per the member's default below).
366 int getForcedNPUBundleIndex() const { return mForcedNPUBundleIndex; }
367
    /// Returns the list of NPU CoreIds to be used for model inference.
378 const std::vector<CoreId>& getCoreIds() const { return mCoreIds; }
379
    /// Returns a reference to the stored cluster list (mClusters).
380 const std::vector<Cluster>& getClusters() const { return mClusters; }
381
    /// Enables or disables the asynchronous pipeline required for
    /// asynchronous inference.
397 void setAsyncPipelineEnabled(bool enable);
398
    /// Returns whether the asynchronous pipeline is enabled in this
    /// configuration.
404 bool getAsyncPipelineEnabled() const { return mAsyncPipelineEnabled; }
405
    /// Sets activation buffer slots for multi-activation supported model.
426 void setActivationSlots(int count);
427
    /// Returns activation buffer slot count.
435 int getActivationSlots() const { return mActivationSlots; }
436
    // NOTE(review): no description is visible for this constructor; presumably
    // it configures `num_cores` cores — confirm against the implementation.
437 explicit ModelConfig(int num_cores);
438
    // NOTE(review): no description is visible for this function.
439 bool setGlobalCoreMode(std::vector<Cluster> clusters);
440
    // Public latency vectors; no description or unit is visible in this extract.
444 std::vector<uint64_t> early_latencies;
448 std::vector<uint64_t> finish_latencies;
449
450private:
451 CoreMode mCoreMode = CoreMode::Single;
453 std::vector<Cluster> mClusters;
454 std::vector<CoreId> mCoreIds;
    // NOTE(review): unlike its neighbours, mNumCores has no default member
    // initializer; confirm every constructor assigns it before getNumCores() is used.
455 int mNumCores;
456 int mForcedNPUBundleIndex = -1; // -1 means single npu bundle usage is not forced.
457 bool mAsyncPipelineEnabled = false;
458 int mActivationSlots = 1;
459};
460
/// LogLevel.
///
/// Levels are numbered consecutively from 1 (DEBUG) to 6 (OFF); the value 0
/// is not assigned to any enumerator.
enum class LogLevel : char {
    DEBUG = 1,
    INFO = 2,
    WARN = 3,
    ERR = 4,
    FATAL = 5,
    OFF = 6,
};
472
/// CacheType.
enum class CacheType : uint8_t {
    Default = 0,
    Batch,         ///< Implicitly 1 (one past Default).
    Error = 0x0F,  ///< Error marker; also the default of CacheInfo::cache_type.
};

/// Struct representing KV-cache information.
struct CacheInfo {
    CacheType cache_type = CacheType::Error;  ///< Stays Error until explicitly set.
    std::string name;                         ///< Empty by default.
    std::string layer_hash;                   ///< Empty by default.
    uint64_t size = 0;                        ///< Zero by default.
    size_t num_batches = 0;                   ///< Zero by default.
};
488
// NOTE(review): no description is visible for setLogLevel in this extract;
// presumably it selects the runtime's logging level — confirm.
489QBRUNTIME_EXPORT void setLogLevel(LogLevel level);
490
/// Starts event tracing and prepares to save the trace log to a specified file.
503QBRUNTIME_EXPORT bool startTracingEvents(const char* path);
504
/// Stops event tracing and writes the recorded trace log.
511QBRUNTIME_EXPORT void stopTracingEvents();
512
/// Generates a structured summary of the specified MXQ model.
530QBRUNTIME_EXPORT std::string getModelSummary(const std::string& mxq_path);
531
533
534} // namespace mobilint
535
536#endif
bool setGlobal8CoreMode()
Sets the model to use global8-core mode for inference.
bool setGlobalCoreMode(std::vector< Cluster > clusters)
CoreMode getCoreMode() const
Gets the core mode to be applied to the model.
Definition type.h:318
bool getAsyncPipelineEnabled() const
Returns whether the asynchronous pipeline is enabled in this configuration.
Definition type.h:404
bool setGlobal4CoreMode(std::vector< Cluster > clusters={Cluster::Cluster0, Cluster::Cluster1})
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.
void setAsyncPipelineEnabled(bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.
ModelConfig(int num_cores)
bool setSingleCoreMode(int num_cores)
Sets the model to use single-core mode for inference with a specified number of local cores.
ModelConfig()
Default constructor. This default-constructed object is initially set to single-core mode with all NP...
bool setMultiCoreMode(std::vector< Cluster > clusters={Cluster::Cluster0, Cluster::Cluster1})
Sets the model to use multi-core mode for batch inference.
int getActivationSlots() const
Returns activation buffer slot count.
Definition type.h:435
const std::vector< CoreId > & getCoreIds() const
Returns the list of NPU CoreIds to be used for model inference.
Definition type.h:378
std::vector< uint64_t > early_latencies
Definition type.h:444
std::vector< uint64_t > finish_latencies
Definition type.h:448
bool setSingleCoreMode(std::vector< CoreId > core_ids)
Sets the model to use single-core mode for inference with a specific set of NPU local cores.
CoreAllocationPolicy getCoreAllocationPolicy() const
Gets the core allocation policy to be applied to the model.
Definition type.h:328
bool forceSingleNPUBundle(int npu_bundle_index)
Forces the use of a specific NPU bundle.
int getNumCores() const
Gets the number of cores to be allocated for the model.
Definition type.h:338
int getForcedNPUBundleIndex() const
Retrieves the index of the forced NPU bundle.
Definition type.h:366
void setActivationSlots(int count)
Sets activation buffer slots for multi-activation supported model.
QBRUNTIME_EXPORT std::string getQbRuntimeProduct()
Retrieves product information of the qbruntime.
QBRUNTIME_EXPORT bool startTracingEvents(const char *path)
Starts event tracing and prepares to save the trace log to a specified file.
QBRUNTIME_EXPORT std::string getModelSummary(const std::string &mxq_path)
Generates a structured summary of the specified MXQ model.
Cluster
Enumerates clusters in the ARIES NPU.
Definition type.h:64
QBRUNTIME_EXPORT void stopTracingEvents()
Stops event tracing and writes the recorded trace log.
QBRUNTIME_EXPORT std::string getQbRuntimeVendor()
Retrieves the vendor name of the qbruntime.
CacheType
CacheType.
Definition type.h:476
LogLevel
LogLevel.
Definition type.h:464
CoreMode
Defines the core mode for NPU execution.
Definition type.h:169
QBRUNTIME_EXPORT std::string getQbRuntimeGitVersion()
Retrieves the Git commit hash of the qbruntime.
QBRUNTIME_EXPORT std::string getQbRuntimeVersion()
Retrieves the version of the qbruntime.
CoreAllocationPolicy
Core allocation policy.
Definition type.h:90
Core
Enumerates cores within a cluster in the ARIES NPU.
Definition type.h:77
Struct representing input/output buffer information.
Definition type.h:181
uint32_t original_height
Definition type.h:183
uint32_t max_cache_size
Definition type.h:195
uint32_t width
Definition type.h:190
uint32_t reshaped_channel
Definition type.h:188
uint32_t height
Definition type.h:189
uint32_t reshaped_height
Definition type.h:186
uint32_t original_size() const
Returns the total size of the original input/output.
Definition type.h:202
uint32_t max_channel
Definition type.h:194
uint32_t original_channel
Definition type.h:185
uint32_t channel
Definition type.h:191
uint32_t reshaped_size() const
Returns the total size of the reshaped input/output.
Definition type.h:210
uint32_t max_width
Definition type.h:193
uint32_t size() const
Returns the total size of the NPU input/output.
Definition type.h:218
uint32_t original_width
Definition type.h:184
uint32_t max_height
Definition type.h:192
uint32_t reshaped_width
Definition type.h:187
Buffer
A simple byte-sized buffer.
Definition type.h:153
uint64_t size
Definition type.h:155
int8_t * data
Definition type.h:154
CacheInfo
Struct representing KV-cache information.
Definition type.h:481
CoreId
Represents a unique identifier for an NPU core.
Definition type.h:123
bool operator<(const CoreId &rhs) const
Compares two CoreId objects for ordering.
Definition type.h:143
bool operator==(const CoreId &rhs) const
Checks if two CoreId objects are equal.
Definition type.h:133
Cluster cluster
Definition type.h:124
Scale
Struct for scale values.
Definition type.h:98
float operator[](int i) const
Returns the scale value at the specified index.
Definition type.h:109
float scale
Definition type.h:100
bool is_uniform
Definition type.h:101
std::vector< float > scale_list
Definition type.h:99