type.h Source File

type.h Source File

SDK qb Runtime Library: type.h Source File
SDK qb Runtime Library v1.1
MCS001-
type.h
Go to the documentation of this file.
1// Copyright ⓒ 2019- Mobilint Inc. All rights reserved.
5
6#ifndef QBRUNTIME_TYPE_H_
7#define QBRUNTIME_TYPE_H_
8
9#include <cstdint>
10#include <string>
11#include <tuple>
12#include <vector>
13
14#include "qbruntime/export.h"
15
16namespace mobilint {
23
29QBRUNTIME_EXPORT std::string getQbRuntimeVersion();
30
36QBRUNTIME_EXPORT std::string getQbRuntimeGitVersion();
37
45QBRUNTIME_EXPORT std::string getQbRuntimeVendor();
46
55QBRUNTIME_EXPORT std::string getQbRuntimeProduct();
56
/// Enumerates clusters in the ARIES NPU.
///
/// Every enumerator is a multiple of 1 << 16, i.e. the low 16 bits of each
/// value are zero.
enum class Cluster : int32_t {
    Cluster0 = 1 << 16,
    Cluster1 = 2 << 16,
    Error = 0x7FFF'0000,
};
69
/// Enumerates cores within a cluster in the ARIES NPU.
///
/// Every enumerator fits in the low 16 bits. Core0..Core3 are numbered from 1
/// (not 0); All, GlobalCore and Error use reserved values at the top of the
/// 16-bit range.
enum class Core : int32_t {
    Core0 = 1,
    Core1 = 2,
    Core2 = 3,
    Core3 = 4,
    All = 0x0000'FFFC,
    GlobalCore = 0x0000'FFFE,
    Error = 0x0000'FFFF,
};
86
94
/// Struct for scale values.
struct Scale {
    /// Per-index scale values; read by operator[] when `is_uniform` is false.
    std::vector<float> scale_list;
    /// Single scale value; returned by operator[] when `is_uniform` is true.
    float scale = 0.0F;
    /// Selects which of `scale` / `scale_list` operator[] reads from.
    bool is_uniform = false;

    /// Returns the scale value at the specified index.
    ///
    /// @param i Index into `scale_list`. Ignored when `is_uniform` is true.
    /// @return `scale` if `is_uniform` is true, otherwise `scale_list[i]`.
    ///
    /// @note No bounds checking is performed: when `is_uniform` is false the
    ///       caller must ensure 0 <= i < scale_list.size().
    float operator[](int i) const {
        if (is_uniform) {
            return scale;
        }
        return scale_list[i];
    }
};
116
// Represents a unique identifier for an NPU core.
//
// NOTE(review): listing lines 124-125 (the member declarations) are missing
// from this extract. The symbol index lists `Cluster cluster` at type.h:124;
// the `core` member used below is presumably declared at type.h:125 - confirm
// against the real header.
123struct CoreId {
126
// Checks if two CoreId objects are equal: true when both `cluster` and `core`
// compare equal.
133 bool operator==(const CoreId& rhs) const {
134 return std::tie(cluster, core) == std::tie(rhs.cluster, rhs.core);
135 }
136
// Compares two CoreId objects for ordering: lexicographic on
// (cluster, core), i.e. `cluster` is compared first and `core` breaks ties.
143 bool operator<(const CoreId& rhs) const {
144 return std::tie(cluster, core) < std::tie(rhs.cluster, rhs.core);
145 }
146};
147
/// A simple byte-sized buffer.
///
/// This struct holds only a pointer and a size; it has no constructor or
/// destructor, so it does not allocate or release the memory `data` points to.
struct Buffer {
    /// Pointer to the first byte; null by default.
    int8_t* data = nullptr;
    /// Buffer size; zero by default. Presumably a byte count - confirm.
    uint64_t size = 0;
};
157
// Defines the core mode for NPU execution.
//
// NOTE(review): the listing jumps from line 173 to line 176 here, so lines
// 174-175 of the real header are not shown; they may hold further enumerators
// or only comments - confirm against the real header before relying on this
// list being complete.
170enum class CoreMode : uint8_t {
171 Single = 0,
172 Multi = 1,
173 Global = 2,
176 Auto = 0xE,
177 Error = 0xF,
178};
179
// Members of the struct the symbol index describes as "Struct representing
// input/output buffer information" (declared at type.h:183; the opening line
// is missing from this listing).
184 // clang-format off
// Dimensions of the original input/output.
185 uint32_t original_height = 0;
186 uint32_t original_width = 0;
187 uint32_t original_channel = 0;
// Dimensions of the reshaped input/output.
188 uint32_t reshaped_height = 0;
189 uint32_t reshaped_width = 0;
190 uint32_t reshaped_channel = 0;
// Dimensions of the NPU input/output; multiplied together by size().
191 uint32_t height = 0;
192 uint32_t width = 0;
193 uint32_t channel = 0;
// NOTE(review): meaning of the max_* fields is not visible in this listing.
194 uint32_t max_height = 0;
195 uint32_t max_width = 0;
196 uint32_t max_channel = 0;
197 uint32_t max_cache_size = 0;
198 // clang-format on
199
// Returns the total size of the original input/output.
// NOTE(review): the body (listing line 205) is missing from this extract.
204 uint32_t original_size() const {
206 }
207
// Returns the total size of the reshaped input/output.
// NOTE(review): the body (listing line 213) is missing from this extract.
212 uint32_t reshaped_size() const {
214 }
215
// Returns the total size of the NPU input/output: height * width * channel,
// computed in uint32_t arithmetic (wraps modulo 2^32 on overflow).
220 uint32_t size() const { return height * width * channel; }
221};
222
// Configuration object describing how a model is mapped onto NPU cores.
//
// NOTE(review): this listing is lossy. Several declarations named in the
// symbol index (ModelConfig(), setAutoCoreMode(), setGlobal8CoreMode()), the
// second line of two default arguments, and the mCoreAllocationPolicy member
// are not shown here. Consult the real header for the full interface.
235class QBRUNTIME_EXPORT ModelConfig {
236public:
242
256
// Sets the model to use single-core mode for inference with a specified
// number of local cores.
270 bool setSingleCoreMode(int num_cores);
271
// Sets the model to use single-core mode for inference with a specific set of
// NPU local cores.
283 bool setSingleCoreMode(std::vector<CoreId> core_ids);
284
// Sets the model to use multi-core mode for batch inference.
// (The rest of the default argument, listing line 298, is missing here.)
297 bool setMultiCoreMode(std::vector<Cluster> clusters = {Cluster::Cluster0,
299
// Sets the model to use global4-core mode for inference with a specified set
// of NPU clusters.
// (The rest of the default argument, listing line 314, is missing here.)
313 bool setGlobal4CoreMode(std::vector<Cluster> clusters = {Cluster::Cluster0,
315
326
// Gets the core mode to be applied to the model.
334 CoreMode getCoreMode() const { return mCoreMode; }
335
// Gets the core allocation policy to be applied to the model.
344 CoreAllocationPolicy getCoreAllocationPolicy() const { return mCoreAllocationPolicy; }
345
// Gets the number of cores to be allocated for the model.
354 int getNumCores() const { return mNumCores; }
355
// Forces the use of a specific NPU bundle.
371 bool forceSingleNPUBundle(int npu_bundle_index);
372
// Retrieves the index of the forced NPU bundle (-1 when none is forced, per
// the member's default below).
382 int getForcedNPUBundleIndex() const { return mForcedNPUBundleIndex; }
383
// Returns the list of NPU CoreIds to be used for model inference.
394 const std::vector<CoreId>& getCoreIds() const { return mCoreIds; }
395
// Returns a reference to the stored cluster list (mClusters).
396 const std::vector<Cluster>& getClusters() const { return mClusters; }
397
// Enables or disables the asynchronous pipeline required for asynchronous
// inference.
413 void setAsyncPipelineEnabled(bool enable);
414
// Returns whether the asynchronous pipeline is enabled in this configuration.
420 bool getAsyncPipelineEnabled() const { return mAsyncPipelineEnabled; }
421
// Sets activation buffer slots for multi-activation supported model.
442 void setActivationSlots(int count);
443
// Returns activation buffer slot count.
451 int getActivationSlots() const { return mActivationSlots; }
452
// Constructs a configuration from a core count; explicit, so an int does not
// implicitly convert to ModelConfig.
453 explicit ModelConfig(int num_cores);
454
// NOTE(review): no description is given in the symbol index; presumably
// selects global-core mode on the given clusters - confirm.
455 bool setGlobalCoreMode(std::vector<Cluster> clusters);
456
// Public data members.
// NOTE(review): meaning and units are not visible in this listing.
460 std::vector<uint64_t> early_latencies;
464 std::vector<uint64_t> finish_latencies;
465
466private:
// A freshly constructed object starts in auto-core mode unless a constructor
// overrides it.
467 CoreMode mCoreMode = CoreMode::Auto;
469 std::vector<Cluster> mClusters;
470 std::vector<CoreId> mCoreIds;
// NOTE(review): no in-class initializer; presumably assigned by every
// constructor (defined outside this header) - confirm.
471 int mNumCores;
472 int mForcedNPUBundleIndex = -1; // -1 means single npu bundle usage is not forced.
473 bool mAsyncPipelineEnabled = false;
474 int mActivationSlots = 1;
475};
476
/// Log levels accepted by setLogLevel().
///
/// Values increase from DEBUG (1) to OFF (6).
enum class LogLevel : char {
    DEBUG = 1,
    INFO = 2,
    WARN = 3,
    ERR = 4,
    FATAL = 5,
    OFF = 6,
};
488
/// Enumerates cache types.
enum class CacheType : uint8_t {
    Default = 0,
    Batch,  // implicitly 1
    Error = 0x0F,
};
493
497struct CacheInfo {
498 CacheType cache_type = CacheType::Error;
499 std::string name;
500 std::string layer_hash;
501 uint64_t size = 0;
502 size_t num_batches = 0;
503};
504
/// Enumerates data types.
enum class DataType : uint8_t {
    Float32 = 0,
    Float16,  // implicitly 1
    Int8,     // implicitly 2
    Uint8,    // implicitly 3
    Error = 0x0F,
};
509
510QBRUNTIME_EXPORT void setLogLevel(LogLevel level);
511
524QBRUNTIME_EXPORT bool startTracingEvents(const char* path);
525
532QBRUNTIME_EXPORT void stopTracingEvents();
533
551QBRUNTIME_EXPORT std::string getModelSummary(const std::string& mxq_path);
552
558QBRUNTIME_EXPORT std::vector<int> getAvailableDeviceNumbers();
559
561
562} // namespace mobilint
563
564#endif
bool setGlobal8CoreMode()
Sets the model to use global8-core mode for inference.
bool setGlobalCoreMode(std::vector< Cluster > clusters)
CoreMode getCoreMode() const
Gets the core mode to be applied to the model.
Definition type.h:334
bool getAsyncPipelineEnabled() const
Returns whether the asynchronous pipeline is enabled in this configuration.
Definition type.h:420
bool setGlobal4CoreMode(std::vector< Cluster > clusters={Cluster::Cluster0, Cluster::Cluster1})
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.
bool setAutoCoreMode()
Sets the model to detect CoreMode automatically.
void setAsyncPipelineEnabled(bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.
ModelConfig(int num_cores)
bool setSingleCoreMode(int num_cores)
Sets the model to use single-core mode for inference with a specified number of local cores.
ModelConfig()
Default constructor. This default-constructed object is initially set to auto-core mode.
bool setMultiCoreMode(std::vector< Cluster > clusters={Cluster::Cluster0, Cluster::Cluster1})
Sets the model to use multi-core mode for batch inference.
int getActivationSlots() const
Returns activation buffer slot count.
Definition type.h:451
const std::vector< CoreId > & getCoreIds() const
Returns the list of NPU CoreIds to be used for model inference.
Definition type.h:394
std::vector< uint64_t > early_latencies
Definition type.h:460
std::vector< uint64_t > finish_latencies
Definition type.h:464
bool setSingleCoreMode(std::vector< CoreId > core_ids)
Sets the model to use single-core mode for inference with a specific set of NPU local cores.
CoreAllocationPolicy getCoreAllocationPolicy() const
Gets the core allocation policy to be applied to the model.
Definition type.h:344
bool forceSingleNPUBundle(int npu_bundle_index)
Forces the use of a specific NPU bundle.
int getNumCores() const
Gets the number of cores to be allocated for the model.
Definition type.h:354
int getForcedNPUBundleIndex() const
Retrieves the index of the forced NPU bundle.
Definition type.h:382
void setActivationSlots(int count)
Sets activation buffer slots for multi-activation supported model.
DataType
Enumerates data types: Float32, Float16, Int8, Uint8, and Error.
Definition type.h:508
QBRUNTIME_EXPORT std::string getQbRuntimeProduct()
Retrieves product information of the qbruntime.
QBRUNTIME_EXPORT bool startTracingEvents(const char *path)
Starts event tracing and prepares to save the trace log to a specified file.
QBRUNTIME_EXPORT std::vector< int > getAvailableDeviceNumbers()
Gets the device numbers of the available NPU devices.
QBRUNTIME_EXPORT std::string getModelSummary(const std::string &mxq_path)
Generates a structured summary of the specified MXQ model.
Cluster
Enumerates clusters in the ARIES NPU.
Definition type.h:64
QBRUNTIME_EXPORT void stopTracingEvents()
Stops event tracing and writes the recorded trace log.
QBRUNTIME_EXPORT std::string getQbRuntimeVendor()
Retrieves the vendor name of the qbruntime.
CacheType
Enumerates cache types: Default, Batch, and Error.
Definition type.h:492
LogLevel
Enumerates log levels: DEBUG, INFO, WARN, ERR, FATAL, and OFF.
Definition type.h:480
CoreMode
Defines the core mode for NPU execution.
Definition type.h:170
QBRUNTIME_EXPORT std::string getQbRuntimeGitVersion()
Retrieves the Git commit hash of the qbruntime.
QBRUNTIME_EXPORT std::string getQbRuntimeVersion()
Retrieves the version of the qbruntime.
CoreAllocationPolicy
Core allocation policy.
Definition type.h:90
Core
Enumerates cores within a cluster in the ARIES NPU.
Definition type.h:77
Struct representing input/output buffer information.
Definition type.h:183
uint32_t original_height
Definition type.h:185
uint32_t max_cache_size
Definition type.h:197
uint32_t width
Definition type.h:192
uint32_t reshaped_channel
Definition type.h:190
uint32_t height
Definition type.h:191
uint32_t reshaped_height
Definition type.h:188
uint32_t original_size() const
Returns the total size of the original input/output.
Definition type.h:204
uint32_t max_channel
Definition type.h:196
uint32_t original_channel
Definition type.h:187
uint32_t channel
Definition type.h:193
uint32_t reshaped_size() const
Returns the total size of the reshaped input/output.
Definition type.h:212
uint32_t max_width
Definition type.h:195
uint32_t size() const
Returns the total size of the NPU input/output.
Definition type.h:220
uint32_t original_width
Definition type.h:186
uint32_t max_height
Definition type.h:194
uint32_t reshaped_width
Definition type.h:189
A simple byte-sized buffer.
Definition type.h:153
uint64_t size
Definition type.h:155
int8_t * data
Definition type.h:154
Struct representing KV-cache information.
Definition type.h:497
Represents a unique identifier for an NPU core.
Definition type.h:123
bool operator<(const CoreId &rhs) const
Compares two CoreId objects for ordering.
Definition type.h:143
bool operator==(const CoreId &rhs) const
Checks if two CoreId objects are equal.
Definition type.h:133
Cluster cluster
Definition type.h:124
Struct for scale values.
Definition type.h:98
float operator[](int i) const
Returns the scale value at the specified index.
Definition type.h:109
float scale
Definition type.h:100
bool is_uniform
Definition type.h:101
std::vector< float > scale_list
Definition type.h:99