type.h Source File

type.h Source File

Runtime Library: type.h Source File
Runtime Library v0.30
Mobilint SDK qb
type.h
Go to the documentation of this file.
1// Copyright ⓒ 2019- Mobilint Inc. All rights reserved.
5
6#ifndef MACCEL_TYPE_H_
7#define MACCEL_TYPE_H_
8
9#include <cstdint>
10#include <string>
11#include <tuple>
12#include <vector>
13
14#include "maccel/export.h"
15
16namespace mobilint {
23
// Enumerates clusters in the ARIES NPU.
//
// Each cluster value is a small index shifted left by 16 bits
// (Cluster0 = 1 << 16, Cluster1 = 2 << 16), so the low 16 bits stay clear;
// the Core enumerators in this file all fit in those low 16 bits.
// NOTE(review): Error is presumably an invalid-cluster marker; the listing
// does not show how it is produced or consumed - confirm.
31enum class Cluster : int32_t {
32 Cluster0 = 1 << 16,
33 Cluster1 = 2 << 16,
34 Error = 0x7FFF'0000,
35};
36
// Enumerates cores within a cluster in the ARIES NPU.
//
// Core0 through Core3 are numbered starting from 1, not 0. All, GlobalCore
// and Error are special values at the top of the low 16-bit range
// (0xFFFC, 0xFFFE and 0xFFFF respectively).
// NOTE(review): the exact meaning of All and GlobalCore is not shown in this
// listing - confirm against the full header documentation.
44enum class Core : int32_t {
45 Core0 = 1,
46 Core1 = 2,
47 Core2 = 3,
48 Core3 = 4,
49 All = 0x0000'FFFC,
50 GlobalCore = 0x0000'FFFE,
51 Error = 0x0000'FFFF,
52};
53
// NOTE(review): the opening line of this enum (type.h:57) is missing from
// this listing; the index at the end of the page names the type defined at
// that line CollaborationModel. Its underlying type, if any, is not shown.
58 Unified,
59 Separated,
60 Undefined,
61};
62
// Status values for a core: Vacant, Ready, Idle or Running.
// NOTE(review): no description for this enum survives in the listing, so the
// precise meaning of each state and the transitions between them are not
// documented here - confirm against the runtime documentation.
66enum class CoreStatus {
67 Vacant,
68 Ready,
69 Idle,
70 Running,
71};
72
// Scheduling policy options. ModelConfig::schedule_policy is of this type and
// is initialised to FIFO.
// NOTE(review): how each policy orders work is not shown in this listing;
// the names suggest first-in-first-out, last-in-first-out and priority
// ordering - confirm.
76enum class SchedulePolicy {
77 FIFO,
78 LIFO,
79 ByPriority,
80 Undefined,
81};
82
// Latency-setting policy options. ModelConfig::latency_set_policy is of this
// type and is initialised to Auto.
// NOTE(review): presumably Manual means the caller supplies the latency
// values (see ModelConfig::early_latencies / finish_latencies) - confirm.
86enum class LatencySetPolicy {
87 Auto,
88 Manual,
89};
90
// NOTE(review): the opening line of this enum (type.h:94) is missing from
// this listing; the index at the end of the page names the type defined at
// that line MaintenancePolicy. ModelConfig::maintenance_policy is initialised
// to MaintenancePolicy::Maintain.
95 Maintain,
96 DropExpired,
97 Undefined,
98};
99
// Outcome values for an inference: Successful, Expired or Unexpected.
// NOTE(review): no description survives in this listing; Expired presumably
// relates to the latency / DropExpired settings elsewhere in this file -
// confirm.
103enum class InferenceResult {
104 Successful,
105 Expired,
106 Unexpected,
107};
108
// Core allocation policy.
// NOTE(review): the opening line of this enum (type.h:112) is missing from
// this listing; the index at the end of the page names the type defined at
// that line CoreAllocationPolicy.
113 Auto,
114 Manual,
115};
116
// Struct for scale values.
//
// Holds either a single scale shared by every index (is_uniform == true,
// value in `scale`) or one scale per index (values in `scale_list`).
120struct Scale {
121 std::vector<float> scale_list;
122 float scale = 0.0F;
123 bool is_uniform = false;
124
// Returns the scale value at the specified index.
//
// When is_uniform is set, `i` is ignored and `scale` is returned. Otherwise
// the result is scale_list[i]; std::vector::operator[] performs no bounds
// check, so `i` must satisfy 0 <= i < scale_list.size() or the behaviour is
// undefined.
131 float operator[](int i) const {
132 if (is_uniform) {
133 return scale;
134 }
135 return scale_list[i];
136 }
137};
138
139class Statistics;
140
// Represents a unique identifier for an NPU core.
//
// NOTE(review): the member declarations (type.h:148-149) are missing from
// this listing. The index at the end of the page shows `Cluster cluster` at
// line 148, and the operators below also use a member named `core`.
147struct CoreId {
150
// Checks if two CoreId objects are equal: true when both the cluster and the
// core members compare equal.
157 bool operator==(const CoreId& rhs) const {
158 return std::tie(cluster, core) == std::tie(rhs.cluster, rhs.core);
159 }
160
// Compares two CoreId objects for ordering. The comparison is lexicographic
// on (cluster, core): cluster is compared first, core breaks ties.
167 bool operator<(const CoreId& rhs) const {
168 return std::tie(cluster, core) < std::tie(rhs.cluster, rhs.core);
169 }
170};
171
// A simple byte-sized buffer: a pointer to int8_t data plus a size.
// Default state is an empty buffer (data == nullptr, size == 0).
// NOTE(review): the struct has no destructor, so it does not free `data`
// itself; who allocates and releases the memory is not shown here - confirm.
// NOTE(review): `size` is presumably a byte count - confirm.
177struct Buffer {
178 int8_t* data = nullptr;
179 uint64_t size = 0;
180};
181
// Defines the core mode for NPU execution.
//
// ModelConfig stores one of these values and initialises it to Single.
// NOTE(review): source lines type.h:197-198 are absent from this listing, so
// further enumerators may exist between Global and Error - confirm against
// the real header.
193enum class CoreMode : uint8_t {
194 Single = 0,
195 Multi = 1,
196 Global = 2,
199 Error = 0xF,
200};
201
// Fields of the struct representing input/output buffer information.
// NOTE(review): the opening line of the struct (type.h:205) is missing from
// this listing, so its name is not visible here.
//
// The dimensions come in four height/width/channel triples - original_*,
// reshaped_*, unprefixed, and max_* - plus max_cache_size. Every field is
// zero-initialised. The unprefixed triple is the one size() multiplies.
206 // clang-format off
207 uint32_t original_height = 0;
208 uint32_t original_width = 0;
209 uint32_t original_channel = 0;
210 uint32_t reshaped_height = 0;
211 uint32_t reshaped_width = 0;
212 uint32_t reshaped_channel = 0;
213 uint32_t height = 0;
214 uint32_t width = 0;
215 uint32_t channel = 0;
216 uint32_t max_height = 0;
217 uint32_t max_width = 0;
218 uint32_t max_channel = 0;
219 uint32_t max_cache_size = 0;
220 // clang-format on
221
// Returns the total size of the original input/output.
// NOTE(review): the body line (type.h:227) is missing from this listing;
// presumably it multiplies the three original_* fields, mirroring size() -
// confirm against the real header.
226 uint32_t original_size() const {
228 }
229
// Returns the total size of the reshaped input/output.
// NOTE(review): the body line (type.h:235) is missing from this listing;
// presumably it multiplies the three reshaped_* fields, mirroring size() -
// confirm against the real header.
234 uint32_t reshaped_size() const {
236 }
237
// Returns the total size of the NPU input/output: height * width * channel.
// The product is computed in uint32_t, so it wraps modulo 2^32 when the true
// element count does not fit in 32 bits.
242 uint32_t size() const { return height * width * channel; }
243};
244
// Configuration of how a model is mapped onto NPU cores: core mode, core
// list, number of cores, forced NPU bundle, async pipeline flag, plus public
// scheduling / latency / maintenance policy fields.
//
// NOTE(review): this listing omits several source lines of the class (for
// example type.h:259-263, 318-327 and 462). The index at the end of the page
// additionally lists ModelConfig() and setGlobal8CoreMode(), which are
// presumably declared on the missing lines - confirm against the real header.
257class MACCEL_EXPORT ModelConfig {
258public:
264
// Sets the model to use single-core mode for inference with a specified
// number of local cores.
278 bool setSingleCoreMode(int num_cores);
279
// Sets the model to use single-core mode for inference with a specific set
// of NPU local cores.
291 bool setSingleCoreMode(std::vector<CoreId> core_ids);
292
// Sets the model to use multi-core mode for batch inference.
303 bool setMultiCoreMode(std::vector<Cluster> clusters);
304
// Sets the model to use global4-core mode for inference with a specified set
// of NPU clusters.
316 bool setGlobal4CoreMode(std::vector<Cluster> clusters);
317
328
// Gets the core mode to be applied to the model.
336 CoreMode getCoreMode() const { return mCoreMode; }
337
// Gets the core allocation policy to be applied to the model.
// NOTE(review): the declaration of mCoreAllocationPolicy is not visible in
// this listing (type.h:462 is absent) - presumably it lives there.
346 CoreAllocationPolicy getCoreAllocationPolicy() const { return mCoreAllocationPolicy; }
347
// Gets the number of cores to be allocated for the model.
356 int getNumCores() const { return mNumCores; }
357
// Forces the use of a specific NPU bundle.
373 bool forceSingleNPUBundle(int npu_bundle_index);
374
// Retrieves the index of the forced NPU bundle; -1 means no bundle is forced
// (see the initialiser of mForcedNPUBundleIndex below).
384 int getForcedNPUBundleIndex() const { return mForcedNPUBundleIndex; }
385
// Returns the list of NPU CoreIds to be used for model inference. The
// returned reference points at internal state of this object.
396 const std::vector<CoreId>& getCoreIds() const { return mCoreIds; }
397
// Enables or disables the asynchronous pipeline required for asynchronous
// inference.
413 void setAsyncPipelineEnabled(bool enable);
414
// Returns whether the asynchronous pipeline is enabled in this configuration.
420 bool getAsyncPipelineEnabled() const { return mAsyncPipelineEnabled; }
421
// NOTE(review): the declarations from here through setAutoMode() carry no
// description in this listing; their exact semantics are not shown - confirm.
422 explicit ModelConfig(int num_cores);
423
426 bool include(Cluster cluster, Core core);
427 bool include(Cluster cluster);
428 bool include(Core core);
429
430 bool exclude(Cluster cluster, Core core);
431 bool exclude(Cluster cluster);
432 bool exclude(Core core);
433
434 bool setGlobalCoreMode(std::vector<Cluster> clusters);
435
436 bool setAutoMode(int num_cores = 1);
438
// Public policy fields, with their defaults: FIFO scheduling, automatic
// latency setting, and the Maintain maintenance policy.
442 SchedulePolicy schedule_policy = SchedulePolicy::FIFO;
446 LatencySetPolicy latency_set_policy = LatencySetPolicy::Auto;
450 MaintenancePolicy maintenance_policy = MaintenancePolicy::Maintain;
// NOTE(review): units and indexing of the two latency vectors are not shown
// in this listing - confirm.
454 std::vector<uint64_t> early_latencies;
458 std::vector<uint64_t> finish_latencies;
459
460private:
461 CoreMode mCoreMode = CoreMode::Single;
463 std::vector<CoreId> mCoreIds;
// NOTE(review): mNumCores has no in-class initialiser, unlike its
// neighbours; it must be set by every constructor (not visible here) before
// getNumCores() is called - confirm.
464 int mNumCores;
465 int mForcedNPUBundleIndex = -1; // -1 means single npu bundle usage is not forced.
466 bool mAsyncPipelineEnabled = false;
467};
468
// Log level passed to setLogLevel(). Values are numbered from 1 (DEBUG)
// through 5 (FATAL), with OFF = 6 as the highest value.
// NOTE(review): presumably messages below the selected level are suppressed
// and OFF disables logging entirely - confirm.
472enum class LogLevel : char {
473 DEBUG = 1,
474 INFO = 2,
475 WARN = 3,
476 ERR = 4,
477 FATAL = 5,
478 OFF = 6,
479};
480
// Cache type tag used by CacheInfo::cache_type. Default is 0, Batch takes
// the next value (1), and Error is 0x0F.
484enum class CacheType : uint8_t { Default = 0, Batch, Error = 0x0F };
485
// Struct representing KV-cache information.
//
// A default-constructed CacheInfo has cache_type == CacheType::Error, empty
// strings, and zero size / num_batches.
// NOTE(review): the unit of `size` (bytes vs. elements) is not shown in this
// listing - confirm.
489struct CacheInfo {
490 CacheType cache_type = CacheType::Error;
491 std::string name;
492 std::string layer_hash;
493 uint64_t size = 0;
494 size_t num_batches = 0;
495};
496
// NOTE(review): no description survives in this listing; presumably sets the
// log verbosity of the runtime to `level` - confirm.
497MACCEL_EXPORT void setLogLevel(LogLevel level);
498
// Starts event tracing and prepares to save the trace log to a specified
// file.
// `path` is the file the trace log will be saved to.
// NOTE(review): the bool result presumably reports whether tracing started
// successfully - confirm.
511MACCEL_EXPORT bool startTracingEvents(const char* path);
512
// Stops event tracing and writes the recorded trace log.
519MACCEL_EXPORT void stopTracingEvents();
520
// Generates a structured summary of the specified MXQ model.
// `mxq_path` identifies the MXQ model to summarise; the summary is returned
// as a std::string.
// NOTE(review): the format of the summary and the behaviour on an unreadable
// path are not shown in this listing - confirm.
538MACCEL_EXPORT std::string getModelSummary(const std::string& mxq_path);
539
541
542} // namespace mobilint
543
544#endif
bool setGlobal8CoreMode()
Sets the model to use global8-core mode for inference.
bool setGlobalCoreMode(std::vector< Cluster > clusters)
bool setGlobal4CoreMode(std::vector< Cluster > clusters)
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.
CoreMode getCoreMode() const
Gets the core mode to be applied to the model.
Definition type.h:336
bool getAsyncPipelineEnabled() const
Returns whether the asynchronous pipeline is enabled in this configuration.
Definition type.h:420
MaintenancePolicy maintenance_policy
Definition type.h:450
bool setMultiCoreMode(std::vector< Cluster > clusters)
Sets the model to use multi-core mode for batch inference.
bool include(Cluster cluster)
bool exclude(Core core)
void setAsyncPipelineEnabled(bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.
bool exclude(Cluster cluster, Core core)
ModelConfig(int num_cores)
bool setSingleCoreMode(int num_cores)
Sets the model to use single-core mode for inference with a specified number of local cores.
ModelConfig()
Default constructor. This default-constructed object is initially set to single-core mode with all NP...
LatencySetPolicy latency_set_policy
Definition type.h:446
const std::vector< CoreId > & getCoreIds() const
Returns the list of NPU CoreIds to be used for model inference.
Definition type.h:396
std::vector< uint64_t > early_latencies
Definition type.h:454
std::vector< uint64_t > finish_latencies
Definition type.h:458
bool exclude(Cluster cluster)
bool setSingleCoreMode(std::vector< CoreId > core_ids)
Sets the model to use single-core mode for inference with a specific set of NPU local cores.
CoreAllocationPolicy getCoreAllocationPolicy() const
Gets the core allocation policy to be applied to the model.
Definition type.h:346
bool forceSingleNPUBundle(int npu_bundle_index)
Forces the use of a specific NPU bundle.
int getNumCores() const
Gets the number of cores to be allocated for the model.
Definition type.h:356
int getForcedNPUBundleIndex() const
Retrieves the index of the forced NPU bundle.
Definition type.h:384
bool include(Core core)
bool setAutoMode(int num_cores=1)
SchedulePolicy schedule_policy
Definition type.h:442
bool include(Cluster cluster, Core core)
MACCEL_EXPORT bool startTracingEvents(const char *path)
Starts event tracing and prepares to save the trace log to a specified file.
InferenceResult
Definition type.h:103
MaintenancePolicy
Definition type.h:94
CollaborationModel
Definition type.h:57
CoreStatus
Definition type.h:66
SchedulePolicy
Definition type.h:76
Cluster
Enumerates clusters in the ARIES NPU.
Definition type.h:31
CacheType
CacheType.
Definition type.h:484
LogLevel
LogLevel.
Definition type.h:472
CoreMode
Defines the core mode for NPU execution.
Definition type.h:193
MACCEL_EXPORT std::string getModelSummary(const std::string &mxq_path)
Generates a structured summary of the specified MXQ model.
LatencySetPolicy
Definition type.h:86
MACCEL_EXPORT void stopTracingEvents()
Stops event tracing and writes the recorded trace log.
CoreAllocationPolicy
Core allocation policy.
Definition type.h:112
Core
Enumerates cores within a cluster in the ARIES NPU.
Definition type.h:44
Struct representing input/output buffer information.
Definition type.h:205
uint32_t original_height
Definition type.h:207
uint32_t max_cache_size
Definition type.h:219
uint32_t width
Definition type.h:214
uint32_t reshaped_channel
Definition type.h:212
uint32_t height
Definition type.h:213
uint32_t reshaped_height
Definition type.h:210
uint32_t original_size() const
Returns the total size of the original input/output.
Definition type.h:226
uint32_t max_channel
Definition type.h:218
uint32_t original_channel
Definition type.h:209
uint32_t channel
Definition type.h:215
uint32_t reshaped_size() const
Returns the total size of the reshaped input/output.
Definition type.h:234
uint32_t max_width
Definition type.h:217
uint32_t size() const
Returns the total size of the NPU input/output.
Definition type.h:242
uint32_t original_width
Definition type.h:208
uint32_t max_height
Definition type.h:216
uint32_t reshaped_width
Definition type.h:211
A simple byte-sized buffer.
Definition type.h:177
uint64_t size
Definition type.h:179
int8_t * data
Definition type.h:178
Struct representing KV-cache information.
Definition type.h:489
Represents a unique identifier for an NPU core.
Definition type.h:147
bool operator<(const CoreId &rhs) const
Compares two CoreId objects for ordering.
Definition type.h:167
bool operator==(const CoreId &rhs) const
Checks if two CoreId objects are equal.
Definition type.h:157
Cluster cluster
Definition type.h:148
Struct for scale values.
Definition type.h:120
float operator[](int i) const
Returns the scale value at the specified index.
Definition type.h:131
float scale
Definition type.h:122
bool is_uniform
Definition type.h:123
std::vector< float > scale_list
Definition type.h:121