type.py Source File

type.py Source File#

SDK qb Runtime Library: type.py Source File
SDK qb Runtime Library v1.0
MCS001-
type.py
Go to the documentation of this file.
1
4
5from typing import List, Optional, Tuple
6from enum import Enum
7
8import numpy as np
9
10import qbruntime.qbruntime as _cQbRuntime
11
12
15
16
class Cluster(Enum):
    """
    @brief Enumerates clusters in the ARIES NPU.

    @note The ARIES NPU consists of two clusters, each containing one global core and
    four local cores, totaling eight local cores. REGULUS has only a single cluster
    (Cluster0) with one local core (Core0).
    """

    # Values mirror the C++ runtime enum so they can be passed across the binding.
    Cluster0 = _cQbRuntime.Cluster.Cluster0
    Cluster1 = _cQbRuntime.Cluster.Cluster1
    # Sentinel reported by the runtime for an invalid/unknown cluster.
    Error = _cQbRuntime.Cluster.Error
29
30
class Core(Enum):
    """
    @brief Enumerates cores within a cluster in the ARIES NPU.

    @note The ARIES NPU consists of two clusters, each containing one global core and
    four local cores, totaling eight local cores. REGULUS has only a single cluster
    (Cluster0) with one local core (Core0).
    """

    # Values mirror the C++ runtime enum so they can be passed across the binding.
    Core0 = _cQbRuntime.Core.Core0
    Core1 = _cQbRuntime.Core.Core1
    Core2 = _cQbRuntime.Core.Core2
    Core3 = _cQbRuntime.Core.Core3
    # 'All' and 'GlobalCore' are special selectors defined by the C++ runtime.
    All = _cQbRuntime.Core.All
    GlobalCore = _cQbRuntime.Core.GlobalCore
    # Sentinel reported by the runtime for an invalid/unknown core.
    Error = _cQbRuntime.Core.Error
47
48
class CoreAllocationPolicy(Enum):
    """
    @brief Core allocation policy.

    `Auto` lets the runtime allocate available local cores when the model is
    launched; `Manual` presumably means the user supplied an explicit core list
    (see `ModelConfig.set_single_core_mode`) — confirm against the C++ runtime.
    """

    Auto = _cQbRuntime.CoreAllocationPolicy.Auto
    Manual = _cQbRuntime.CoreAllocationPolicy.Manual
54
55
class Scale:
    """@brief Struct for scale values."""

    def __init__(self, scale: float, is_uniform: bool, scale_list: List[float]):
        cpp_scale = _cQbRuntime.Scale()
        cpp_scale.scale = scale
        cpp_scale.is_uniform = is_uniform
        cpp_scale.scale_list = scale_list
        self._scale = cpp_scale

    @classmethod
    def from_cpp(cls, _scale: _cQbRuntime.Scale):
        """Alternate constructor: wrap the values of a C++ Scale object."""
        return cls(_scale.scale, _scale.is_uniform, _scale.scale_list)

    @property
    def scale(self) -> float:
        return self._scale.scale

    @scale.setter
    def scale(self, value: float):
        self._scale.scale = value

    @property
    def is_uniform(self) -> bool:
        return self._scale.is_uniform

    @is_uniform.setter
    def is_uniform(self, value: bool):
        self._scale.is_uniform = value

    @property
    def scale_list(self) -> List[float]:
        return self._scale.scale_list

    @scale_list.setter
    def scale_list(self, value: List[float]):
        self._scale.scale_list = value

    def __getitem__(self, i: int) -> float:
        """
        @brief Returns the scale value at the specified index.

        @param[in] i Index.
        @return Scale value.
        """
        return self._scale[i]

    def __repr__(self):
        fields = ", ".join(
            f"{key}={val}"
            for key, val in (
                ("scale", self.scale),
                ("is_uniform", self.is_uniform),
                ("scale_list", self.scale_list),
            )
        )
        return f"{self.__class__.__name__}({fields})"
112
113
class CoreId:
    """
    @brief Represents a unique identifier for an NPU core.

    A CoreId consists of a Cluster and a Core, identifying a specific core
    within an NPU.
    """

    def __init__(self, cluster: Cluster, core: Core):
        cpp_core_id = _cQbRuntime.CoreId()
        cpp_core_id.cluster = cluster.value
        cpp_core_id.core = core.value
        self._core_id = cpp_core_id

    @classmethod
    def from_cpp(cls, _core_id: _cQbRuntime.CoreId):
        """Alternate constructor: wrap the values of a C++ CoreId object."""
        return cls(Cluster(_core_id.cluster), Core(_core_id.core))

    @property
    def cluster(self) -> Cluster:
        return Cluster(self._core_id.cluster)

    @cluster.setter
    def cluster(self, value: Cluster):
        self._core_id.cluster = value.value

    @property
    def core(self) -> Core:
        return Core(self._core_id.core)

    @core.setter
    def core(self, value: Core):
        self._core_id.core = value.value

    def __eq__(self, other) -> bool:
        """
        @brief Checks if two CoreId objects are equal.

        @return True if both CoreId objects are identical, False otherwise.
        """
        return self._core_id == other._core_id

    def __lt__(self, other) -> bool:
        """
        @brief Compares two CoreId objects for ordering.

        @return True if this CoreId is less than the given CoreId, False otherwise.
        """
        return self._core_id < other._core_id

    def __repr__(self):
        fields = ", ".join(
            f"{key}={val}"
            for key, val in (("cluster", self.cluster), ("core", self.core))
        )
        return f"{self.__class__.__name__}({fields})"
169
170
class Buffer:
    """
    @brief A simple byte-sized buffer.

    This struct represents a contiguous block of memory for storing byte-sized data.
    """

    def __init__(self, _buffer: Optional[_cQbRuntime.Buffer] = None):
        # Wrap the supplied C++ buffer, or create a fresh one when none is given.
        if _buffer is None:
            _buffer = _cQbRuntime.Buffer()
        self._buffer = _buffer

    @property
    def size(self) -> int:
        return self._buffer.size

    @size.setter
    def size(self, value: int):
        self._buffer.size = value

    def set_buffer(self, arr: np.ndarray):
        # The C++ side expects contiguous memory, so normalize the layout first.
        contiguous = np.ascontiguousarray(arr)
        self._buffer.set_buffer(contiguous)

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._buffer.size})"
194
195
class CoreMode(Enum):
    """
    @brief Defines the core mode for NPU execution.

    Supported core modes include single-core, multi-core, global4-core, and global8-core.
    For detailed explanations of each mode, refer to the following functions:

    - `ModelConfig.set_single_core_mode()`
    - `ModelConfig.set_multi_core_mode()`
    - `ModelConfig.set_global4_core_mode()`
    - `ModelConfig.set_global8_core_mode()`
    """

    # Values mirror the C++ runtime enum so they can be passed across the binding.
    Single = _cQbRuntime.CoreMode.Single
    Multi = _cQbRuntime.CoreMode.Multi
    # NOTE(review): 'Global' has no dedicated setter here (only Global4/Global8 do);
    # presumably kept for backward compatibility with set_global_core_mode — confirm.
    Global = _cQbRuntime.CoreMode.Global
    Global4 = _cQbRuntime.CoreMode.Global4
    Global8 = _cQbRuntime.CoreMode.Global8
    # Sentinel reported by the runtime for an invalid/unknown mode.
    Error = _cQbRuntime.CoreMode.Error
215
216
class BufferInfo:
    """@brief Struct representing input/output buffer information."""

    def __init__(
        self,
        original_height: int = 0,
        original_width: int = 0,
        original_channel: int = 0,
        reshaped_height: int = 0,
        reshaped_width: int = 0,
        reshaped_channel: int = 0,
        height: int = 0,
        width: int = 0,
        channel: int = 0,
        max_height: int = 0,
        max_width: int = 0,
        max_channel: int = 0,
        max_cache_size: int = 0,
    ):
        self._buffer_info = _cQbRuntime.BufferInfo()
        self._buffer_info.original_height = original_height
        self._buffer_info.original_width = original_width
        self._buffer_info.original_channel = original_channel
        self._buffer_info.reshaped_height = reshaped_height
        self._buffer_info.reshaped_width = reshaped_width
        self._buffer_info.reshaped_channel = reshaped_channel
        self._buffer_info.height = height
        self._buffer_info.width = width
        self._buffer_info.channel = channel
        self._buffer_info.max_height = max_height
        self._buffer_info.max_width = max_width
        self._buffer_info.max_channel = max_channel
        self._buffer_info.max_cache_size = max_cache_size

    @classmethod
    def from_cpp(cls, _buffer_info: _cQbRuntime.BufferInfo):
        """Alternate constructor: copy the fields of a C++ BufferInfo object."""
        return cls(
            _buffer_info.original_height,
            _buffer_info.original_width,
            _buffer_info.original_channel,
            _buffer_info.reshaped_height,
            _buffer_info.reshaped_width,
            _buffer_info.reshaped_channel,
            _buffer_info.height,
            _buffer_info.width,
            _buffer_info.channel,
            _buffer_info.max_height,
            _buffer_info.max_width,
            _buffer_info.max_channel,
            _buffer_info.max_cache_size,
        )

    @property
    def original_height(self) -> int:
        """Height of original input/output"""
        return self._buffer_info.original_height

    @original_height.setter
    def original_height(self, value: int):
        self._buffer_info.original_height = value

    @property
    def original_width(self) -> int:
        """Width of original input/output"""
        return self._buffer_info.original_width

    @original_width.setter
    def original_width(self, value: int):
        self._buffer_info.original_width = value

    @property
    def original_channel(self) -> int:
        """Channel of original input/output"""
        return self._buffer_info.original_channel

    @original_channel.setter
    def original_channel(self, value: int):
        self._buffer_info.original_channel = value

    @property
    def reshaped_height(self) -> int:
        """Height of reshaped input/output"""
        return self._buffer_info.reshaped_height

    @reshaped_height.setter
    def reshaped_height(self, value: int):
        self._buffer_info.reshaped_height = value

    @property
    def reshaped_width(self) -> int:
        """Width of reshaped input/output"""
        return self._buffer_info.reshaped_width

    @reshaped_width.setter
    def reshaped_width(self, value: int):
        self._buffer_info.reshaped_width = value

    @property
    def reshaped_channel(self) -> int:
        """Channel of reshaped input/output"""
        return self._buffer_info.reshaped_channel

    @reshaped_channel.setter
    def reshaped_channel(self, value: int):
        self._buffer_info.reshaped_channel = value

    @property
    def height(self) -> int:
        """Height of NPU input/output"""
        return self._buffer_info.height

    @height.setter
    def height(self, value: int):
        self._buffer_info.height = value

    @property
    def width(self) -> int:
        """Width of NPU input/output"""
        return self._buffer_info.width

    @width.setter
    def width(self, value: int):
        self._buffer_info.width = value

    @property
    def channel(self) -> int:
        """Channel of NPU input/output"""
        return self._buffer_info.channel

    @channel.setter
    def channel(self, value: int):
        self._buffer_info.channel = value

    @property
    def max_height(self) -> int:
        """Maximum height of original input/output if data is sequential."""
        return self._buffer_info.max_height

    @max_height.setter
    def max_height(self, value: int):
        self._buffer_info.max_height = value

    @property
    def max_width(self) -> int:
        """Maximum width of original input/output if data is sequential."""
        return self._buffer_info.max_width

    @max_width.setter
    def max_width(self, value: int):
        self._buffer_info.max_width = value

    @property
    def max_channel(self) -> int:
        """Maximum channel of original input/output if data is sequential."""
        return self._buffer_info.max_channel

    @max_channel.setter
    def max_channel(self, value: int):
        self._buffer_info.max_channel = value

    @property
    def max_cache_size(self) -> int:
        """Maximum KV-cache size, relevant for LLM models using KV cache."""
        return self._buffer_info.max_cache_size

    @max_cache_size.setter
    def max_cache_size(self, value: int):
        self._buffer_info.max_cache_size = value

    def original_size(self) -> int:
        """
        @brief Returns the total size of the original input/output.

        @return The data size.
        """
        return self._buffer_info.original_size()

    def reshaped_size(self) -> int:
        """
        @brief Returns the total size of the reshaped input/output.

        @return The data size.
        """
        return self._buffer_info.reshaped_size()

    def size(self) -> int:
        """
        @brief Returns the total size of the NPU input/output.

        @return The data size.
        """
        return self._buffer_info.size()

    # Shape accessors delegate to the C++ object; '_chw' variants return the
    # channel-first ordering, the others presumably HWC — confirm with the runtime.
    def original_shape(self) -> Tuple[int, int, int]:
        return self._buffer_info.original_shape()

    def original_shape_chw(self) -> Tuple[int, int, int]:
        return self._buffer_info.original_shape_chw()

    def reshaped_shape(self) -> Tuple[int, int, int]:
        return self._buffer_info.reshaped_shape()

    def reshaped_shape_chw(self) -> Tuple[int, int, int]:
        return self._buffer_info.reshaped_shape_chw()

    def shape(self) -> Tuple[int, int, int]:
        return self._buffer_info.shape()

    def shape_chw(self) -> Tuple[int, int, int]:
        return self._buffer_info.shape_chw()

    def __repr__(self):
        d = {
            "original_height": self._buffer_info.original_height,
            "original_width": self._buffer_info.original_width,
            "original_channel": self._buffer_info.original_channel,
            "reshaped_height": self._buffer_info.reshaped_height,
            "reshaped_width": self._buffer_info.reshaped_width,
            "reshaped_channel": self._buffer_info.reshaped_channel,
            "height": self._buffer_info.height,
            "width": self._buffer_info.width,
            "channel": self._buffer_info.channel,
            "max_height": self._buffer_info.max_height,
            "max_width": self._buffer_info.max_width,
            "max_channel": self._buffer_info.max_channel,
            "max_cache_size": self._buffer_info.max_cache_size,
        }
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={}".format(k, v) for k, v in d.items()),
        )
448
449
class ModelConfig:
    """
    @brief Configures a core mode and core allocation of a model for NPU inference.

    The `ModelConfig` class provides methods for setting a core mode and allocating
    cores for NPU inference. Supported core modes are single-core, multi-core,
    global4-core, and global8-core. Users can also specify which cores to allocate for
    the model. Additionally, the configuration offers an option to enforce the use of a
    specific NPU bundle.

    @note Deprecated functions are included for backward compatibility, but it is
    recommended to use the newer core mode configuration methods.
    """

    def __init__(self, num_cores: Optional[int] = None):
        """
        @brief Default constructor. This default-constructed object is initially set to
        single-core mode with all NPU local cores included.
        """
        self._model_config = (
            _cQbRuntime.ModelConfig()
            if num_cores is None
            else _cQbRuntime.ModelConfig(num_cores)
        )

    def set_single_core_mode(
        self, num_cores: Optional[int] = None, core_ids: Optional[List[CoreId]] = None
    ) -> bool:
        """
        @brief Sets the model to use single-core mode for inference with a specified number
        of local cores.

        In single-core mode, each local core executes model inference independently.
        The number of cores used is specified by the `num_cores` parameter, and the core
        allocation policy is set to `CoreAllocationPolicy.Auto`, meaning the model will be
        automatically allocated to available local cores when the model is launched to the
        NPU, specifically when the `Model.launch()` function is called. Alternatively, the
        user can specify a list of CoreIds to determine which cores to use for inference.

        @note Use exactly one of `num_cores` or `core_ids`, not both.

        @param[in] num_cores The number of local cores to use for inference.
        @param[in] core_ids A list of CoreIds to be used for model inference.

        @return True if the mode was successfully set, False otherwise.

        @raise ValueError If neither or both of `num_cores` and `core_ids` are given.
        """
        if num_cores is not None and core_ids is None:
            return self._model_config.set_single_core_mode(num_cores)
        if core_ids is not None and num_cores is None:
            # Unwrap to the C++ CoreId objects expected by the binding.
            return self._model_config.set_single_core_mode(
                [core_id._core_id for core_id in core_ids]
            )
        raise ValueError(
            "`set_single_core_mode` needs either `num_cores` or `core_ids`."
        )

    def set_global_core_mode(self, clusters: List[Cluster]) -> bool:
        """@deprecated"""
        return self._model_config.set_global_core_mode([c.value for c in clusters])

    def set_global4_core_mode(
        self, clusters: Optional[List[Cluster]] = None
    ) -> bool:
        """
        @brief Sets the model to use global4-core mode for inference with a specified set
        of NPU clusters.

        For Aries NPU, there are two clusters, each consisting of four local cores. In
        global4-core mode, four local cores within the same cluster work together to
        execute the model inference.

        @param[in] clusters A list of clusters to be used for model inference.
                            Defaults to both clusters (Cluster0 and Cluster1).

        @return True if the mode was successfully set, False otherwise.
        """
        # None (not a shared mutable list) as default; fall back to both clusters.
        if clusters is None:
            clusters = [Cluster.Cluster0, Cluster.Cluster1]
        return self._model_config.set_global4_core_mode([c.value for c in clusters])

    def set_global8_core_mode(self) -> bool:
        """
        @brief Sets the model to use global8-core mode for inference.

        For Aries NPU, there are two clusters, each consisting of four local cores. In
        global8-core mode, all eight local cores across the two clusters work together to
        execute the model inference.

        @return True if the mode was successfully set, False otherwise.
        """
        return self._model_config.set_global8_core_mode()

    def get_core_mode(self) -> CoreMode:
        """
        @brief Gets the core mode to be applied to the model.

        This reflects the core mode that will be used when the model is created.

        @return The `CoreMode` to be applied to the model.
        """
        return CoreMode(self._model_config.get_core_mode())

    def set_multi_core_mode(
        self, clusters: Optional[List[Cluster]] = None
    ) -> bool:
        """
        @brief Sets the model to use multi-core mode for batch inference.

        In multi-core mode, on Aries NPU, the four local cores within a cluster work
        together to process batch inference tasks efficiently. This mode is optimized for
        batch processing.

        @param[in] clusters A list of clusters to be used for multi-core batch inference.
                            Defaults to both clusters (Cluster0 and Cluster1).

        @return True if the mode was successfully set, False otherwise.
        """
        # None (not a shared mutable list) as default; fall back to both clusters.
        if clusters is None:
            clusters = [Cluster.Cluster0, Cluster.Cluster1]
        return self._model_config.set_multi_core_mode([c.value for c in clusters])

    def get_core_allocation_policy(self) -> CoreAllocationPolicy:
        """
        @brief Gets the core allocation policy to be applied to the model.

        This reflects the core allocation policy that will be used when the model is
        created.

        @return The `CoreAllocationPolicy` to be applied to the model.
        """
        return CoreAllocationPolicy(self._model_config.get_core_allocation_policy())

    def get_num_cores(self) -> int:
        """
        @brief Gets the number of cores to be allocated for the model.

        This represents the number of cores that will be allocated for inference
        when the model is launched to the NPU.

        @return The number of cores to be allocated for the model.
        """
        return self._model_config.get_num_cores()

    def force_single_npu_bundle(self, npu_bundle_index: int) -> bool:
        """
        @brief Forces the use of a specific NPU bundle.

        This function forces the selection of a specific NPU bundle. If a non-negative
        index is provided, the corresponding NPU bundle is selected and runs without CPU
        offloading. If -1 is provided, all NPU bundles are used with CPU offloading
        enabled.

        @param[in] npu_bundle_index The index of the NPU bundle to force. A non-negative
                                    integer selects a specific NPU bundle (runs without CPU
                                    offloading), or -1 to enable all NPU bundles with CPU
                                    offloading.

        @return True if the index is valid and the NPU bundle is successfully set,
                False if the index is invalid (less than -1).
        """
        return self._model_config.force_single_npu_bundle(npu_bundle_index)

    def get_forced_npu_bundle_index(self) -> int:
        """
        @brief Retrieves the index of the forced NPU bundle.

        This function returns the index of the NPU bundle that has been forced using the
        `force_single_npu_bundle` function. If no NPU bundle is forced, the returned value
        will be -1.

        @return The index of the forced NPU bundle, or -1 if no bundle is forced.
        """
        return self._model_config.get_forced_npu_bundle_index()

    def set_async_pipeline_enabled(self, enable: bool) -> None:
        """
        @brief Enables or disables the asynchronous pipeline required for asynchronous
        inference.

        Call this function with `enable` set to `True` if you intend to use
        `Model.infer_async()`, as the asynchronous pipeline is necessary for their operation.

        If you are only using synchronous inference, such as `Model.infer()` or
        `Model.infer_to_float()`, it is recommended to keep the asynchronous pipeline disabled
        to avoid unnecessary overhead.

        @param[in] enable Set to `True` to enable the asynchronous pipeline; set to `False`
                          to disable it.
        """
        return self._model_config.set_async_pipeline_enabled(enable)

    def get_async_pipeline_enabled(self) -> bool:
        """
        @brief Returns whether the asynchronous pipeline is enabled in this configuration.

        @return `True` if the asynchronous pipeline is enabled; `False` otherwise.
        """
        return self._model_config.get_async_pipeline_enabled()

    def set_activation_slots(self, num: int) -> None:
        """
        @brief Sets activation buffer slots for multi-activation supported model.

        Call this function if you want to set the number of activation buffer slots manually.

        If you do not call this function, the default number of activation buffer slots
        is set differently depending on the CoreMode.

        - `CoreMode.Single` : 2 * (the number of target core ids)
        - `CoreMode.Multi` : 2 * (the number of target clusters)
        - `CoreMode.Global4` : 2 * (the number of target clusters)
        - `CoreMode.Global8` : 2

        @note This function has no effect on MXQ file in version earlier than MXQv7.

        @note Currently, LLM model's activation slot is fixed to 1 and ignoring `num`.

        @param[in] num Multi activation counts. Must be >= 1.
        """
        return self._model_config.set_activation_slots(num)

    def get_activation_slots(self) -> int:
        """
        @brief Returns activation buffer slot count.

        @note This function has no meaning on MXQ file in version earlier than MXQv7.

        @return Activation buffer slot count.
        """
        return self._model_config.get_activation_slots()

    @property
    def early_latencies(self) -> List[int]:
        return self._model_config.early_latencies

    @early_latencies.setter
    def early_latencies(self, latencies: List[int]):
        """@deprecated This setting has no effect."""
        self._model_config.early_latencies = latencies

    @property
    def finish_latencies(self) -> List[int]:
        return self._model_config.finish_latencies

    @finish_latencies.setter
    def finish_latencies(self, latencies: List[int]):
        """@deprecated This setting has no effect."""
        self._model_config.finish_latencies = latencies

    def get_core_ids(self) -> List[CoreId]:
        """
        @brief Returns the list of NPU CoreIds to be used for model inference.

        This function returns a list of NPU CoreIds that the model will use for
        inference. When `set_single_core_mode(num_cores)` is called and the
        core allocation policy is set to CoreAllocationPolicy.Auto, it will return an
        empty list.

        @return A list of NPU CoreIds.
        """
        return [
            CoreId(Cluster(core_id.cluster), Core(core_id.core))
            for core_id in self._model_config.core_ids
        ]

    def __repr__(self):
        d = {
            "core_mode": self.get_core_mode(),
            "core_allocation_policy": self.get_core_allocation_policy(),
            "core_ids": self.get_core_ids(),
            "num_cores": self.get_num_cores(),
            "forced_npu_bundle_index": self.get_forced_npu_bundle_index(),
        }
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={}".format(k, v) for k, v in d.items()),
        )
720
721
class LogLevel(Enum):
    """
    @brief LogLevel

    Severity levels for runtime logging; values mirror the C++ runtime enum.
    """

    DEBUG = _cQbRuntime.LogLevel.DEBUG
    INFO = _cQbRuntime.LogLevel.INFO
    WARN = _cQbRuntime.LogLevel.WARN
    ERR = _cQbRuntime.LogLevel.ERR
    FATAL = _cQbRuntime.LogLevel.FATAL
    # Disables logging output entirely (per the usual OFF convention — confirm).
    OFF = _cQbRuntime.LogLevel.OFF
731
732
def set_log_level(level: LogLevel):
    """
    @brief Sets the log level of the qb runtime library.

    @param[in] level The `LogLevel` to apply; forwarded to the C++ runtime.
    """
    _cQbRuntime.set_log_level(level.value)
735
736
class CacheType(Enum):
    """
    @brief CacheType

    KV-cache type identifiers; values mirror the C++ runtime enum.
    """

    Default = _cQbRuntime.CacheType.Default
    Batch = _cQbRuntime.CacheType.Batch
    # Sentinel reported by the runtime for an invalid/unknown cache type.
    Error = _cQbRuntime.CacheType.Error
743
744
class CacheInfo:
    """@brief Struct representing KV-cache information."""

    def __init__(
        self,
        cache_type: CacheType = CacheType.Error,
        name: str = "",
        layer_hash: str = "",
        size: int = 0,
        num_batches: int = 0,
    ):
        """
        @brief Constructs a CacheInfo wrapping a C++ CacheInfo object.

        @param[in] cache_type The KV-cache type.
        @param[in] name Cache name.
        @param[in] layer_hash Hash identifying the associated layer.
        @param[in] size Cache size.
        @param[in] num_batches Number of batches.
        """
        self._cache_info = _cQbRuntime.CacheInfo()
        self._cache_info.cache_type = cache_type.value
        self._cache_info.name = name
        self._cache_info.layer_hash = layer_hash
        self._cache_info.size = size
        self._cache_info.num_batches = num_batches

    @classmethod
    def from_cpp(cls, _cache_info: _cQbRuntime.CacheInfo):
        """Alternate constructor: copy the fields of a C++ CacheInfo object."""
        return cls(
            CacheType(_cache_info.cache_type),
            _cache_info.name,
            _cache_info.layer_hash,
            _cache_info.size,
            _cache_info.num_batches,
        )

    @property
    def cache_type(self) -> CacheType:
        return CacheType(self._cache_info.cache_type)

    @cache_type.setter
    def cache_type(self, value: CacheType):
        self._cache_info.cache_type = value.value

    @property
    def name(self) -> str:
        return self._cache_info.name

    @name.setter
    def name(self, value: str):
        self._cache_info.name = value

    @property
    def layer_hash(self) -> str:
        return self._cache_info.layer_hash

    @layer_hash.setter
    def layer_hash(self, value: str):
        self._cache_info.layer_hash = value

    @property
    def size(self) -> int:
        return self._cache_info.size

    @size.setter
    def size(self, value: int):
        self._cache_info.size = value

    @property
    def num_batches(self) -> int:
        return self._cache_info.num_batches

    @num_batches.setter
    def num_batches(self, value: int):
        self._cache_info.num_batches = value
812
813
def start_tracing_events(path: str) -> bool:
    """
    @brief Starts event tracing and prepares to save the trace log to a specified file.

    The trace log is recorded in "Chrome Tracing JSON format," which can be
    viewed at https://ui.perfetto.dev/.

    The trace log is not written immediately; it is saved only when
    stop_tracing_events() is called.

    @param[in] path The file path where the trace log should be stored.
    @return True if tracing starts successfully, False otherwise.
    """
    # Thin wrapper: all tracing state lives in the C++ runtime.
    return _cQbRuntime.start_tracing_events(path)
828
829
def stop_tracing_events() -> None:
    """
    @brief Stops event tracing and writes the recorded trace log.

    This function finalizes tracing and saves the collected trace data
    to the file specified when start_tracing_events() was called.
    """
    # Thin wrapper: all tracing state lives in the C++ runtime.
    _cQbRuntime.stop_tracing_events()
838
839
def get_model_summary(mxq_path: str) -> str:
    """
    @brief Generates a structured summary of the specified MXQ model.

    Returns an overview of the model contained in the MXQ file, including:
    - Target NPU hardware
    - Supported core modes and their associated cores
    - The total number of model variants
    - For each variant:
      - Input and output tensor shapes
      - A list of layers with their types, output shapes, and input layer indices

    The summary is returned as a human-readable string in a table and is useful for
    inspecting model compatibility, structure, and input/output shapes.

    @param[in] mxq_path Path to the MXQ model file.
    @return A formatted string containing the model summary.
    """
    # Thin wrapper: parsing of the MXQ file is done by the C++ runtime.
    return _cQbRuntime.get_model_summary(mxq_path)
859
860
861
Struct representing input/output buffer information.
Definition type.py:217
int reshaped_width(self)
Width of reshaped input/output.
Definition type.py:290
int max_channel(self)
Maximum channel of original input/output if data is sequential.
Definition type.py:325
int original_height(self)
Height of original input/output.
Definition type.py:270
int width(self)
Width of NPU input/output.
Definition type.py:305
int max_cache_size(self)
Maximum KV-cache size, relevant for LLM models using KV cache.
Definition type.py:330
int max_height(self)
Maximum height of original input/output if data is sequential.
Definition type.py:315
int original_size(self)
Returns the total size of the original input/output.
Definition type.py:386
int height(self)
Height of NPU input/output.
Definition type.py:300
int original_width(self)
Width of original input/output.
Definition type.py:275
int channel(self)
Channel of NPU input/output.
Definition type.py:310
int size(self)
Returns the total size of the NPU input/output.
Definition type.py:402
int max_width(self)
Maximum width of original input/output if data is sequential.
Definition type.py:320
int reshaped_channel(self)
Channel of reshaped input/output.
Definition type.py:295
int original_channel(self)
Channel of original input/output.
Definition type.py:280
int reshaped_size(self)
Returns the total size of the reshaped input/output.
Definition type.py:394
int reshaped_height(self)
Height of reshaped input/output.
Definition type.py:285
A simple byte-sized buffer.
Definition type.py:171
Struct representing KV-cache information.
Definition type.py:745
Enumerates clusters in the ARIES NPU.
Definition type.py:17
Core allocation policy.
Definition type.py:49
Represents a unique identifier for an NPU core.
Definition type.py:114
bool __eq__(self, other)
Checks if two CoreId objects are equal.
Definition type.py:147
Cluster cluster(self)
Definition type.py:132
Core core(self)
Definition type.py:136
bool __lt__(self, other)
Compares two CoreId objects for ordering.
Definition type.py:155
Defines the core mode for NPU execution.
Definition type.py:196
Enumerates cores within a cluster in the ARIES NPU.
Definition type.py:31
Configures a core mode and core allocation of a model for NPU inference.
Definition type.py:450
List[CoreId] get_core_ids(self)
Returns the list of NPU CoreIds to be used for model inference.
Definition type.py:692
bool get_forced_npu_bundle_index(self)
Retrieves the index of the forced NPU bundle.
Definition type.py:605
bool set_global8_core_mode(self)
Sets the model to use global8-core mode for inference.
Definition type.py:526
bool set_multi_core_mode(self, List[Cluster] clusters=[Cluster.Cluster0, Cluster.Cluster1])
Sets the model to use multi-core mode for batch inference.
Definition type.py:550
bool set_global_core_mode(self, List[Cluster] clusters)
Definition type.py:505
bool force_single_npu_bundle(self, int npu_bundle_index)
Forces the use of a specific NPU bundle.
Definition type.py:586
__init__(self, Optional[int] num_cores=None)
Default constructor.
Definition type.py:463
CoreMode get_core_mode(self)
Gets the core mode to be applied to the model.
Definition type.py:538
None set_async_pipeline_enabled(self, bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.
Definition type.py:617
bool get_async_pipeline_enabled(self)
Returns whether the asynchronous pipeline is enabled in this configuration.
Definition type.py:634
int get_activation_slots(self)
Returns activation buffer slot count.
Definition type.py:664
None set_activation_slots(self, int num)
Sets activation buffer slots for multi-activation supported model.
Definition type.py:642
int get_num_cores(self)
Gets the number of cores to be allocated for the model.
Definition type.py:575
bool set_global4_core_mode(self, List[Cluster] clusters=[Cluster.Cluster0, Cluster.Cluster1])
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.
Definition type.py:511
CoreAllocationPolicy get_core_allocation_policy(self)
Gets the core allocation policy to be applied to the model.
Definition type.py:564
bool set_single_core_mode(self, Optional[int] num_cores=None, Optional[List[CoreId]] core_ids=None)
Sets the model to use single-core mode for inference with a specified number of local cores.
Definition type.py:476
Struct for scale values.
Definition type.py:56
bool is_uniform(self)
Definition type.py:78
float scale(self)
Definition type.py:74
float __getitem__(self, int i)
Returns the scale value at the specified index.
Definition type.py:93
List[float] scale_list(self)
Definition type.py:70
str get_model_summary(str mxq_path)
Generates a structured summary of the specified MXQ model.
Definition type.py:840
bool start_tracing_events(str path)
Starts event tracing and prepares to save the trace log to a specified file.
Definition type.py:814
stop_tracing_events()
Stops event tracing and writes the recorded trace log.
Definition type.py:830