# SDK qb Runtime Library v1.1 — type.py
# Python-facing type definitions wrapping the C++ runtime bindings.
from enum import Enum
from typing import List, Optional, Tuple

import numpy as np

import qbruntime.qbruntime as _cQbRuntime
11
12
15
16
class Cluster(Enum):
    """
    @brief Enumerates clusters in the ARIES NPU.

    @note The ARIES NPU consists of two clusters, each containing one global core and
    four local cores, totaling eight local cores. REGULUS has only a single cluster
    (Cluster0) with one local core (Core0).
    """

    Cluster0 = _cQbRuntime.Cluster.Cluster0
    Cluster1 = _cQbRuntime.Cluster.Cluster1
    Error = _cQbRuntime.Cluster.Error
29
30
class Core(Enum):
    """
    @brief Enumerates cores within a cluster in the ARIES NPU.

    @note The ARIES NPU consists of two clusters, each containing one global core and
    four local cores, totaling eight local cores. REGULUS has only a single cluster
    (Cluster0) with one local core (Core0).
    """

    Core0 = _cQbRuntime.Core.Core0
    Core1 = _cQbRuntime.Core.Core1
    Core2 = _cQbRuntime.Core.Core2
    Core3 = _cQbRuntime.Core.Core3
    All = _cQbRuntime.Core.All
    GlobalCore = _cQbRuntime.Core.GlobalCore
    Error = _cQbRuntime.Core.Error
47
48
50 """@brief Core allocation policy"""
51
52 Auto = _cQbRuntime.CoreAllocationPolicy.Auto
53 Manual = _cQbRuntime.CoreAllocationPolicy.Manual
54
55
class Scale:
    """@brief Struct for scale values.

    Thin Python wrapper around the C++ `_cQbRuntime.Scale` struct.
    """

    def __init__(self, scale: float, is_uniform: bool, scale_list: List[float]):
        """
        @brief Constructs a Scale.

        @param[in] scale Scalar scale value.
        @param[in] is_uniform Whether a single uniform scale applies.
        @param[in] scale_list Per-element scale values.
        """
        self._scale = _cQbRuntime.Scale()
        self._scale.scale = scale
        self._scale.is_uniform = is_uniform
        self._scale.scale_list = scale_list

    @classmethod
    def from_cpp(cls, _scale: _cQbRuntime.Scale):
        """@brief Alternate constructor copying an existing C++ Scale value."""
        return cls(_scale.scale, _scale.is_uniform, _scale.scale_list)

    @property
    def scale(self) -> float:
        return self._scale.scale

    @scale.setter
    def scale(self, value: float):
        self._scale.scale = value

    @property
    def is_uniform(self) -> bool:
        return self._scale.is_uniform

    @is_uniform.setter
    def is_uniform(self, value: bool):
        self._scale.is_uniform = value

    @property
    def scale_list(self) -> List[float]:
        return self._scale.scale_list

    @scale_list.setter
    def scale_list(self, value: List[float]):
        self._scale.scale_list = value

    def __getitem__(self, i: int) -> float:
        """
        @brief Returns the scale value at the specified index.

        @param[in] i Index.
        @return Scale value.
        """
        return self._scale[i]

    def __repr__(self):
        fields = {
            "scale": self.scale,
            "is_uniform": self.is_uniform,
            "scale_list": self.scale_list,
        }
        body = ", ".join(f"{k}={v}" for k, v in fields.items())
        return f"{self.__class__.__name__}({body})"
112
113
class CoreId:
    """
    @brief Represents a unique identifier for an NPU core.

    A CoreId consists of a Cluster and a Core, identifying a specific core
    within an NPU.
    """

    def __init__(self, cluster: Cluster, core: Core):
        self._core_id = _cQbRuntime.CoreId()
        self._core_id.cluster = cluster.value
        self._core_id.core = core.value

    @classmethod
    def from_cpp(cls, _core_id: _cQbRuntime.CoreId):
        """@brief Alternate constructor copying an existing C++ CoreId value."""
        return cls(Cluster(_core_id.cluster), Core(_core_id.core))

    @property
    def cluster(self) -> Cluster:
        return Cluster(self._core_id.cluster)

    @cluster.setter
    def cluster(self, value: Cluster):
        self._core_id.cluster = value.value

    @property
    def core(self) -> Core:
        return Core(self._core_id.core)

    @core.setter
    def core(self, value: Core):
        self._core_id.core = value.value

    def __eq__(self, other) -> bool:
        """
        @brief Checks if two CoreId objects are equal.

        @return True if both CoreId objects are identical, False otherwise.
        """
        # Guard so comparison with a non-CoreId falls back to Python's default
        # instead of raising AttributeError on the missing `_core_id`.
        if not isinstance(other, CoreId):
            return NotImplemented
        return self._core_id == other._core_id

    def __lt__(self, other) -> bool:
        """
        @brief Compares two CoreId objects for ordering.

        @return True if this CoreId is less than the given CoreId, False otherwise.
        """
        if not isinstance(other, CoreId):
            return NotImplemented
        return self._core_id < other._core_id

    def __hash__(self) -> int:
        # Defining __eq__ alone would leave the class unhashable; hash on the
        # same (cluster, core) identity that __eq__ compares so equal CoreIds
        # hash equally.
        return hash((self.cluster, self.core))

    def __repr__(self):
        fields = {"cluster": self.cluster, "core": self.core}
        body = ", ".join(f"{k}={v}" for k, v in fields.items())
        return f"{self.__class__.__name__}({body})"
169
170
class Buffer:
    """
    @brief A simple byte-sized buffer.

    This struct represents a contiguous block of memory for storing byte-sized data.
    """

    def __init__(self, _buffer: Optional[_cQbRuntime.Buffer] = None):
        """@brief Wraps an existing C++ Buffer, or creates an empty one."""
        self._buffer = _cQbRuntime.Buffer() if _buffer is None else _buffer

    @property
    def size(self) -> int:
        return self._buffer.size

    @size.setter
    def size(self, value: int):
        self._buffer.size = value

    def set_buffer(self, arr: np.ndarray):
        """@brief Copies `arr` into the buffer; made contiguous first so the C++
        side receives a dense memory block."""
        self._buffer.set_buffer(np.ascontiguousarray(arr))

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._buffer.size})"
194
195
class CoreMode(Enum):
    """
    @brief Defines the core mode for NPU execution.

    Supported core modes include single-core, multi-core, global4-core, and
    global8-core. For detailed explanations of each mode, refer to the following
    functions:

    - `ModelConfig.set_auto_core_mode()`
    - `ModelConfig.set_single_core_mode()`
    - `ModelConfig.set_multi_core_mode()`
    - `ModelConfig.set_global4_core_mode()`
    - `ModelConfig.set_global8_core_mode()`
    """

    Single = _cQbRuntime.CoreMode.Single
    Multi = _cQbRuntime.CoreMode.Multi
    Global = _cQbRuntime.CoreMode.Global
    Global4 = _cQbRuntime.CoreMode.Global4
    Global8 = _cQbRuntime.CoreMode.Global8
    Auto = _cQbRuntime.CoreMode.Auto
    Error = _cQbRuntime.CoreMode.Error
217
218
220 """@brief Struct representing input/output buffer information."""
221
222 def __init__(
223 self,
224 original_height: int = 0,
225 original_width: int = 0,
226 original_channel: int = 0,
227 reshaped_height: int = 0,
228 reshaped_width: int = 0,
229 reshaped_channel: int = 0,
230 height: int = 0,
231 width: int = 0,
232 channel: int = 0,
233 max_height: int = 0,
234 max_width: int = 0,
235 max_channel: int = 0,
236 max_cache_size: int = 0,
237 ):
238 self._buffer_info = _cQbRuntime.BufferInfo()
239 self._buffer_info.original_height = original_height
240 self._buffer_info.original_width = original_width
241 self._buffer_info.original_channel = original_channel
242 self._buffer_info.reshaped_height = reshaped_height
243 self._buffer_info.reshaped_width = reshaped_width
244 self._buffer_info.reshaped_channel = reshaped_channel
245 self._buffer_info.height = height
246 self._buffer_info.width = width
247 self._buffer_info.channel = channel
248 self._buffer_info.max_height = max_height
249 self._buffer_info.max_width = max_width
250 self._buffer_info.max_channel = max_channel
251 self._buffer_info.max_cache_size = max_cache_size
252
253 @classmethod
254 def from_cpp(cls, _buffer_info: _cQbRuntime.BufferInfo):
255 return cls(
256 _buffer_info.original_height,
257 _buffer_info.original_width,
258 _buffer_info.original_channel,
259 _buffer_info.reshaped_height,
260 _buffer_info.reshaped_width,
261 _buffer_info.reshaped_channel,
262 _buffer_info.height,
263 _buffer_info.width,
264 _buffer_info.channel,
265 _buffer_info.max_height,
266 _buffer_info.max_width,
267 _buffer_info.max_channel,
268 _buffer_info.max_cache_size,
269 )
270
271 @property
272 def original_height(self) -> int:
273 """Height of original input/output"""
274 return self._buffer_info.original_height
275
276 @property
277 def original_width(self) -> int:
278 """Width of original input/output"""
279 return self._buffer_info.original_width
280
281 @property
282 def original_channel(self) -> int:
283 """Channel of original input/output"""
284 return self._buffer_info.original_channel
285
286 @property
287 def reshaped_height(self) -> int:
288 """Height of reshaped input/output"""
289 return self._buffer_info.reshaped_height
290
291 @property
292 def reshaped_width(self) -> int:
293 """Width of reshaped input/output"""
294 return self._buffer_info.reshaped_width
295
296 @property
297 def reshaped_channel(self) -> int:
298 """Channel of reshaped input/output"""
299 return self._buffer_info.reshaped_channel
300
301 @property
302 def height(self) -> int:
303 """Height of NPU input/output"""
304 return self._buffer_info.height
305
306 @property
307 def width(self) -> int:
308 """Width of NPU input/output"""
309 return self._buffer_info.width
310
311 @property
312 def channel(self) -> int:
313 """Channel of NPU input/output"""
314 return self._buffer_info.channel
315
316 @property
317 def max_height(self) -> int:
318 """Maximum height of original input/output if data is sequential."""
319 return self._buffer_info.max_height
320
321 @property
322 def max_width(self) -> int:
323 """Maximum width of original input/output if data is sequential."""
324 return self._buffer_info.max_width
325
326 @property
327 def max_channel(self) -> int:
328 """Maximum channel of original input/output if data is sequential."""
329 return self._buffer_info.max_channel
330
331 @property
332 def max_cache_size(self) -> int:
333 """Maximum KV-cache size, relevant for LLM models using KV cache."""
334 return self._buffer_info.max_cache_size
335
336 @original_height.setter
337 def original_height(self, value: int):
338 self._buffer_info.original_height = value
339
340 @original_width.setter
341 def original_width(self, value: int):
342 self._buffer_info.original_width = value
343
344 @original_channel.setter
345 def original_channel(self, value: int):
346 self._buffer_info.original_channel = value
347
348 @reshaped_height.setter
349 def reshaped_height(self, value: int):
350 self._buffer_info.reshaped_height = value
351
352 @reshaped_width.setter
353 def reshaped_width(self, value: int):
354 self._buffer_info.reshaped_width = value
355
356 @reshaped_channel.setter
357 def reshaped_channel(self, value: int):
358 self._buffer_info.reshaped_channel = value
359
360 @height.setter
361 def height(self, value: int):
362 self._buffer_info.height = value
363
364 @width.setter
365 def width(self, value: int):
366 self._buffer_info.width = value
367
368 @channel.setter
369 def channel(self, value: int):
370 self._buffer_info.channel = value
371
372 @max_height.setter
373 def max_height(self, value: int):
374 self._buffer_info.max_height = value
375
376 @max_width.setter
377 def max_width(self, value: int):
378 self._buffer_info.max_width = value
379
380 @max_channel.setter
381 def max_channel(self, value: int):
382 self._buffer_info.max_channel = value
383
384 @max_cache_size.setter
385 def max_cache_size(self, value: int):
386 self._buffer_info.max_cache_size = value
387
388 def original_size(self) -> int:
389 """
390 @brief Returns the total size of the original input/output.
391
392 @return The data size.
393 """
394 return self._buffer_info.original_size()
395
396 def reshaped_size(self) -> int:
397 """
398 @brief Returns the total size of the reshaped input/output.
399
400 @return The data size.
401 """
402 return self._buffer_info.reshaped_size()
403
404 def size(self) -> int:
405 """
406 @brief Returns the total size of the NPU input/output.
407
408 @return The data size.
409 """
410 return self._buffer_info.size()
411
412 def original_shape(self) -> Tuple[int, int, int]:
413 return self._buffer_info.original_shape()
414
415 def original_shape_chw(self) -> Tuple[int, int, int]:
416 return self._buffer_info.original_shape_chw()
417
418 def reshaped_shape(self) -> Tuple[int, int, int]:
419 return self._buffer_info.reshaped_shape()
420
421 def reshaped_shape_chw(self) -> Tuple[int, int, int]:
422 return self._buffer_info.reshaped_shape_chw()
423
424 def shape(self) -> Tuple[int, int, int]:
425 return self._buffer_info.shape()
426
427 def shape_chw(self) -> Tuple[int, int, int]:
428 return self._buffer_info.shape_chw()
429
430 def __repr__(self):
431 d = {
432 "original_height": self._buffer_info.original_height,
433 "original_width": self._buffer_info.original_width,
434 "original_channel": self._buffer_info.original_channel,
435 "reshaped_height": self._buffer_info.reshaped_height,
436 "reshaped_width": self._buffer_info.reshaped_width,
437 "reshaped_channel": self._buffer_info.reshaped_channel,
438 "height": self._buffer_info.height,
439 "width": self._buffer_info.width,
440 "channel": self._buffer_info.channel,
441 "max_height": self._buffer_info.max_height,
442 "max_width": self._buffer_info.max_width,
443 "max_channel": self._buffer_info.max_channel,
444 "max_cache_size": self._buffer_info.max_cache_size,
445 }
446 return "{}({})".format(
447 self.__class__.__name__,
448 ", ".join("{}={}".format(k, v) for k, v in d.items()),
449 )
450
451
453 """
454 @brief Configures a core mode and core allocation of a model for NPU inference.
455 The `ModelConfig` class provides methods for setting a core mode and allocating
456 cores for NPU inference. Supported core modes are single-core, multi-core,
457 global4-core, and global8-core. Users can also specify which cores to allocate for
458 the model. Additionally, the configuration offers an option to enforce the use of a
459 specific NPU bundle.
460
461 @note Deprecated functions are included for backward compatibility, but it is
462 recommended to use the newer core mode configuration methods.
463 """
464
465 def __init__(self, num_cores: Optional[int] = None):
466 """
467 @brief Default constructor. This default-constructed object is initially set to
468 auto-core mode.
469 """
470 self._model_config = (
471 _cQbRuntime.ModelConfig()
472 if num_cores is None
473 else _cQbRuntime.ModelConfig(num_cores)
474 )
475
476 def set_auto_core_mode(self) -> bool:
477 """
478 @brief Sets the model to detect CoreMode automatically.
479
480 In auto-core mode, the model automatically detects a supported CoreMode
481 while using all available NPU cores.
482
483 @note If the model has more than one CoreMode, `CoreMode.Auto` is not supported.
484
485 @note activation buffer slots will be reset after `set_auto_core_mode` is called.
486
487 @return True if the mode was successfully set, False otherwise.
488 """
490
492 self, num_cores: Optional[int] = None, core_ids: Optional[List[CoreId]] = None
493 ) -> bool:
494 """
495 @brief Sets the model to use single-core mode for inference with a specified number
496 of local cores.
497
498 In single-core mode, each local core executes model inference independently.
499 The number of cores used is specified by the `num_cores` parameter, and the core
500 allocation policy is set to `CoreAllocationPolicy.Auto`, meaning the model will be
501 automatically allocated to available local cores when the model is launched to the
502 NPU, specifically when the `Model.launch()` function is called. Or The user can
503 specify a list of CoreIds to determine which cores to use for inference.
504
505 @note Use exactly one of `num_cores` or `core_ids`, not both.
506
507 @param[in] num_cores The number of local cores to use for inference.
508 @param[in] core_ids A list of CoreIds to be used for model inference.
509
510 @return True if the mode was successfully set, False otherwise.
511 """
512 if num_cores is not None and core_ids is None:
513 return self._model_config.set_single_core_mode(num_cores)
514 elif core_ids is not None and num_cores is None:
516 [core_id._core_id for core_id in core_ids]
517 )
518 raise ValueError(
519 "`set_single_core_mode` needs either `num_cores` and `core_ids`."
520 )
521
522 def set_global_core_mode(self, clusters: List[Cluster]) -> bool:
523 """@deprecated"""
524 return self._model_config.set_global_core_mode([c.value for c in clusters])
525
527 self, clusters: List[Cluster] = [Cluster.Cluster0, Cluster.Cluster1]
528 ) -> bool:
529 """
530 @brief Sets the model to use global4-core mode for inference with a specified set
531 of NPU clusters.
532
533 For Aries NPU, there are two clusters, each consisting of four local cores. In
534 global4-core mode, four local cores within the same cluster work together to
535 execute the model inference.
536
537 @param[in] clusters A list of clusters to be used for model inference.
538
539 @return True if the mode was successfully set, False otherwise.
540 """
541 return self._model_config.set_global4_core_mode([c.value for c in clusters])
542
543 def set_global8_core_mode(self) -> bool:
544 """
545 @brief Sets the model to use global8-core mode for inference.
546
547 For Aries NPU, there are two clusters, each consisting of four local cores. In
548 global8-core mode, all eight local cores across the two clusters work together to
549 execute the model inference.
550
551 @return True if the mode was successfully set, False otherwise.
552 """
554
555 def get_core_mode(self) -> CoreMode:
556 """
557 @brief Gets the core mode to be applied to the model.
558
559 This reflects the core mode that will be used when the model is created.
560
561 @return The `CoreMode` to be applied to the model.
562 """
564
566 self, clusters: List[Cluster] = [Cluster.Cluster0, Cluster.Cluster1]
567 ) -> bool:
568 """
569 @brief Sets the model to use multi-core mode for batch inference.
570
571 In multi-core mode, on Aries NPU, the four local cores within a cluster work
572 together to process batch inference tasks efficiently. This mode is optimized for
573 batch processing.
574
575 @param[in] clusters A list of clusters to be used for multi-core batch inference.
576
577 @return True if the mode was successfully set, False otherwise.
578 """
579 return self._model_config.set_multi_core_mode([c.value for c in clusters])
580
581 def get_core_allocation_policy(self) -> CoreAllocationPolicy:
582 """
583 @brief Gets the core allocation policy to be applied to the model.
584
585 This reflects the core allocation policy that will be used when the model is
586 created.
587
588 @return The `CoreAllocationPolicy` to be applied to the model.
589 """
591
592 def get_num_cores(self) -> int:
593 """
594 @brief Gets the number of cores to be allocated for the model.
595
596 This represents the number of cores that will be allocated for inference
597 when the model is launched to the NPU.
598
599 @return The number of cores to be allocated for the model.
600 """
601 return self._model_config.get_num_cores()
602
603 def force_single_npu_bundle(self, npu_bundle_index: int) -> bool:
604 """
605 @brief Forces the use of a specific NPU bundle.
606
607 This function forces the selection of a specific NPU bundle. If a non-negative
608 index is provided, the corresponding NPU bundle is selected and runs without CPU
609 offloading. If -1 is provided, all NPU bundles are used with CPU offloading
610 enabled.
611
612 @param[in] npu_bundle_index The index of the NPU bundle to force. A non-negative
613 integer selects a specific NPU bundle (runs without CPU
614 offloading), or -1 to enable all NPU bundles with CPU
615 offloading.
616
617 @return True if the index is valid and the NPU bundle is successfully set,
618 False if the index is invalid (less than -1).
619 """
620 return self._model_config.force_single_npu_bundle(npu_bundle_index)
621
623 """
624 @brief Retrieves the index of the forced NPU bundle.
625
626 This function returns the index of the NPU bundle that has been forced using the
627 `force_single_npu_bundle` function. If no NPU bundle is forced, the returned value
628 will be -1.
629
630 @return The index of the forced NPU bundle, or -1 if no bundle is forced.
631 """
633
634 def set_async_pipeline_enabled(self, enable: bool) -> None:
635 """
636 @brief Enables or disables the asynchronous pipeline required for asynchronous
637 inference.
638
639 Call this function with `enable` set to `True` if you intend to use
640 `Model.infer_async()`, as the asynchronous pipeline is necessary for their operation.
641
642 If you are only using synchronous inference, such as `Model.infer()` or
643 `Model.infer_to_float()`, it is recommended to keep the asynchronous pipeline disabled
644 to avoid unnecessary overhead.
645
646 @param[in] enable Set to `True` to enable the asynchronous pipeline; set to `False`
647 to disable it.
648 """
649 return self._model_config.set_async_pipeline_enabled(enable)
650
651 def get_async_pipeline_enabled(self) -> bool:
652 """
653 @brief Returns whether the asynchronous pipeline is enabled in this configuration.
654
655 @return `True` if the asynchronous pipeline is enabled; `False` otherwise.
656 """
658
659 def set_activation_slots(self, num: int) -> None:
660 """
661 @brief Sets activation buffer slots for multi-activation supported model.
662
663 all this function if you want to set the number of activation buffer slots manually.
664
665 If you do not call this function, the default number of activation buffer slots
666 is set differently depending on the CoreMode.
667
668 - `CoreMode.Single` : 2 * (the number of target core ids)
669 - `CoreMode.Multi` : 2 * (the number of target clusters)
670 - `CoreMode.Global4` : 2 * (the number of target clusters)
671 - `CoreMode.Global8` : 2
672
673 @note This function has no effect on MXQ file in version earlier than MXQv7.
674
675 @note Currently, LLM model's activation slot is fixed to 1 and ignoring `count`.
676
677 @param[in] count Multi activation counts. Must be >= 1.
678 """
679 return self._model_config.set_activation_slots(num)
680
681 def get_activation_slots(self) -> int:
682 """
683 @brief Returns activation buffer slot count.
684
685 @note This function has no meaning on MXQ file in version earlier than MXQv7.
686
687 @return Activation buffer slot count.
688 """
690
691 @property
692 def early_latencies(self) -> List[int]:
693 return self._model_config.early_latencies
694
695 @property
696 def finish_latencies(self) -> List[int]:
697 return self._model_config.finish_latencies
698
699 @early_latencies.setter
700 def early_latencies(self, latencies: List[int]):
701 """@deprecated This setting has no effect."""
702 self._model_config.early_latencies = latencies
703
704 @finish_latencies.setter
705 def finish_latencies(self, latencies: List[int]):
706 """@deprecated This setting has no effect."""
707 self._model_config.finish_latencies = latencies
708
709 def get_core_ids(self) -> List[CoreId]:
710 """
711 @brief Returns the list of NPU CoreIds to be used for model inference.
712
713 This function returns a list of NPU CoreIds that the model will use for
714 inference. When `set_single_core_mode(num_cores)` is called and the
715 core allocation policy is set to CoreAllocationPolicy.Auto, it will return an
716 empty list.
717
718 @return A list of NPU CoreIds.
719 """
720 return [
721 CoreId(Cluster(core_id.cluster), Core(core_id.core))
722 for core_id in self._model_config.core_ids
723 ]
724
725 def __repr__(self):
726 d = {
727 "core_mode": self.get_core_mode(),
728 "core_allocation_policy": self.get_core_allocation_policy(),
729 "core_ids": self.get_core_ids(),
730 "num_cores": self.get_num_cores(),
731 "forced_npu_bundle_index": self.get_forced_npu_bundle_index(),
732 }
733 return "{}({})".format(
734 self.__class__.__name__,
735 ", ".join("{}={}".format(k, v) for k, v in d.items()),
736 )
737
738
class LogLevel(Enum):
    """@brief LogLevel"""

    DEBUG = _cQbRuntime.LogLevel.DEBUG
    INFO = _cQbRuntime.LogLevel.INFO
    WARN = _cQbRuntime.LogLevel.WARN
    ERR = _cQbRuntime.LogLevel.ERR
    FATAL = _cQbRuntime.LogLevel.FATAL
    OFF = _cQbRuntime.LogLevel.OFF
748
749
def set_log_level(level: LogLevel) -> None:
    """@brief Sets the runtime's global log level."""
    _cQbRuntime.set_log_level(level.value)
752
753
class CacheType(Enum):
    """@brief CacheType"""

    Default = _cQbRuntime.CacheType.Default
    Batch = _cQbRuntime.CacheType.Batch
    Error = _cQbRuntime.CacheType.Error
760
761
763 """@brief Struct representing KV-cache information."""
764
765 def __init__(
766 self,
767 cache_type: CacheType = CacheType.Error,
768 name: str = "",
769 layer_hash: str = "",
770 size: int = 0,
771 num_batches: int = 0,
772 ):
773 self._cache_info = _cQbRuntime.CacheInfo()
774 self._cache_info.cache_type = cache_type.value
775 self._cache_info.name = name
776 self._cache_info.layer_hash = layer_hash
777 self._cache_info.size = size
778 self._cache_info.num_batches = num_batches
779
780 @classmethod
781 def from_cpp(cls, _cache_info: _cQbRuntime.CacheInfo):
782 return cls(
783 CacheType(_cache_info.cache_type),
784 _cache_info.name,
785 _cache_info.layer_hash,
786 _cache_info.size,
787 _cache_info.num_batches,
788 )
789
790 @property
791 def cache_type(self) -> CacheType:
792 return CacheType(self._cache_info.cache_type)
793
794 @property
795 def name(self) -> str:
796 return self._cache_info.name
797
798 @property
799 def layer_hash(self) -> str:
800 return self._cache_info.layer_hash
801
802 @property
803 def size(self) -> int:
804 return self._cache_info.size
805
806 @property
807 def num_batches(self) -> int:
808 return self._cache_info.num_batches
809
810 @cache_type.setter
811 def cache_type(self, value: CacheType):
812 self._cache_info.cache_type = value.value
813
814 @name.setter
815 def name(self, value: str):
816 self._cache_info.name = value
817
818 @layer_hash.setter
819 def layer_hash(self, value: str):
820 self._cache_info.layer_hash = value
821
822 @size.setter
823 def size(self, value: int):
824 self._cache_info.size = value
825
826 @num_batches.setter
827 def num_batches(self, value: int):
828 self._cache_info.num_batches = value
829
830
class DataType(Enum):
    """@brief DataType"""

    Float32 = _cQbRuntime.DataType.Float32
    Float16 = _cQbRuntime.DataType.Float16
    Int8 = _cQbRuntime.DataType.Int8
    Uint8 = _cQbRuntime.DataType.Uint8
    Error = _cQbRuntime.DataType.Error
839
840
def start_tracing_events(path: str) -> bool:
    """
    @brief Starts event tracing and prepares to save the trace log to a specified file.

    The trace log is recorded in "Chrome Tracing JSON format," which can be
    viewed at https://ui.perfetto.dev/.

    The trace log is not written immediately; it is saved only when
    stop_tracing_events() is called.

    @param[in] path The file path where the trace log should be stored.
    @return True if tracing starts successfully, False otherwise.
    """
    return _cQbRuntime.start_tracing_events(path)
855
856
858 """
859 @brief Stops event tracing and writes the recorded trace log.
860
861 This function finalizes tracing and saves the collected trace data
862 to the file specified when start_tracing_events() was called.
863 """
864 _cQbRuntime.stop_tracing_events()
865
866
def get_model_summary(mxq_path: str) -> str:
    """
    @brief Generates a structured summary of the specified MXQ model.

    Returns an overview of the model contained in the MXQ file, including:
    - Target NPU hardware
    - Supported core modes and their associated cores
    - The total number of model variants
    - For each variant:
      - Input and output tensor shapes
      - A list of layers with their types, output shapes, and input layer indices

    The summary is returned as a human-readable string in a table and is useful for
    inspecting model compatibility, structure, and input/output shapes.

    @param[in] mxq_path Path to the MXQ model file.
    @return A formatted string containing the model summary.
    """
    return _cQbRuntime.get_model_summary(mxq_path)
886
887
def get_available_device_numbers() -> List[int]:
    """
    @brief Gets the available NPU device numbers.

    @return A list of available NPU device numbers (the annotation and name indicate
            per-device identifiers rather than a count — confirm against the C++ API).
    """
    return _cQbRuntime.get_available_device_numbers()
895
896
897
Struct representing input/output buffer information.
Definition type.py:219
int reshaped_width(self)
Width of reshaped input/output.
Definition type.py:292
int max_channel(self)
Maximum channel of original input/output if data is sequential.
Definition type.py:327
int original_height(self)
Height of original input/output.
Definition type.py:272
int width(self)
Width of NPU input/output.
Definition type.py:307
int max_cache_size(self)
Maximum KV-cache size, relevant for LLM models using KV cache.
Definition type.py:332
int max_height(self)
Maximum height of original input/output if data is sequential.
Definition type.py:317
int original_size(self)
Returns the total size of the original input/output.
Definition type.py:388
int height(self)
Height of NPU input/output.
Definition type.py:302
int original_width(self)
Width of original input/output.
Definition type.py:277
int channel(self)
Channel of NPU input/output.
Definition type.py:312
int size(self)
Returns the total size of the NPU input/output.
Definition type.py:404
int max_width(self)
Maximum width of original input/output if data is sequential.
Definition type.py:322
int reshaped_channel(self)
Channel of reshaped input/output.
Definition type.py:297
int original_channel(self)
Channel of original input/output.
Definition type.py:282
int reshaped_size(self)
Returns the total size of the reshaped input/output.
Definition type.py:396
int reshaped_height(self)
Height of reshaped input/output.
Definition type.py:287
A simple byte-sized buffer.
Definition type.py:171
Struct representing KV-cache information.
Definition type.py:762
Enumerates clusters in the ARIES NPU.
Definition type.py:17
Core allocation policy.
Definition type.py:49
Represents a unique identifier for an NPU core.
Definition type.py:114
bool __eq__(self, other)
Checks if two CoreId objects are equal.
Definition type.py:147
Cluster cluster(self)
Definition type.py:132
Core core(self)
Definition type.py:136
bool __lt__(self, other)
Compares two CoreId objects for ordering.
Definition type.py:155
Defines the core mode for NPU execution.
Definition type.py:196
Enumerates cores within a cluster in the ARIES NPU.
Definition type.py:31
Configures a core mode and core allocation of a model for NPU inference.
Definition type.py:452
List[CoreId] get_core_ids(self)
Returns the list of NPU CoreIds to be used for model inference.
Definition type.py:709
bool get_forced_npu_bundle_index(self)
Retrieves the index of the forced NPU bundle.
Definition type.py:622
bool set_global8_core_mode(self)
Sets the model to use global8-core mode for inference.
Definition type.py:543
bool set_multi_core_mode(self, List[Cluster] clusters=[Cluster.Cluster0, Cluster.Cluster1])
Sets the model to use multi-core mode for batch inference.
Definition type.py:567
bool set_global_core_mode(self, List[Cluster] clusters)
Definition type.py:522
bool force_single_npu_bundle(self, int npu_bundle_index)
Forces the use of a specific NPU bundle.
Definition type.py:603
__init__(self, Optional[int] num_cores=None)
Default constructor.
Definition type.py:465
CoreMode get_core_mode(self)
Gets the core mode to be applied to the model.
Definition type.py:555
None set_async_pipeline_enabled(self, bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.
Definition type.py:634
bool get_async_pipeline_enabled(self)
Returns whether the asynchronous pipeline is enabled in this configuration.
Definition type.py:651
int get_activation_slots(self)
Returns activation buffer slot count.
Definition type.py:681
bool set_auto_core_mode(self)
Sets the model to detect CoreMode automatically.
Definition type.py:476
None set_activation_slots(self, int num)
Sets activation buffer slots for multi-activation supported model.
Definition type.py:659
int get_num_cores(self)
Gets the number of cores to be allocated for the model.
Definition type.py:592
bool set_global4_core_mode(self, List[Cluster] clusters=[Cluster.Cluster0, Cluster.Cluster1])
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.
Definition type.py:528
CoreAllocationPolicy get_core_allocation_policy(self)
Gets the core allocation policy to be applied to the model.
Definition type.py:581
bool set_single_core_mode(self, Optional[int] num_cores=None, Optional[List[CoreId]] core_ids=None)
Sets the model to use single-core mode for inference with a specified number of local cores.
Definition type.py:493
Struct for scale values.
Definition type.py:56
bool is_uniform(self)
Definition type.py:78
float scale(self)
Definition type.py:74
float __getitem__(self, int i)
Returns the scale value at the specified index.
Definition type.py:93
List[float] scale_list(self)
Definition type.py:70
List[int] get_available_device_numbers()
Get the number of available NPU devices.
Definition type.py:888
str get_model_summary(str mxq_path)
Generates a structured summary of the specified MXQ model.
Definition type.py:867
bool start_tracing_events(str path)
Starts event tracing and prepares to save the trace log to a specified file.
Definition type.py:841
stop_tracing_events()
Stops event tracing and writes the recorded trace log.
Definition type.py:857