type.py Source File
Mobilint SDK Runtime Library v0.30
from typing import List, Optional, Tuple
from enum import Enum

import numpy as np

import maccel.maccel as _cMaccel

class Cluster(Enum):
    """
    @brief Enumerates clusters in the ARIES NPU.

    @note The ARIES NPU consists of two clusters, each containing one global core and
    four local cores, totaling eight local cores. REGULUS has only a single cluster
    (Cluster0) with one local core (Core0).
    """

    Cluster0 = _cMaccel.Cluster.Cluster0
    Cluster1 = _cMaccel.Cluster.Cluster1
    Error = _cMaccel.Cluster.Error


class Core(Enum):
    """
    @brief Enumerates cores within a cluster in the ARIES NPU.

    @note The ARIES NPU consists of two clusters, each containing one global core and
    four local cores, totaling eight local cores. REGULUS has only a single cluster
    (Cluster0) with one local core (Core0).
    """

    Core0 = _cMaccel.Core.Core0
    Core1 = _cMaccel.Core.Core1
    Core2 = _cMaccel.Core.Core2
    Core3 = _cMaccel.Core.Core3
    All = _cMaccel.Core.All
    GlobalCore = _cMaccel.Core.GlobalCore
    Error = _cMaccel.Core.Error


class CoreAllocationPolicy(Enum):
    """@brief Core allocation policy"""

    Auto = _cMaccel.CoreAllocationPolicy.Auto
    Manual = _cMaccel.CoreAllocationPolicy.Manual


class LatencySetPolicy(Enum):
    """@deprecated This enum is deprecated."""

    Auto = _cMaccel.LatencySetPolicy.Auto
    Manual = _cMaccel.LatencySetPolicy.Manual


class MaintenancePolicy(Enum):
    """@deprecated This enum is deprecated."""

    Maintain = _cMaccel.MaintenancePolicy.Maintain
    DropExpired = _cMaccel.MaintenancePolicy.DropExpired
    Undefined = _cMaccel.MaintenancePolicy.Undefined


class SchedulePolicy(Enum):
    """@deprecated This enum is deprecated."""

    FIFO = _cMaccel.SchedulePolicy.FIFO
    LIFO = _cMaccel.SchedulePolicy.LIFO
    ByPriority = _cMaccel.SchedulePolicy.ByPriority
    Undefined = _cMaccel.SchedulePolicy.Undefined


class Scale:
    """@brief Struct for scale values."""

    def __init__(self, scale: float, is_uniform: bool, scale_list: List[float]):
        self._scale = _cMaccel.Scale()
        self._scale.scale = scale
        self._scale.is_uniform = is_uniform
        self._scale.scale_list = scale_list

    @classmethod
    def from_cpp(cls, _scale: _cMaccel.Scale):
        return cls(_scale.scale, _scale.is_uniform, _scale.scale_list)

    @property
    def scale_list(self) -> List[float]:
        return self._scale.scale_list

    @property
    def scale(self) -> float:
        return self._scale.scale

    @property
    def is_uniform(self) -> bool:
        return self._scale.is_uniform

    @scale_list.setter
    def scale_list(self, value: List[float]):
        self._scale.scale_list = value

    @scale.setter
    def scale(self, value: float):
        self._scale.scale = value

    @is_uniform.setter
    def is_uniform(self, value: bool):
        self._scale.is_uniform = value

    def __getitem__(self, i: int) -> float:
        """
        @brief Returns the scale value at the specified index.

        @param[in] i Index.
        @return Scale value.
        """
        return self._scale[i]

    def __repr__(self):
        d = {
            "scale": self.scale,
            "is_uniform": self.is_uniform,
            "scale_list": self.scale_list,
        }
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={}".format(k, v) for k, v in d.items()),
        )

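# Example (illustrative sketch, not part of the original file): constructing a
# Scale with per-channel factors. The numeric values here are arbitrary.
#
#   s = Scale(scale=0.5, is_uniform=False, scale_list=[0.5, 0.25, 0.125])
#   s.scale_list   # [0.5, 0.25, 0.125]
#   s[1]           # per the __getitem__ docstring, the scale value at index 1
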
class CoreId:
    """
    @brief Represents a unique identifier for an NPU core.

    A CoreId consists of a Cluster and a Core, identifying a specific core
    within an NPU.
    """

    def __init__(self, cluster: Cluster, core: Core):
        self._core_id = _cMaccel.CoreId()
        self._core_id.cluster = cluster.value
        self._core_id.core = core.value

    @classmethod
    def from_cpp(cls, _core_id: _cMaccel.CoreId):
        return cls(Cluster(_core_id.cluster), Core(_core_id.core))

    @property
    def cluster(self) -> Cluster:
        return Cluster(self._core_id.cluster)

    @property
    def core(self) -> Core:
        return Core(self._core_id.core)

    @cluster.setter
    def cluster(self, value: Cluster):
        self._core_id.cluster = value.value

    @core.setter
    def core(self, value: Core):
        self._core_id.core = value.value

    def __eq__(self, other) -> bool:
        """
        @brief Checks if two CoreId objects are equal.

        @return True if both CoreId objects are identical, False otherwise.
        """
        return self._core_id == other._core_id

    def __lt__(self, other) -> bool:
        """
        @brief Compares two CoreId objects for ordering.

        @return True if this CoreId is less than the given CoreId, False otherwise.
        """
        return self._core_id < other._core_id

    def __repr__(self):
        d = {"cluster": self.cluster, "core": self.core}
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={}".format(k, v) for k, v in d.items()),
        )

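# Example (illustrative sketch, not part of the original file): naming a specific
# local core. The chosen cluster/core values are arbitrary.
#
#   core_id = CoreId(Cluster.Cluster0, Core.Core2)
#   core_id.cluster                                   # Cluster.Cluster0
#   core_id == CoreId(Cluster.Cluster0, Core.Core2)   # True
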
class Buffer:
    """
    @brief A simple byte-sized buffer.

    This struct represents a contiguous block of memory for storing byte-sized data.
    """

    def __init__(self, _buffer: Optional[_cMaccel.Buffer] = None):
        self._buffer = _cMaccel.Buffer() if _buffer is None else _buffer

    @property
    def size(self) -> int:
        return self._buffer.size

    @size.setter
    def size(self, value: int):
        self._buffer.size = value

    def set_buffer(self, arr: np.ndarray):
        self._buffer.set_buffer(np.ascontiguousarray(arr))

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._buffer.size})"

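# Example (illustrative sketch, not part of the original file): filling a Buffer
# from a NumPy array. The array shape and dtype are arbitrary; set_buffer() makes
# the array C-contiguous via np.ascontiguousarray before handing it to the
# underlying C++ buffer.
#
#   buf = Buffer()
#   buf.set_buffer(np.zeros((224, 224, 3), dtype=np.uint8))
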
class CoreMode(Enum):
    """
    @brief Defines the core mode for NPU execution.

    Supported core modes include single-core, multi-core, global4-core, and
    global8-core. For detailed explanations of each mode, refer to the following
    functions:

    - `ModelConfig.set_single_core_mode()`
    - `ModelConfig.set_multi_core_mode()`
    - `ModelConfig.set_global4_core_mode()`
    - `ModelConfig.set_global8_core_mode()`
    """

    Single = _cMaccel.CoreMode.Single
    Multi = _cMaccel.CoreMode.Multi
    Global = _cMaccel.CoreMode.Global
    Global4 = _cMaccel.CoreMode.Global4
    Global8 = _cMaccel.CoreMode.Global8
    Error = _cMaccel.CoreMode.Error

class BufferInfo:
    """@brief Struct representing input/output buffer information."""

    def __init__(
        self,
        original_height: int = 0,
        original_width: int = 0,
        original_channel: int = 0,
        reshaped_height: int = 0,
        reshaped_width: int = 0,
        reshaped_channel: int = 0,
        height: int = 0,
        width: int = 0,
        channel: int = 0,
        max_height: int = 0,
        max_width: int = 0,
        max_channel: int = 0,
        max_cache_size: int = 0,
    ):
        self._buffer_info = _cMaccel.BufferInfo()
        self._buffer_info.original_height = original_height
        self._buffer_info.original_width = original_width
        self._buffer_info.original_channel = original_channel
        self._buffer_info.reshaped_height = reshaped_height
        self._buffer_info.reshaped_width = reshaped_width
        self._buffer_info.reshaped_channel = reshaped_channel
        self._buffer_info.height = height
        self._buffer_info.width = width
        self._buffer_info.channel = channel
        self._buffer_info.max_height = max_height
        self._buffer_info.max_width = max_width
        self._buffer_info.max_channel = max_channel
        self._buffer_info.max_cache_size = max_cache_size

    @classmethod
    def from_cpp(cls, _buffer_info: _cMaccel.BufferInfo):
        return cls(
            _buffer_info.original_height,
            _buffer_info.original_width,
            _buffer_info.original_channel,
            _buffer_info.reshaped_height,
            _buffer_info.reshaped_width,
            _buffer_info.reshaped_channel,
            _buffer_info.height,
            _buffer_info.width,
            _buffer_info.channel,
            _buffer_info.max_height,
            _buffer_info.max_width,
            _buffer_info.max_channel,
            _buffer_info.max_cache_size,
        )

    @property
    def original_height(self) -> int:
        """Height of original input/output"""
        return self._buffer_info.original_height

    @property
    def original_width(self) -> int:
        """Width of original input/output"""
        return self._buffer_info.original_width

    @property
    def original_channel(self) -> int:
        """Channel of original input/output"""
        return self._buffer_info.original_channel

    @property
    def reshaped_height(self) -> int:
        """Height of reshaped input/output"""
        return self._buffer_info.reshaped_height

    @property
    def reshaped_width(self) -> int:
        """Width of reshaped input/output"""
        return self._buffer_info.reshaped_width

    @property
    def reshaped_channel(self) -> int:
        """Channel of reshaped input/output"""
        return self._buffer_info.reshaped_channel

    @property
    def height(self) -> int:
        """Height of NPU input/output"""
        return self._buffer_info.height

    @property
    def width(self) -> int:
        """Width of NPU input/output"""
        return self._buffer_info.width

    @property
    def channel(self) -> int:
        """Channel of NPU input/output"""
        return self._buffer_info.channel

    @property
    def max_height(self) -> int:
        """Maximum height of original input/output if data is sequential."""
        return self._buffer_info.max_height

    @property
    def max_width(self) -> int:
        """Maximum width of original input/output if data is sequential."""
        return self._buffer_info.max_width

    @property
    def max_channel(self) -> int:
        """Maximum channel of original input/output if data is sequential."""
        return self._buffer_info.max_channel

    @property
    def max_cache_size(self) -> int:
        """Maximum KV-cache size, relevant for LLM models using KV cache."""
        return self._buffer_info.max_cache_size

    @original_height.setter
    def original_height(self, value: int):
        self._buffer_info.original_height = value

    @original_width.setter
    def original_width(self, value: int):
        self._buffer_info.original_width = value

    @original_channel.setter
    def original_channel(self, value: int):
        self._buffer_info.original_channel = value

    @reshaped_height.setter
    def reshaped_height(self, value: int):
        self._buffer_info.reshaped_height = value

    @reshaped_width.setter
    def reshaped_width(self, value: int):
        self._buffer_info.reshaped_width = value

    @reshaped_channel.setter
    def reshaped_channel(self, value: int):
        self._buffer_info.reshaped_channel = value

    @height.setter
    def height(self, value: int):
        self._buffer_info.height = value

    @width.setter
    def width(self, value: int):
        self._buffer_info.width = value

    @channel.setter
    def channel(self, value: int):
        self._buffer_info.channel = value

    @max_height.setter
    def max_height(self, value: int):
        self._buffer_info.max_height = value

    @max_width.setter
    def max_width(self, value: int):
        self._buffer_info.max_width = value

    @max_channel.setter
    def max_channel(self, value: int):
        self._buffer_info.max_channel = value

    @max_cache_size.setter
    def max_cache_size(self, value: int):
        self._buffer_info.max_cache_size = value

    def original_size(self) -> int:
        """
        @brief Returns the total size of the original input/output.

        @return The data size.
        """
        return self._buffer_info.original_size()

    def reshaped_size(self) -> int:
        """
        @brief Returns the total size of the reshaped input/output.

        @return The data size.
        """
        return self._buffer_info.reshaped_size()

    def size(self) -> int:
        """
        @brief Returns the total size of the NPU input/output.

        @return The data size.
        """
        return self._buffer_info.size()

    def original_shape(self) -> Tuple[int, int, int]:
        return self._buffer_info.original_shape()

    def original_shape_chw(self) -> Tuple[int, int, int]:
        return self._buffer_info.original_shape_chw()

    def reshaped_shape(self) -> Tuple[int, int, int]:
        return self._buffer_info.reshaped_shape()

    def reshaped_shape_chw(self) -> Tuple[int, int, int]:
        return self._buffer_info.reshaped_shape_chw()

    def shape(self) -> Tuple[int, int, int]:
        return self._buffer_info.shape()

    def shape_chw(self) -> Tuple[int, int, int]:
        return self._buffer_info.shape_chw()

    def __repr__(self):
        d = {
            "original_height": self._buffer_info.original_height,
            "original_width": self._buffer_info.original_width,
            "original_channel": self._buffer_info.original_channel,
            "reshaped_height": self._buffer_info.reshaped_height,
            "reshaped_width": self._buffer_info.reshaped_width,
            "reshaped_channel": self._buffer_info.reshaped_channel,
            "height": self._buffer_info.height,
            "width": self._buffer_info.width,
            "channel": self._buffer_info.channel,
            "max_height": self._buffer_info.max_height,
            "max_width": self._buffer_info.max_width,
            "max_channel": self._buffer_info.max_channel,
            "max_cache_size": self._buffer_info.max_cache_size,
        }
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={}".format(k, v) for k, v in d.items()),
        )

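# Example (illustrative sketch, not part of the original file): inspecting a
# BufferInfo. In practice these objects typically come from a loaded model rather
# than being built by hand; the dimensions below are arbitrary, and the (H, W, C)
# ordering of the shape methods is an assumption suggested by the separate *_chw
# variants.
#
#   info = BufferInfo(original_height=224, original_width=224, original_channel=3)
#   info.original_shape()   # presumably (224, 224, 3)
#   info.original_size()    # total size of the original input/output
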

class ModelConfig:
    """
    @brief Configures the core mode and core allocation of a model for NPU inference.

    The `ModelConfig` class provides methods for setting a core mode and allocating
    cores for NPU inference. Supported core modes are single-core, multi-core,
    global4-core, and global8-core. Users can also specify which cores to allocate
    for the model. Additionally, the configuration offers an option to enforce the
    use of a specific NPU bundle.

    @note Deprecated functions are included for backward compatibility, but it is
    recommended to use the newer core mode configuration methods.
    """

    def __init__(self, num_cores: Optional[int] = None):
        """
        @brief Default constructor. This default-constructed object is initially set
        to single-core mode with all NPU local cores included.
        """
        self._model_config = (
            _cMaccel.ModelConfig()
            if num_cores is None
            else _cMaccel.ModelConfig(num_cores)
        )

    def include_all_cores(self) -> bool:
        return self._model_config.include_all_cores()

    def exclude_all_cores(self) -> bool:
        return self._model_config.exclude_all_cores()

    def include(self, cluster: Cluster, core: Optional[Core] = None) -> bool:
        if core is None:
            return self._model_config.include(cluster.value)
        else:
            return self._model_config.include(cluster.value, core.value)

    def exclude(self, cluster: Cluster, core: Optional[Core] = None) -> bool:
        if core is None:
            return self._model_config.exclude(cluster.value)
        else:
            return self._model_config.exclude(cluster.value, core.value)

    def set_single_core_mode(
        self, num_cores: Optional[int] = None, core_ids: Optional[List[CoreId]] = None
    ) -> bool:
        """
        @brief Sets the model to use single-core mode for inference with a specified
        number of local cores.

        In single-core mode, each local core executes model inference independently.
        The number of cores used is specified by the `num_cores` parameter, and the
        core allocation policy is set to `CoreAllocationPolicy.Auto`, meaning the
        model will be automatically allocated to available local cores when it is
        launched to the NPU, specifically when the `Model.launch()` function is
        called. Alternatively, the user can specify a list of CoreIds to determine
        which cores to use for inference.

        @note Use exactly one of `num_cores` or `core_ids`, not both.

        @param[in] num_cores The number of local cores to use for inference.
        @param[in] core_ids A list of CoreIds to be used for model inference.

        @return True if the mode was successfully set, False otherwise.
        """
        if num_cores is not None and core_ids is None:
            return self._model_config.set_single_core_mode(num_cores)
        elif core_ids is not None and num_cores is None:
            return self._model_config.set_single_core_mode(
                [core_id._core_id for core_id in core_ids]
            )
        raise ValueError(
            "`set_single_core_mode` needs exactly one of `num_cores` or `core_ids`."
        )

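    # Example (illustrative sketch, not part of the original file): the two
    # mutually exclusive ways to request single-core mode.
    #
    #   config = ModelConfig()
    #   config.set_single_core_mode(num_cores=2)   # auto-allocate two local cores
    #
    #   # ...or pin the model to explicit cores instead:
    #   config.set_single_core_mode(
    #       core_ids=[CoreId(Cluster.Cluster0, Core.Core0),
    #                 CoreId(Cluster.Cluster0, Core.Core1)]
    #   )
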
    def set_global_core_mode(self, clusters: List[Cluster]) -> bool:
        """@deprecated"""
        return self._model_config.set_global_core_mode([c.value for c in clusters])

    def set_global4_core_mode(self, clusters: List[Cluster]) -> bool:
        """
        @brief Sets the model to use global4-core mode for inference with a specified
        set of NPU clusters.

        For Aries NPU, there are two clusters, each consisting of four local cores.
        In global4-core mode, four local cores within the same cluster work together
        to execute the model inference.

        @param[in] clusters A list of clusters to be used for model inference.

        @return True if the mode was successfully set, False otherwise.
        """
        return self._model_config.set_global4_core_mode([c.value for c in clusters])

    def set_global8_core_mode(self) -> bool:
        """
        @brief Sets the model to use global8-core mode for inference.

        For Aries NPU, there are two clusters, each consisting of four local cores.
        In global8-core mode, all eight local cores across the two clusters work
        together to execute the model inference.

        @return True if the mode was successfully set, False otherwise.
        """
        return self._model_config.set_global8_core_mode()

    def get_core_mode(self) -> CoreMode:
        """
        @brief Gets the core mode to be applied to the model.

        This reflects the core mode that will be used when the model is created.

        @return The `CoreMode` to be applied to the model.
        """
        return CoreMode(self._model_config.get_core_mode())

    def set_multi_core_mode(self, clusters: List[Cluster]) -> bool:
        """
        @brief Sets the model to use multi-core mode for batch inference.

        In multi-core mode, on Aries NPU, the four local cores within a cluster work
        together to process batch inference tasks efficiently. This mode is optimized
        for batch processing.

        @param[in] clusters A list of clusters to be used for multi-core batch
                            inference.

        @return True if the mode was successfully set, False otherwise.
        """
        return self._model_config.set_multi_core_mode([c.value for c in clusters])

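    # Example (illustrative sketch, not part of the original file): choosing a
    # grouped core mode. Which mode performs best depends on the model and the
    # batch size.
    #
    #   config = ModelConfig()
    #   config.set_global4_core_mode([Cluster.Cluster0])       # 4 cores, 1 cluster
    #   # or: config.set_global8_core_mode()                   # all 8 local cores
    #   # or: config.set_multi_core_mode([Cluster.Cluster0])   # batch inference
    #   config.get_core_mode()   # the CoreMode that will be applied
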
    def set_auto_mode(self, num_cores: int = 1) -> bool:
        """@deprecated"""
        return self._model_config.set_auto_mode(num_cores)

    def set_manual_mode(self) -> bool:
        """@deprecated"""
        return self._model_config.set_manual_mode()

    def get_core_allocation_policy(self) -> CoreAllocationPolicy:
        """
        @brief Gets the core allocation policy to be applied to the model.

        This reflects the core allocation policy that will be used when the model is
        created.

        @return The `CoreAllocationPolicy` to be applied to the model.
        """
        return CoreAllocationPolicy(self._model_config.get_core_allocation_policy())

    def get_num_cores(self) -> int:
        """
        @brief Gets the number of cores to be allocated for the model.

        This represents the number of cores that will be allocated for inference
        when the model is launched to the NPU.

        @return The number of cores to be allocated for the model.
        """
        return self._model_config.get_num_cores()

    def force_single_npu_bundle(self, npu_bundle_index: int) -> bool:
        """
        @brief Forces the use of a specific NPU bundle.

        This function forces the selection of a specific NPU bundle. If a
        non-negative index is provided, the corresponding NPU bundle is selected and
        runs without CPU offloading. If -1 is provided, all NPU bundles are used with
        CPU offloading enabled.

        @param[in] npu_bundle_index The index of the NPU bundle to force. A
                                    non-negative integer selects a specific NPU
                                    bundle (runs without CPU offloading), or -1 to
                                    enable all NPU bundles with CPU offloading.

        @return True if the index is valid and the NPU bundle is successfully set,
                False if the index is invalid (less than -1).
        """
        return self._model_config.force_single_npu_bundle(npu_bundle_index)

    def get_forced_npu_bundle_index(self) -> int:
        """
        @brief Retrieves the index of the forced NPU bundle.

        This function returns the index of the NPU bundle that has been forced using
        the `force_single_npu_bundle` function. If no NPU bundle is forced, the
        returned value will be -1.

        @return The index of the forced NPU bundle, or -1 if no bundle is forced.
        """
        return self._model_config.get_forced_npu_bundle_index()

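    # Example (illustrative sketch, not part of the original file): forcing a
    # single NPU bundle and reading the setting back. Bundle index 0 is arbitrary.
    #
    #   config = ModelConfig()
    #   config.force_single_npu_bundle(0)      # run bundle 0, no CPU offloading
    #   config.get_forced_npu_bundle_index()   # 0; -1 would mean nothing is forced
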
    def set_async_pipeline_enabled(self, enable: bool) -> None:
        """
        @brief Enables or disables the asynchronous pipeline required for
        asynchronous inference.

        Call this function with `enable` set to `True` if you intend to use
        `Model.infer_async()`, as the asynchronous pipeline is necessary for its
        operation.

        If you are only using synchronous inference, such as `Model.infer()` or
        `Model.infer_to_float()`, it is recommended to keep the asynchronous
        pipeline disabled to avoid unnecessary overhead.

        @param[in] enable Set to `True` to enable the asynchronous pipeline; set to
                          `False` to disable it.
        """
        return self._model_config.set_async_pipeline_enabled(enable)

    def get_async_pipeline_enabled(self) -> bool:
        """
        @brief Returns whether the asynchronous pipeline is enabled in this
        configuration.

        @return `True` if the asynchronous pipeline is enabled; `False` otherwise.
        """
        return self._model_config.get_async_pipeline_enabled()

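    # Example (illustrative sketch, not part of the original file): enabling the
    # asynchronous pipeline before using `Model.infer_async()` (defined outside
    # this file).
    #
    #   config = ModelConfig()
    #   config.set_async_pipeline_enabled(True)
    #   config.get_async_pipeline_enabled()   # True
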
    @property
    def schedule_policy(self) -> SchedulePolicy:
        return SchedulePolicy(self._model_config.schedule_policy)

    @property
    def latency_set_policy(self) -> LatencySetPolicy:
        return LatencySetPolicy(self._model_config.latency_set_policy)

    @property
    def maintenance_policy(self) -> MaintenancePolicy:
        return MaintenancePolicy(self._model_config.maintenance_policy)

    @property
    def early_latencies(self) -> List[int]:
        return self._model_config.early_latencies

    @property
    def finish_latencies(self) -> List[int]:
        return self._model_config.finish_latencies

    @schedule_policy.setter
    def schedule_policy(self, policy: SchedulePolicy):
        """@deprecated This setting has no effect."""
        self._model_config.schedule_policy = policy.value

    @latency_set_policy.setter
    def latency_set_policy(self, policy: LatencySetPolicy):
        """@deprecated This setting has no effect."""
        self._model_config.latency_set_policy = policy.value

    @maintenance_policy.setter
    def maintenance_policy(self, policy: MaintenancePolicy):
        """@deprecated This setting has no effect."""
        self._model_config.maintenance_policy = policy.value

    @early_latencies.setter
    def early_latencies(self, latencies: List[int]):
        """@deprecated This setting has no effect."""
        self._model_config.early_latencies = latencies

    @finish_latencies.setter
    def finish_latencies(self, latencies: List[int]):
        """@deprecated This setting has no effect."""
        self._model_config.finish_latencies = latencies

    def get_core_ids(self) -> List[CoreId]:
        """
        @brief Returns the list of NPU CoreIds to be used for model inference.

        This function returns a list of NPU CoreIds that the model will use for
        inference. When `set_single_core_mode(num_cores)` is called and the
        core allocation policy is set to CoreAllocationPolicy.Auto, it will return
        an empty list.

        @return A list of NPU CoreIds.
        """
        return [
            CoreId(Cluster(core_id.cluster), Core(core_id.core))
            for core_id in self._model_config.core_ids
        ]

    def __repr__(self):
        d = {
            "core_mode": self.get_core_mode(),
            "core_allocation_policy": self.get_core_allocation_policy(),
            "core_ids": self.get_core_ids(),
            "num_cores": self.get_num_cores(),
            "forced_npu_bundle_index": self.get_forced_npu_bundle_index(),
        }
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={}".format(k, v) for k, v in d.items()),
        )


class LogLevel(Enum):
    """@brief LogLevel"""

    DEBUG = _cMaccel.LogLevel.DEBUG
    INFO = _cMaccel.LogLevel.INFO
    WARN = _cMaccel.LogLevel.WARN
    ERR = _cMaccel.LogLevel.ERR
    FATAL = _cMaccel.LogLevel.FATAL
    OFF = _cMaccel.LogLevel.OFF


def set_log_level(level: LogLevel):
    _cMaccel.set_log_level(level.value)

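# Example (illustrative sketch, not part of the original file): raising the log
# threshold so that only warnings and more severe messages are emitted.
#
#   set_log_level(LogLevel.WARN)
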
class CacheType(Enum):
    """@brief CacheType"""

    Default = _cMaccel.CacheType.Default
    Batch = _cMaccel.CacheType.Batch
    Error = _cMaccel.CacheType.Error


class CacheInfo:
    """@brief Struct representing KV-cache information."""

    def __init__(
        self,
        cache_type: CacheType = CacheType.Error,
        name: str = "",
        layer_hash: str = "",
        size: int = 0,
        num_batches: int = 0,
    ):
        self._cache_info = _cMaccel.CacheInfo()
        self._cache_info.cache_type = cache_type.value
        self._cache_info.name = name
        self._cache_info.layer_hash = layer_hash
        self._cache_info.size = size
        self._cache_info.num_batches = num_batches

    @classmethod
    def from_cpp(cls, _cache_info: _cMaccel.CacheInfo):
        return cls(
            CacheType(_cache_info.cache_type),
            _cache_info.name,
            _cache_info.layer_hash,
            _cache_info.size,
            _cache_info.num_batches,
        )

    @property
    def cache_type(self) -> CacheType:
        return CacheType(self._cache_info.cache_type)

    @property
    def name(self) -> str:
        return self._cache_info.name

    @property
    def layer_hash(self) -> str:
        return self._cache_info.layer_hash

    @property
    def size(self) -> int:
        return self._cache_info.size

    @property
    def num_batches(self) -> int:
        return self._cache_info.num_batches

    @cache_type.setter
    def cache_type(self, value: CacheType):
        self._cache_info.cache_type = value.value

    @name.setter
    def name(self, value: str):
        self._cache_info.name = value

    @layer_hash.setter
    def layer_hash(self, value: str):
        self._cache_info.layer_hash = value

    @size.setter
    def size(self, value: int):
        self._cache_info.size = value

    @num_batches.setter
    def num_batches(self, value: int):
        self._cache_info.num_batches = value


def start_tracing_events(path: str) -> bool:
    """
    @brief Starts event tracing and prepares to save the trace log to a specified
    file.

    The trace log is recorded in "Chrome Tracing JSON format," which can be
    viewed at https://ui.perfetto.dev/.

    The trace log is not written immediately; it is saved only when
    stop_tracing_events() is called.

    @param[in] path The file path where the trace log should be stored.
    @return True if tracing starts successfully, False otherwise.
    """
    return _cMaccel.start_tracing_events(path)


def stop_tracing_events():
    """
    @brief Stops event tracing and writes the recorded trace log.

    This function finalizes tracing and saves the collected trace data
    to the file specified when start_tracing_events() was called.
    """
    _cMaccel.stop_tracing_events()

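# Example (illustrative sketch, not part of the original file): tracing a block
# of work. The output path is a placeholder; the resulting JSON can be opened at
# https://ui.perfetto.dev/.
#
#   if start_tracing_events("/tmp/maccel_trace.json"):
#       ...  # run inference here
#       stop_tracing_events()  # finalizes and writes the trace file
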

def get_model_summary(mxq_path: str) -> str:
    """
    @brief Generates a structured summary of the specified MXQ model.

    Returns an overview of the model contained in the MXQ file, including:
    - Target NPU hardware
    - Supported core modes and their associated cores
    - The total number of model variants
    - For each variant:
      - Input and output tensor shapes
      - A list of layers with their types, output shapes, and input layer indices

    The summary is returned as a human-readable string in a table and is useful for
    inspecting model compatibility, structure, and input/output shapes.

    @param[in] mxq_path Path to the MXQ model file.
    @return A formatted string containing the model summary.
    """
    return _cMaccel.get_model_summary(mxq_path)

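# Example (illustrative sketch, not part of the original file): printing the
# summary of an MXQ file. "model.mxq" is a placeholder path.
#
#   print(get_model_summary("model.mxq"))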