5from typing
import List, Optional, Tuple
10import qbruntime.qbruntime
as _cQbRuntime
19 @brief Enumerates clusters in the ARIES NPU.
21 @note The ARIES NPU consists of two clusters, each containing one global core and
22 four local cores, totaling eight local cores. REGULUS has only a single cluster
23 (Cluster0) with one local core (Core0).
26 Cluster0 = _cQbRuntime.Cluster.Cluster0
27 Cluster1 = _cQbRuntime.Cluster.Cluster1
28 Error = _cQbRuntime.Cluster.Error
33 @brief Enumerates cores within a cluster in the ARIES NPU.
35 @note The ARIES NPU consists of two clusters, each containing one global core and
36 four local cores, totaling eight local cores. REGULUS has only a single cluster
37 (Cluster0) with one local core (Core0).
40 Core0 = _cQbRuntime.Core.Core0
41 Core1 = _cQbRuntime.Core.Core1
42 Core2 = _cQbRuntime.Core.Core2
43 Core3 = _cQbRuntime.Core.Core3
44 All = _cQbRuntime.Core.All
45 GlobalCore = _cQbRuntime.Core.GlobalCore
46 Error = _cQbRuntime.Core.Error
50 """@brief Core allocation policy"""
52 Auto = _cQbRuntime.CoreAllocationPolicy.Auto
53 Manual = _cQbRuntime.CoreAllocationPolicy.Manual
57 """@brief Struct for scale values."""
59 def __init__(self, scale: float, is_uniform: bool, scale_list: List[float]):
60 self.
_scale = _cQbRuntime.Scale()
62 self.
_scale.is_uniform = is_uniform
63 self.
_scale.scale_list = scale_list
66 def from_cpp(cls, _scale: _cQbRuntime.Scale):
67 return cls(_scale.scale, _scale.is_uniform, _scale.scale_list)
70 def scale_list(self) -> List[float]:
71 return self.
_scale.scale_list
74 def scale(self) -> float:
78 def is_uniform(self) -> bool:
79 return self.
_scale.is_uniform
82 def scale_list(self, value: List[float]):
83 self.
_scale.scale_list = value
86 def scale(self, value: float):
90 def is_uniform(self, value: bool):
91 self.
_scale.is_uniform = value
95 @brief Returns the scale value at the specified index.
108 return "{}({})".format(
109 self.__class__.__name__,
110 ", ".join(
"{}={}".format(k, v)
for k, v
in d.items()),
116 @brief Represents a unique identifier for an NPU core.
118 A CoreId consists of a Cluster and a Core, identifying a specific core
122 def __init__(self, cluster: Cluster, core: Core):
123 self.
_core_id = _cQbRuntime.CoreId()
124 self.
_core_id.cluster = cluster.value
128 def from_cpp(cls, _core_id: _cQbRuntime.CoreId):
129 return cls(
Cluster(_core_id.cluster),
Core(_core_id.core))
132 def cluster(self) -> Cluster:
136 def core(self) -> Core:
140 def cluster(self, value: Cluster):
144 def core(self, value: Core):
149 @brief Checks if two CoreId objects are equal.
151 @return True if both CoreId objects are identical, False otherwise.
153 return self.
_core_id == other._core_id
157 @brief Compares two CoreId objects for ordering.
159 @return True if this CoreId is less than the given CoreId, False otherwise.
161 return self.
_core_id < other._core_id
165 return "{}({})".format(
166 self.__class__.__name__,
167 ", ".join(
"{}={}".format(k, v)
for k, v
in d.items()),
173 @brief A simple byte-sized buffer.
175 This struct represents a contiguous block of memory for storing byte-sized data.
178 def __init__(self, _buffer: Optional[_cQbRuntime.Buffer] =
None):
179 self.
_buffer = _cQbRuntime.Buffer()
if _buffer
is None else _buffer
182 def size(self) -> int:
186 def size(self, value: int):
189 def set_buffer(self, arr: np.ndarray):
190 self.
_buffer.set_buffer(np.ascontiguousarray(arr))
193 return f
"{self.__class__.__name__}(size={self._buffer.size})"
198 @brief Defines the core mode for NPU execution.
200 Supported core modes include single-core, multi-core, global4-core, and global8-core.
201 For detailed explanations of each mode, refer to the following functions:
203 - `ModelConfig.set_auto_core_mode()`
204 - `ModelConfig.set_single_core_mode()`
205 - `ModelConfig.set_multi_core_mode()`
206 - `ModelConfig.set_global4_core_mode()`
207 - `ModelConfig.set_global8_core_mode()`
210 Single = _cQbRuntime.CoreMode.Single
211 Multi = _cQbRuntime.CoreMode.Multi
212 Global = _cQbRuntime.CoreMode.Global
213 Global4 = _cQbRuntime.CoreMode.Global4
214 Global8 = _cQbRuntime.CoreMode.Global8
215 Auto = _cQbRuntime.CoreMode.Auto
216 Error = _cQbRuntime.CoreMode.Error
220 """@brief Struct representing input/output buffer information."""
224 original_height: int = 0,
225 original_width: int = 0,
226 original_channel: int = 0,
227 reshaped_height: int = 0,
228 reshaped_width: int = 0,
229 reshaped_channel: int = 0,
235 max_channel: int = 0,
236 max_cache_size: int = 0,
254 def from_cpp(cls, _buffer_info: _cQbRuntime.BufferInfo):
256 _buffer_info.original_height,
257 _buffer_info.original_width,
258 _buffer_info.original_channel,
259 _buffer_info.reshaped_height,
260 _buffer_info.reshaped_width,
261 _buffer_info.reshaped_channel,
264 _buffer_info.channel,
265 _buffer_info.max_height,
266 _buffer_info.max_width,
267 _buffer_info.max_channel,
268 _buffer_info.max_cache_size,
273 """Height of original input/output"""
278 """Width of original input/output"""
283 """Channel of original input/output"""
288 """Height of reshaped input/output"""
293 """Width of reshaped input/output"""
298 """Channel of reshaped input/output"""
303 """Height of NPU input/output"""
308 """Width of NPU input/output"""
313 """Channel of NPU input/output"""
318 """Maximum height of original input/output if data is sequential."""
323 """Maximum width of original input/output if data is sequential."""
328 """Maximum channel of original input/output if data is sequential."""
333 """Maximum KV-cache size, relevant for LLM models using KV cache."""
336 @original_height.setter
340 @original_width.setter
344 @original_channel.setter
348 @reshaped_height.setter
352 @reshaped_width.setter
356 @reshaped_channel.setter
361 def height(self, value: int):
365 def width(self, value: int):
384 @max_cache_size.setter
390 @brief Returns the total size of the original input/output.
392 @return The data size.
398 @brief Returns the total size of the reshaped input/output.
400 @return The data size.
406 @brief Returns the total size of the NPU input/output.
408 @return The data size.
412 def original_shape(self) -> Tuple[int, int, int]:
415 def original_shape_chw(self) -> Tuple[int, int, int]:
418 def reshaped_shape(self) -> Tuple[int, int, int]:
421 def reshaped_shape_chw(self) -> Tuple[int, int, int]:
424 def shape(self) -> Tuple[int, int, int]:
427 def shape_chw(self) -> Tuple[int, int, int]:
446 return "{}({})".format(
447 self.__class__.__name__,
448 ", ".join(
"{}={}".format(k, v)
for k, v
in d.items()),
454 @brief Configures a core mode and core allocation of a model for NPU inference.
455 The `ModelConfig` class provides methods for setting a core mode and allocating
456 cores for NPU inference. Supported core modes are single-core, multi-core,
457 global4-core, and global8-core. Users can also specify which cores to allocate for
458 the model. Additionally, the configuration offers an option to enforce the use of a
461 @note Deprecated functions are included for backward compatibility, but it is
462 recommended to use the newer core mode configuration methods.
465 def __init__(self, num_cores: Optional[int] =
None):
467 @brief Default constructor. This default-constructed object is initially set to
471 _cQbRuntime.ModelConfig()
473 else _cQbRuntime.ModelConfig(num_cores)
478 @brief Sets the model to detect CoreMode automatically.
480 In auto-core mode, the model automatically detects a supported CoreMode
481 while using all available NPU cores.
483 @note If the model has more than one CoreMode, `CoreMode.Auto` is not supported.
485 @note activation buffer slots will be reset after `set_auto_core_mode` is called.
487 @return True if the mode was successfully set, False otherwise.
492 self, num_cores: Optional[int] =
None, core_ids: Optional[List[CoreId]] =
None
495 @brief Sets the model to use single-core mode for inference with a specified number
498 In single-core mode, each local core executes model inference independently.
499 The number of cores used is specified by the `num_cores` parameter, and the core
500 allocation policy is set to `CoreAllocationPolicy.Auto`, meaning the model will be
501 automatically allocated to available local cores when the model is launched to the
502 NPU, specifically when the `Model.launch()` function is called. Alternatively, the user can
503 specify a list of CoreIds to determine which cores to use for inference.
505 @note Use exactly one of `num_cores` or `core_ids`, not both.
507 @param[in] num_cores The number of local cores to use for inference.
508 @param[in] core_ids A list of CoreIds to be used for model inference.
510 @return True if the mode was successfully set, False otherwise.
512 if num_cores
is not None and core_ids
is None:
514 elif core_ids
is not None and num_cores
is None:
516 [core_id._core_id
for core_id
in core_ids]
519 "`set_single_core_mode` needs either `num_cores` and `core_ids`."
527 self, clusters: List[Cluster] = [Cluster.Cluster0, Cluster.Cluster1]
530 @brief Sets the model to use global4-core mode for inference with a specified set
533 For Aries NPU, there are two clusters, each consisting of four local cores. In
534 global4-core mode, four local cores within the same cluster work together to
535 execute the model inference.
537 @param[in] clusters A list of clusters to be used for model inference.
539 @return True if the mode was successfully set, False otherwise.
545 @brief Sets the model to use global8-core mode for inference.
547 For Aries NPU, there are two clusters, each consisting of four local cores. In
548 global8-core mode, all eight local cores across the two clusters work together to
549 execute the model inference.
551 @return True if the mode was successfully set, False otherwise.
557 @brief Gets the core mode to be applied to the model.
559 This reflects the core mode that will be used when the model is created.
561 @return The `CoreMode` to be applied to the model.
566 self, clusters: List[Cluster] = [Cluster.Cluster0, Cluster.Cluster1]
569 @brief Sets the model to use multi-core mode for batch inference.
571 In multi-core mode, on Aries NPU, the four local cores within a cluster work
572 together to process batch inference tasks efficiently. This mode is optimized for
575 @param[in] clusters A list of clusters to be used for multi-core batch inference.
577 @return True if the mode was successfully set, False otherwise.
583 @brief Gets the core allocation policy to be applied to the model.
585 This reflects the core allocation policy that will be used when the model is
588 @return The `CoreAllocationPolicy` to be applied to the model.
594 @brief Gets the number of cores to be allocated for the model.
596 This represents the number of cores that will be allocated for inference
597 when the model is launched to the NPU.
599 @return The number of cores to be allocated for the model.
605 @brief Forces the use of a specific NPU bundle.
607 This function forces the selection of a specific NPU bundle. If a non-negative
608 index is provided, the corresponding NPU bundle is selected and runs without CPU
609 offloading. If -1 is provided, all NPU bundles are used with CPU offloading
612 @param[in] npu_bundle_index The index of the NPU bundle to force. A non-negative
613 integer selects a specific NPU bundle (runs without CPU
614 offloading), or -1 to enable all NPU bundles with CPU
617 @return True if the index is valid and the NPU bundle is successfully set,
618 False if the index is invalid (less than -1).
624 @brief Retrieves the index of the forced NPU bundle.
626 This function returns the index of the NPU bundle that has been forced using the
627 `force_single_npu_bundle` function. If no NPU bundle is forced, the returned value
630 @return The index of the forced NPU bundle, or -1 if no bundle is forced.
636 @brief Enables or disables the asynchronous pipeline required for asynchronous
639 Call this function with `enable` set to `True` if you intend to use
640 `Model.infer_async()`, as the asynchronous pipeline is necessary for their operation.
642 If you are only using synchronous inference, such as `Model.infer()` or
643 `Model.infer_to_float()`, it is recommended to keep the asynchronous pipeline disabled
644 to avoid unnecessary overhead.
646 @param[in] enable Set to `True` to enable the asynchronous pipeline; set to `False`
653 @brief Returns whether the asynchronous pipeline is enabled in this configuration.
655 @return `True` if the asynchronous pipeline is enabled; `False` otherwise.
661 @brief Sets activation buffer slots for multi-activation supported model.
663 Call this function if you want to set the number of activation buffer slots manually.
665 If you do not call this function, the default number of activation buffer slots
666 is set differently depending on the CoreMode.
668 - `CoreMode.Single` : 2 * (the number of target core ids)
669 - `CoreMode.Multi` : 2 * (the number of target clusters)
670 - `CoreMode.Global4` : 2 * (the number of target clusters)
671 - `CoreMode.Global8` : 2
673 @note This function has no effect on MXQ file in version earlier than MXQv7.
675 @note Currently, an LLM model's activation slot count is fixed to 1 and `count` is ignored.
677 @param[in] count Multi activation counts. Must be >= 1.
683 @brief Returns activation buffer slot count.
685 @note This function has no meaning on MXQ file in version earlier than MXQv7.
687 @return Activation buffer slot count.
692 def early_latencies(self) -> List[int]:
696 def finish_latencies(self) -> List[int]:
699 @early_latencies.setter
700 def early_latencies(self, latencies: List[int]):
701 """@deprecated This setting has no effect."""
704 @finish_latencies.setter
705 def finish_latencies(self, latencies: List[int]):
706 """@deprecated This setting has no effect."""
711 @brief Returns the list of NPU CoreIds to be used for model inference.
713 This function returns a list of NPU CoreIds that the model will use for
714 inference. When `set_single_core_mode(num_cores)` is called and the
715 core allocation policy is set to CoreAllocationPolicy.Auto, it will return an
718 @return A list of NPU CoreIds.
733 return "{}({})".format(
734 self.__class__.__name__,
735 ", ".join(
"{}={}".format(k, v)
for k, v
in d.items()),
740 """@brief LogLevel"""
742 DEBUG = _cQbRuntime.LogLevel.DEBUG
743 INFO = _cQbRuntime.LogLevel.INFO
744 WARN = _cQbRuntime.LogLevel.WARN
745 ERR = _cQbRuntime.LogLevel.ERR
746 FATAL = _cQbRuntime.LogLevel.FATAL
747 OFF = _cQbRuntime.LogLevel.OFF
def set_log_level(level: LogLevel):
    """@brief Sets the runtime log level.

    @param[in] level The `LogLevel` to apply to the underlying runtime.
    """
    raw_level = level.value
    _cQbRuntime.set_log_level(raw_level)
755 """@brief CacheType"""
757 Default = _cQbRuntime.CacheType.Default
758 Batch = _cQbRuntime.CacheType.Batch
759 Error = _cQbRuntime.CacheType.Error
763 """@brief Struct representing KV-cache information."""
767 cache_type: CacheType = CacheType.Error,
769 layer_hash: str =
"",
771 num_batches: int = 0,
781 def from_cpp(cls, _cache_info: _cQbRuntime.CacheInfo):
785 _cache_info.layer_hash,
787 _cache_info.num_batches,
791 def cache_type(self) -> CacheType:
795 def name(self) -> str:
799 def layer_hash(self) -> str:
803 def size(self) -> int:
807 def num_batches(self) -> int:
811 def cache_type(self, value: CacheType):
815 def name(self, value: str):
819 def layer_hash(self, value: str):
823 def size(self, value: int):
827 def num_batches(self, value: int):
832 """@brief DataType"""
834 Float32 = _cQbRuntime.DataType.Float32
835 Float16 = _cQbRuntime.DataType.Float16
836 Int8 = _cQbRuntime.DataType.Int8
837 Uint8 = _cQbRuntime.DataType.Uint8
838 Error = _cQbRuntime.DataType.Error
843 @brief Starts event tracing and prepares to save the trace log to a specified file.
845 The trace log is recorded in "Chrome Tracing JSON format," which can be
846 viewed at https://ui.perfetto.dev/.
848 The trace log is not written immediately; it is saved only when
849 stop_tracing_events() is called.
851 @param[in] path The file path where the trace log should be stored.
852 @return True if tracing starts successfully, False otherwise.
854 return _cQbRuntime.start_tracing_events(path)
859 @brief Stops event tracing and writes the recorded trace log.
861 This function finalizes tracing and saves the collected trace data
862 to the file specified when start_tracing_events() was called.
864 _cQbRuntime.stop_tracing_events()
869 @brief Generates a structured summary of the specified MXQ model.
871 Returns an overview of the model contained in the MXQ file, including:
872 - Target NPU hardware
873 - Supported core modes and their associated cores
874 - The total number of model variants
876 - Input and output tensor shapes
877 - A list of layers with their types, output shapes, and input layer indices
879 The summary is returned as a human-readable string in a table and is useful for
880 inspecting model compatibility, structure, and input/output shapes.
882 @param[in] mxq_path Path to the MXQ model file.
883 @return A formatted string containing the model summary.
885 return _cQbRuntime.get_model_summary(mxq_path)
890 @brief Get the number of available NPU devices.
892 @return The number of available NPU devices.
894 return _cQbRuntime.get_available_device_numbers()
Struct representing input/output buffer information.
int reshaped_width(self)
Width of reshaped input/output.
int max_channel(self)
Maximum channel of original input/output if data is sequential.
int original_height(self)
Height of original input/output.
int width(self)
Width of NPU input/output.
int max_cache_size(self)
Maximum KV-cache size, relevant for LLM models using KV cache.
int max_height(self)
Maximum height of original input/output if data is sequential.
int original_size(self)
Returns the total size of the original input/output.
int height(self)
Height of NPU input/output.
int original_width(self)
Width of original input/output.
int channel(self)
Channel of NPU input/output.
int size(self)
Returns the total size of the NPU input/output.
int max_width(self)
Maximum width of original input/output if data is sequential.
int reshaped_channel(self)
Channel of reshaped input/output.
int original_channel(self)
Channel of original input/output.
int reshaped_size(self)
Returns the total size of the reshaped input/output.
int reshaped_height(self)
Height of reshaped input/output.
A simple byte-sized buffer.
Struct representing KV-cache information.
Enumerates clusters in the ARIES NPU.
Represents a unique identifier for an NPU core.
bool __eq__(self, other)
Checks if two CoreId objects are equal.
bool __lt__(self, other)
Compares two CoreId objects for ordering.
Defines the core mode for NPU execution.
Enumerates cores within a cluster in the ARIES NPU.
Configures a core mode and core allocation of a model for NPU inference.
List[CoreId] get_core_ids(self)
Returns the list of NPU CoreIds to be used for model inference.
int get_forced_npu_bundle_index(self)
Retrieves the index of the forced NPU bundle.
bool set_global8_core_mode(self)
Sets the model to use global8-core mode for inference.
bool set_multi_core_mode(self, List[Cluster] clusters=[Cluster.Cluster0, Cluster.Cluster1])
Sets the model to use multi-core mode for batch inference.
bool set_global_core_mode(self, List[Cluster] clusters)
bool force_single_npu_bundle(self, int npu_bundle_index)
Forces the use of a specific NPU bundle.
__init__(self, Optional[int] num_cores=None)
Default constructor.
CoreMode get_core_mode(self)
Gets the core mode to be applied to the model.
None set_async_pipeline_enabled(self, bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.
bool get_async_pipeline_enabled(self)
Returns whether the asynchronous pipeline is enabled in this configuration.
int get_activation_slots(self)
Returns activation buffer slot count.
bool set_auto_core_mode(self)
Sets the model to detect CoreMode automatically.
None set_activation_slots(self, int num)
Sets activation buffer slots for multi-activation supported model.
int get_num_cores(self)
Gets the number of cores to be allocated for the model.
bool set_global4_core_mode(self, List[Cluster] clusters=[Cluster.Cluster0, Cluster.Cluster1])
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.
CoreAllocationPolicy get_core_allocation_policy(self)
Gets the core allocation policy to be applied to the model.
bool set_single_core_mode(self, Optional[int] num_cores=None, Optional[List[CoreId]] core_ids=None)
Sets the model to use single-core mode for inference with a specified number of local cores.
float __getitem__(self, int i)
Returns the scale value at the specified index.
List[float] scale_list(self)
List[int] get_available_device_numbers()
Get the number of available NPU devices.
str get_model_summary(str mxq_path)
Generates a structured summary of the specified MXQ model.
bool start_tracing_events(str path)
Starts event tracing and prepares to save the trace log to a specified file.
stop_tracing_events()
Stops event tracing and writes the recorded trace log.