doxygen/html_en/type_8h_source.html

// Copyright ⓒ 2019- Mobilint Inc. All rights reserved.


#ifndef QBRUNTIME_TYPE_H_

#define QBRUNTIME_TYPE_H_


#include <cstdint>

#include <string>

#include <tuple>

#include <vector>


#include "qbruntime/export.h"


namespace mobilint {


/** @brief Retrieves the version of the qbruntime. */
QBRUNTIME_EXPORT std::string getQbRuntimeVersion();


/** @brief Retrieves the Git commit hash of the qbruntime. */
QBRUNTIME_EXPORT std::string getQbRuntimeGitVersion();


/** @brief Retrieves the vendor name of the qbruntime. */
QBRUNTIME_EXPORT std::string getQbRuntimeVendor();


/** @brief Retrieves product information of the qbruntime. */
QBRUNTIME_EXPORT std::string getQbRuntimeProduct();


/**
 * @brief Enumerates clusters in the ARIES NPU.
 *
 * Every enumerator is a multiple of 0x1'0000, i.e. the low 16 bits are zero.
 */
enum class Cluster : int32_t {
    Cluster0 = 0x0001'0000,  // == 1 << 16
    Cluster1 = 0x0002'0000,  // == 2 << 16
    Error = 0x7FFF'0000,
};


/**
 * @brief Enumerates cores within a cluster in the ARIES NPU.
 *
 * The individual cores are numbered from 1; the remaining enumerators are
 * special values placed at the top of the 16-bit range.
 */
enum class Core : int32_t {
    Core0 = 1,
    Core1,  // 2
    Core2,  // 3
    Core3,  // 4
    All = 0xFFFC,
    GlobalCore = 0xFFFE,
    Error = 0xFFFF,
};


/** @brief Core allocation policy. */
enum class CoreAllocationPolicy {
    Auto = 0,
    Manual = 1,
};


/**
 * @brief Struct for scale values.
 *
 * Holds either a single shared value (`scale`, selected when `is_uniform` is
 * true) or a list of values (`scale_list`) that is indexed directly.
 */
struct Scale {
    std::vector<float> scale_list;
    float scale{0.0F};
    bool is_uniform{false};

    /**
     * @brief Returns the scale value at the specified index.
     * @param i Index into `scale_list`; ignored when `is_uniform` is true.
     * @return `scale` when uniform, otherwise `scale_list[i]`.
     * @note The list access is not bounds-checked.
     */
    float operator[](int i) const { return is_uniform ? scale : scale_list[i]; }
};


struct CoreId {

    Cluster cluster = Cluster::Error;

    Core core = Core::Error;


    bool operator==(const CoreId& rhs) const {

        return std::tie(cluster, core) == std::tie(rhs.cluster, rhs.core);

    }


    bool operator<(const CoreId& rhs) const {

        return std::tie(cluster, core) < std::tie(rhs.cluster, rhs.core);

    }


};


/**
 * @brief A simple byte-sized buffer.
 *
 * Plain pointer/size pair; the struct itself neither allocates nor frees
 * `data`.
 */
struct Buffer {
    int8_t* data{nullptr};
    uint64_t size{0};
};


/** @brief Defines the core mode for NPU execution. */
enum class CoreMode : uint8_t {
    Single = 0,
    Multi,    // 1
    Global,   // 2
    Global4,  // 3
    Global8,  // 4
    Auto = 0xE,
    Error = 0xF,
};


/**
 * @brief Struct representing input/output buffer information.
 *
 * Stores four height/width/channel triples (original, reshaped, NPU and max)
 * plus a maximum cache size. All fields default to zero.
 */
struct BufferInfo {
    // clang-format off
    uint32_t original_height{0};
    uint32_t original_width{0};
    uint32_t original_channel{0};
    uint32_t reshaped_height{0};
    uint32_t reshaped_width{0};
    uint32_t reshaped_channel{0};
    uint32_t height{0};
    uint32_t width{0};
    uint32_t channel{0};
    uint32_t max_height{0};
    uint32_t max_width{0};
    uint32_t max_channel{0};
    uint32_t max_cache_size{0};
    // clang-format on

    /**
     * @brief Returns the total size of the original input/output.
     * @return original_height * original_width * original_channel, computed
     *         in 32-bit unsigned arithmetic.
     */
    uint32_t original_size() const {
        const uint32_t plane = original_height * original_width;
        return plane * original_channel;
    }

    /**
     * @brief Returns the total size of the reshaped input/output.
     * @return reshaped_height * reshaped_width * reshaped_channel, computed
     *         in 32-bit unsigned arithmetic.
     */
    uint32_t reshaped_size() const {
        const uint32_t plane = reshaped_height * reshaped_width;
        return plane * reshaped_channel;
    }

    /**
     * @brief Returns the total size of the NPU input/output.
     * @return height * width * channel, computed in 32-bit unsigned
     *         arithmetic.
     */
    uint32_t size() const {
        const uint32_t plane = height * width;
        return plane * channel;
    }
};


/**
 * @brief Configuration describing which NPU cores a model uses and how.
 *
 * The setters select a core mode; the getters expose the stored selection.
 * The constructors and all non-inline member functions are defined outside
 * this header.
 */
class QBRUNTIME_EXPORT ModelConfig {
public:
    /**
     * @brief Default constructor. This default-constructed object is initially
     *        set to auto-core mode.
     */
    ModelConfig();

    /** @brief Sets the model to detect CoreMode automatically. */
    bool setAutoCoreMode();

    /**
     * @brief Sets the model to use single-core mode for inference with a
     *        specified number of local cores.
     * @param num_cores Number of local cores to use.
     */
    bool setSingleCoreMode(int num_cores);

    /**
     * @brief Sets the model to use single-core mode for inference with a
     *        specific set of NPU local cores.
     * @param core_ids Cores to use.
     */
    bool setSingleCoreMode(std::vector<CoreId> core_ids);

    /**
     * @brief Sets the model to use multi-core mode for batch inference.
     * @param clusters Clusters to use; defaults to Cluster0 and Cluster1.
     */
    bool setMultiCoreMode(std::vector<Cluster> clusters = {Cluster::Cluster0,
                                                           Cluster::Cluster1});

    /**
     * @brief Sets the model to use global4-core mode for inference with a
     *        specified set of NPU clusters.
     * @param clusters Clusters to use; defaults to Cluster0 and Cluster1.
     */
    bool setGlobal4CoreMode(std::vector<Cluster> clusters = {Cluster::Cluster0,
                                                             Cluster::Cluster1});

    /** @brief Sets the model to use global8-core mode for inference. */
    bool setGlobal8CoreMode();

    /** @brief Gets the core mode to be applied to the model. */
    CoreMode getCoreMode() const { return mCoreMode; }

    /** @brief Gets the core allocation policy to be applied to the model. */
    CoreAllocationPolicy getCoreAllocationPolicy() const { return mCoreAllocationPolicy; }

    /** @brief Gets the number of cores to be allocated for the model. */
    int getNumCores() const { return mNumCores; }

    /**
     * @brief Forces the use of a specific NPU bundle.
     * @param npu_bundle_index Index of the NPU bundle to force.
     */
    bool forceSingleNPUBundle(int npu_bundle_index);

    /**
     * @brief Retrieves the index of the forced NPU bundle.
     * @return The stored index; -1 means single NPU bundle usage is not forced.
     */
    int getForcedNPUBundleIndex() const { return mForcedNPUBundleIndex; }

    /** @brief Returns the list of NPU CoreIds to be used for model inference. */
    const std::vector<CoreId>& getCoreIds() const { return mCoreIds; }

    /** @brief Returns the stored list of clusters. */
    const std::vector<Cluster>& getClusters() const { return mClusters; }

    /**
     * @brief Enables or disables the asynchronous pipeline required for
     *        asynchronous inference.
     */
    void setAsyncPipelineEnabled(bool enable);

    /**
     * @brief Returns whether the asynchronous pipeline is enabled in this
     *        configuration.
     */
    bool getAsyncPipelineEnabled() const { return mAsyncPipelineEnabled; }

    /**
     * @brief Sets activation buffer slots for multi-activation supported model.
     * @param count Number of activation buffer slots.
     */
    void setActivationSlots(int count);

    /** @brief Returns activation buffer slot count. */
    int getActivationSlots() const { return mActivationSlots; }

    // NOTE(review): the two members below carry no description in the
    // generated API reference; presumably legacy entry points -- confirm
    // before relying on them.
    explicit ModelConfig(int num_cores);

    bool setGlobalCoreMode(std::vector<Cluster> clusters);

    // Public latency lists; their units and producers are not visible in this
    // header.
    std::vector<uint64_t> early_latencies;
    std::vector<uint64_t> finish_latencies;

private:
    CoreMode mCoreMode = CoreMode::Auto;
    CoreAllocationPolicy mCoreAllocationPolicy = CoreAllocationPolicy::Manual;
    std::vector<Cluster> mClusters;
    std::vector<CoreId> mCoreIds;
    // Defaulted like every other member so getNumCores() never reads an
    // indeterminate value if a constructor leaves it unset.
    int mNumCores = 0;
    int mForcedNPUBundleIndex = -1;  // -1 means single npu bundle usage is not forced.
    bool mAsyncPipelineEnabled = false;
    int mActivationSlots = 1;
};


/**
 * @brief Log levels accepted by setLogLevel().
 *
 * Enumerators are numbered consecutively starting at 1.
 */
enum class LogLevel : char {
    DEBUG = 1,
    INFO,   // 2
    WARN,   // 3
    ERR,    // 4
    FATAL,  // 5
    OFF,    // 6
};


/** @brief Cache type tag stored in CacheInfo. */
enum class CacheType : uint8_t {
    Default = 0,
    Batch = 1,
    Error = 0x0F,
};


struct CacheInfo {

    CacheType cache_type = CacheType::Error;

    std::string name;

    std::string layer_hash;

    uint64_t size = 0;

    size_t num_batches = 0;

};


/** @brief Element data types, plus an Error value. */
enum class DataType : uint8_t {
    Float32 = 0,
    Float16 = 1,
    Int8 = 2,
    Uint8 = 3,
    Error = 0x0F,
};


/**
 * @brief Sets the log level.
 * @param level Level to apply.
 * NOTE(review): presumably the threshold for the runtime's own log output --
 * confirm against the implementation.
 */
QBRUNTIME_EXPORT void setLogLevel(LogLevel level);


/**
 * @brief Starts event tracing and prepares to save the trace log to a
 *        specified file.
 * @param path File the trace log is to be saved to.
 * @return NOTE(review): presumably true on success -- confirm.
 */
QBRUNTIME_EXPORT bool startTracingEvents(const char* path);


/** @brief Stops event tracing and writes the recorded trace log. */
QBRUNTIME_EXPORT void stopTracingEvents();


/**
 * @brief Generates a structured summary of the specified MXQ model.
 * @param mxq_path Path of the MXQ model.
 */
QBRUNTIME_EXPORT std::string getModelSummary(const std::string& mxq_path);


/**
 * @brief Gets the device numbers of the available NPU devices.
 * @return One int per available device.
 */
QBRUNTIME_EXPORT std::vector<int> getAvailableDeviceNumbers();


}  // namespace mobilint


#endif

mobilint::ModelConfig::setGlobal8CoreMode
bool setGlobal8CoreMode()
Sets the model to use global8-core mode for inference.

mobilint::ModelConfig::setGlobalCoreMode
bool setGlobalCoreMode(std::vector< Cluster > clusters)

mobilint::ModelConfig::getCoreMode
CoreMode getCoreMode() const
Gets the core mode to be applied to the model.
Definition type.h:334

mobilint::ModelConfig::getAsyncPipelineEnabled
bool getAsyncPipelineEnabled() const
Returns whether the asynchronous pipeline is enabled in this configuration.
Definition type.h:420

mobilint::ModelConfig::setGlobal4CoreMode
bool setGlobal4CoreMode(std::vector< Cluster > clusters={Cluster::Cluster0, Cluster::Cluster1})
Sets the model to use global4-core mode for inference with a specified set of NPU clusters.

mobilint::ModelConfig::setAutoCoreMode
bool setAutoCoreMode()
Sets the model to detect CoreMode automatically.

mobilint::ModelConfig::setAsyncPipelineEnabled
void setAsyncPipelineEnabled(bool enable)
Enables or disables the asynchronous pipeline required for asynchronous inference.

mobilint::ModelConfig::ModelConfig
ModelConfig(int num_cores)

mobilint::ModelConfig::setSingleCoreMode
bool setSingleCoreMode(int num_cores)
Sets the model to use single-core mode for inference with a specified number of local cores.

mobilint::ModelConfig::ModelConfig
ModelConfig()
Default constructor. This default-constructed object is initially set to auto-core mode.

mobilint::ModelConfig::setMultiCoreMode
bool setMultiCoreMode(std::vector< Cluster > clusters={Cluster::Cluster0, Cluster::Cluster1})
Sets the model to use multi-core mode for batch inference.

mobilint::ModelConfig::getActivationSlots
int getActivationSlots() const
Returns the activation buffer slot count.
Definition type.h:451

mobilint::ModelConfig::getCoreIds
const std::vector< CoreId > & getCoreIds() const
Returns the list of NPU CoreIds to be used for model inference.
Definition type.h:394

mobilint::ModelConfig::early_latencies
std::vector< uint64_t > early_latencies
Definition type.h:460

mobilint::ModelConfig::finish_latencies
std::vector< uint64_t > finish_latencies
Definition type.h:464

mobilint::ModelConfig::setSingleCoreMode
bool setSingleCoreMode(std::vector< CoreId > core_ids)
Sets the model to use single-core mode for inference with a specific set of NPU local cores.

mobilint::ModelConfig::getCoreAllocationPolicy
CoreAllocationPolicy getCoreAllocationPolicy() const
Gets the core allocation policy to be applied to the model.
Definition type.h:344

mobilint::ModelConfig::forceSingleNPUBundle
bool forceSingleNPUBundle(int npu_bundle_index)
Forces the use of a specific NPU bundle.

mobilint::ModelConfig::getNumCores
int getNumCores() const
Gets the number of cores to be allocated for the model.
Definition type.h:354

mobilint::ModelConfig::getForcedNPUBundleIndex
int getForcedNPUBundleIndex() const
Retrieves the index of the forced NPU bundle.
Definition type.h:382

mobilint::ModelConfig::setActivationSlots
void setActivationSlots(int count)
Sets the number of activation buffer slots for models that support multiple activations.

mobilint::DataType
DataType
DataType.
Definition type.h:508

mobilint::getQbRuntimeProduct
QBRUNTIME_EXPORT std::string getQbRuntimeProduct()
Retrieves product information of the qbruntime.

mobilint::startTracingEvents
QBRUNTIME_EXPORT bool startTracingEvents(const char *path)
Starts event tracing and prepares to save the trace log to a specified file.

mobilint::getAvailableDeviceNumbers
QBRUNTIME_EXPORT std::vector< int > getAvailableDeviceNumbers()
Gets the device numbers of the available NPU devices.

mobilint::getModelSummary
QBRUNTIME_EXPORT std::string getModelSummary(const std::string &mxq_path)
Generates a structured summary of the specified MXQ model.

mobilint::Cluster
Cluster
Enumerates clusters in the ARIES NPU.
Definition type.h:64

mobilint::stopTracingEvents
QBRUNTIME_EXPORT void stopTracingEvents()
Stops event tracing and writes the recorded trace log.

mobilint::getQbRuntimeVendor
QBRUNTIME_EXPORT std::string getQbRuntimeVendor()
Retrieves the vendor name of the qbruntime.

mobilint::CacheType
CacheType
CacheType.
Definition type.h:492

mobilint::LogLevel
LogLevel
LogLevel.
Definition type.h:480

mobilint::CoreMode
CoreMode
Defines the core mode for NPU execution.
Definition type.h:170

mobilint::getQbRuntimeGitVersion
QBRUNTIME_EXPORT std::string getQbRuntimeGitVersion()
Retrieves the Git commit hash of the qbruntime.

mobilint::getQbRuntimeVersion
QBRUNTIME_EXPORT std::string getQbRuntimeVersion()
Retrieves the version of the qbruntime.

mobilint::CoreAllocationPolicy
CoreAllocationPolicy
Core allocation policy.
Definition type.h:90

mobilint::Core
Core
Enumerates cores within a cluster in the ARIES NPU.
Definition type.h:77

mobilint::Cluster::Cluster1
@ Cluster1
Definition type.h:66

mobilint::Cluster::Error
@ Error
Definition type.h:67

mobilint::Cluster::Cluster0
@ Cluster0
Definition type.h:65

mobilint::CoreMode::Auto
@ Auto
Definition type.h:176

mobilint::CoreMode::Global8
@ Global8
Definition type.h:175

mobilint::CoreMode::Global4
@ Global4
Definition type.h:174

mobilint::CoreMode::Global
@ Global
Definition type.h:173

mobilint::CoreMode::Single
@ Single
Definition type.h:171

mobilint::CoreMode::Multi
@ Multi
Definition type.h:172

mobilint::CoreAllocationPolicy::Auto
@ Auto
Definition type.h:91

mobilint::CoreAllocationPolicy::Manual
@ Manual
Definition type.h:92

mobilint::Core::Core1
@ Core1
Definition type.h:79

mobilint::Core::Core3
@ Core3
Definition type.h:81

mobilint::Core::GlobalCore
@ GlobalCore
Definition type.h:83

mobilint::Core::Core0
@ Core0
Definition type.h:78

mobilint::Core::Error
@ Error
Definition type.h:84

mobilint::Core::All
@ All
Definition type.h:82

mobilint::Core::Core2
@ Core2
Definition type.h:80

mobilint::BufferInfo
Struct representing input/output buffer information.
Definition type.h:183

mobilint::BufferInfo::original_height
uint32_t original_height
Definition type.h:185

mobilint::BufferInfo::max_cache_size
uint32_t max_cache_size
Definition type.h:197

mobilint::BufferInfo::width
uint32_t width
Definition type.h:192

mobilint::BufferInfo::reshaped_channel
uint32_t reshaped_channel
Definition type.h:190

mobilint::BufferInfo::height
uint32_t height
Definition type.h:191

mobilint::BufferInfo::reshaped_height
uint32_t reshaped_height
Definition type.h:188

mobilint::BufferInfo::original_size
uint32_t original_size() const
Returns the total size of the original input/output.
Definition type.h:204

mobilint::BufferInfo::max_channel
uint32_t max_channel
Definition type.h:196

mobilint::BufferInfo::original_channel
uint32_t original_channel
Definition type.h:187

mobilint::BufferInfo::channel
uint32_t channel
Definition type.h:193

mobilint::BufferInfo::reshaped_size
uint32_t reshaped_size() const
Returns the total size of the reshaped input/output.
Definition type.h:212

mobilint::BufferInfo::max_width
uint32_t max_width
Definition type.h:195

mobilint::BufferInfo::size
uint32_t size() const
Returns the total size of the NPU input/output.
Definition type.h:220

mobilint::BufferInfo::original_width
uint32_t original_width
Definition type.h:186

mobilint::BufferInfo::max_height
uint32_t max_height
Definition type.h:194

mobilint::BufferInfo::reshaped_width
uint32_t reshaped_width
Definition type.h:189

mobilint::Buffer
A simple byte-sized buffer.
Definition type.h:153

mobilint::Buffer::size
uint64_t size
Definition type.h:155

mobilint::Buffer::data
int8_t * data
Definition type.h:154

mobilint::CacheInfo
Struct representing KV-cache information.
Definition type.h:497

mobilint::CoreId
Represents a unique identifier for an NPU core.
Definition type.h:123

mobilint::CoreId::core
Core core
Definition type.h:125

mobilint::CoreId::operator<
bool operator<(const CoreId &rhs) const
Compares two CoreId objects for ordering.
Definition type.h:143

mobilint::CoreId::operator==
bool operator==(const CoreId &rhs) const
Checks if two CoreId objects are equal.
Definition type.h:133

mobilint::CoreId::cluster
Cluster cluster
Definition type.h:124

mobilint::Scale
Struct for scale values.
Definition type.h:98

mobilint::Scale::operator[]
float operator[](int i) const
Returns the scale value at the specified index.
Definition type.h:109

mobilint::Scale::scale
float scale
Definition type.h:100

mobilint::Scale::is_uniform
bool is_uniform
Definition type.h:101

mobilint::Scale::scale_list
std::vector< float > scale_list
Definition type.h:99