You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
569 lines
19 KiB
569 lines
19 KiB
/*
|
|
** Copyright (c) AImotive Kft. 2020
|
|
**
|
|
** The intellectual and technical concepts and implementations contained herein (including
|
|
** data structures, algorithms and essential business logic developed by AImotive Kft.) are
|
|
** proprietary to AImotive Kft., and may be covered by patents, and/or copyright law. This
|
|
** hardware or software is protected by trade secret, confidential business secret and as a
|
|
** general principle must be treated as confidential information.
|
|
**
|
|
** You may not use this hardware or software without specific prior written permission
|
|
** obtained from AImotive Kft.
|
|
**
|
|
** Access to this hardware or software is hereby forbidden to anyone except for Contracted
|
|
** Partners who have prior signed License Agreement, or Confidentiality, Non-Disclosure
|
|
** Agreements or any other equivalent Agreements explicitly covering such access and use.
|
|
**
|
|
** UNLESS OTHERWISE AGREED, THIS HARDWARE OR SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS
|
|
** OR IMPLIED WARRANTIES, INCLUDING - BUT NOT LIMITED TO - THE IMPLIED WARRANTIES OF
|
|
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
*/
|
|
|
|
/*
|
|
* This interface is the base for every system which is able to execute a neural network
|
|
* workload with aiWare arithmetic.
|
|
*/
|
|
#ifndef AIWARE_RUNTIME__RUNTIME_INF_HPP
|
|
#define AIWARE_RUNTIME__RUNTIME_INF_HPP
|
|
|
|
#include "aiware/common/c/status.h"
#include "aiware/common/c/tensordimensions.h"
#include "aiware/runtime/c/selftest.h"
#include "aiware/runtime/cpp/aiware-runtime-inf-lib-cpp_export.h"

#include <chrono>
#include <cstdint>
#include <string>
#include <type_traits>
#include <vector>
|
|
|
|
namespace aiware
|
|
{
|
|
namespace runtime
|
|
{
|
|
namespace inf
|
|
{
|
|
|
|
class Buffer;
|
|
class Program;
|
|
class Status;
|
|
|
|
/// An interface for any kind of device which is capable of performing neural network
|
|
/// workloads with aiWare arithmetic.
|
|
///
|
|
/// Each device has a set of programs associated with itself. These programs represent
|
|
/// the workloads, and the device provides methods to query and manage them. The device
|
|
/// is also responsible for destroying these programs.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT Device
|
|
{
|
|
public:
|
|
virtual ~Device();
|
|
|
|
Device(const Device&) = delete;
|
|
Device& operator=(const Device&) = delete;
|
|
Device(Device&&) = delete;
|
|
Device& operator=(Device&&) = delete;
|
|
|
|
/// Returns the number of Programs associated with this device instance.
|
|
virtual uint32_t programCount() const = 0;
|
|
|
|
/// Returns the program identified by the given index, or nullptr if the index is out of range.
|
|
virtual const Program* program(uint32_t index) const = 0;
|
|
|
|
/// Returns the program identified by the given index, or nullptr if the index is out of range.
|
|
virtual Program* program(uint32_t index) = 0;
|
|
|
|
/// Deletes the given program.
|
|
///
|
|
/// The default implementation doesn't do anything and returns false. The derived classes
|
|
/// may override this behaviour with something meaningful. These implementation should take
|
|
/// care of only such programs can be deleted by a device which are associated with it.
|
|
virtual bool deleteProgram(Program* program);
|
|
|
|
/// Returns if the device supports tensor export or not.
|
|
///
|
|
/// The default implementation doesn't do anything and returns false. The derived classes
|
|
/// may override this behaviour with something meaningful.
|
|
virtual bool tensorExportSupported() const;
|
|
|
|
/// Sets if the selftest should be executed
|
|
/// before a program or not.
|
|
/// Returns true if the operation was successful.
|
|
virtual bool setSelftestExecutionPolicy(aiwSelftestExecutionPolicy policy);
|
|
|
|
/// Returns the selftest execution policy.
|
|
/// If the operation was successful, then the policy
|
|
/// will be stored in the policy parameter.
|
|
/// Returns true if the operation was successful.
|
|
virtual bool selftestExecutionPolicy(aiwSelftestExecutionPolicy& policy) const;
|
|
|
|
/// Manually executes the selftest program.
|
|
/// Return value is the same as the execute() method.
|
|
virtual Status executeSelftest();
|
|
|
|
protected:
|
|
Device();
|
|
};
|
|
|
|
/// Internal helper for implementing RAII style raw buffer pointer management.
|
|
///
|
|
/// #BufferPtr will call the #acquire method during its construction, which will
|
|
/// get a raw pointer. This pointer will be released in the destructor by calling
|
|
/// this class' #release method.
|
|
///
|
|
/// If the buffer is pinned, then the #acquire method shall return the same pointer
|
|
/// each time it's called.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT BufferOperations
|
|
{
|
|
public:
|
|
virtual ~BufferOperations();
|
|
|
|
BufferOperations(const BufferOperations&) = delete;
|
|
BufferOperations(BufferOperations&&) = delete;
|
|
|
|
BufferOperations& operator=(const BufferOperations&) = delete;
|
|
BufferOperations& operator=(BufferOperations&&) = delete;
|
|
|
|
/// Returns the size of the buffer in bytes.
|
|
virtual uint64_t size() = 0;
|
|
|
|
/// Gets the raw pointer to a buffer which can be used until
|
|
/// the #release method is called (unless it's a pinned buffer).
|
|
virtual void* acquire() = 0;
|
|
|
|
/// Releases the acquired pointer which shouldn't be used anymore
|
|
/// (unless it's a pinned buffer).
|
|
virtual void release() = 0;
|
|
|
|
/// If returns true, then #acquire method shall return the same pointer
|
|
/// every time, and this pointer can be cached. Otherwise the #acquire
|
|
/// may return different pointers, and they can be used only until #release
|
|
/// is called.
|
|
virtual bool pinned() const = 0;
|
|
|
|
protected:
|
|
BufferOperations();
|
|
};
|
|
|
|
/// Wrapper class for a buffers memory location.
|
|
///
|
|
/// Don't store the raw pointer this object can be casted to unless the
|
|
/// parent buffer is a pinned one.
|
|
///
|
|
/// The const version of this object has only const conversion operators,
|
|
/// while the non-const version has both.
|
|
///
|
|
/// The class has conversion operator for multiple raw pointer types, but
|
|
/// it doesn't change the basic data type of the buffer, which can be
|
|
/// queried from the #Buffer class.
|
|
template<bool Const>
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT BufferPtr final
|
|
{
|
|
public:
|
|
BufferPtr() = delete;
|
|
BufferPtr(const BufferPtr&) = delete;
|
|
BufferPtr(BufferPtr&& other) noexcept;
|
|
|
|
BufferPtr& operator=(const BufferPtr&) = delete;
|
|
BufferPtr& operator=(BufferPtr&&) noexcept;
|
|
|
|
~BufferPtr();
|
|
|
|
operator const void*() const;
|
|
|
|
operator const uint8_t*() const;
|
|
|
|
operator const int8_t*() const;
|
|
|
|
operator const char*() const;
|
|
|
|
operator const float*() const;
|
|
|
|
template<bool B = Const, typename = typename std::enable_if<!B, void*>::type>
|
|
operator void*()
|
|
{
|
|
return _ptr;
|
|
}
|
|
|
|
template<bool B = Const, typename = typename std::enable_if<!B, uint8_t*>::type>
|
|
operator uint8_t*()
|
|
{
|
|
return static_cast<uint8_t*>(_ptr);
|
|
}
|
|
|
|
template<bool B = Const, typename = typename std::enable_if<!B, int8_t*>::type>
|
|
operator int8_t*()
|
|
{
|
|
return static_cast<int8_t*>(_ptr);
|
|
}
|
|
|
|
template<bool B = Const, typename = typename std::enable_if<!B, char*>::type>
|
|
operator char*()
|
|
{
|
|
return static_cast<char*>(_ptr);
|
|
}
|
|
|
|
template<bool B = Const, typename = typename std::enable_if<!B, float*>::type>
|
|
operator float*()
|
|
{
|
|
return static_cast<float*>(_ptr);
|
|
}
|
|
|
|
private:
|
|
BufferPtr(BufferOperations& ops);
|
|
|
|
friend class Buffer;
|
|
|
|
private:
|
|
BufferOperations* _ops = nullptr;
|
|
void* _ptr = nullptr;
|
|
};
|
|
|
|
/// Represents a data buffer associated with a tensor.
|
|
///
|
|
/// Through such an object the content of the tensor can be manipulated
|
|
/// directly. To do so, the layout of th buffer's data (the ordering) must
|
|
/// be taken into account. This ordering can be queried by the #ordering
|
|
/// method.
|
|
///
|
|
/// However aiWare like devices usually use int8 arithmetic, these buffer
|
|
/// can represent float data too if that's how the implementation defines.
|
|
/// In this case take care of using the #size method, which always returns
|
|
/// the size of the buffer in bytes instead of the element count.
|
|
///
|
|
/// The actual memory behind the buffer can be get by the #ptr functions
|
|
/// (const and non-const). Instead of returning actual raw pointers these
|
|
/// functions return wrapper objects, which can be casted to pointers. The
|
|
/// reason for this is these buffers may not pinned to a specific memory
|
|
/// location but changes time to time. These wrappers manage the acquiring
|
|
/// and releasing of these raw buffer in a RAII style. Thus the raw pointers
|
|
/// coming from these wrappers shouldn't be cached or stored, unless the
|
|
/// buffer is pinned (which means the buffer location doesn't change), so
|
|
/// it can be safely stored and used.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT Buffer
|
|
{
|
|
public:
|
|
using ConstPtr = BufferPtr<true>;
|
|
using Ptr = BufferPtr<false>;
|
|
|
|
enum class Ordering : std::uint8_t
|
|
{
|
|
TileBase,
|
|
CellRowBased,
|
|
RowMajor
|
|
};
|
|
|
|
public:
|
|
virtual ~Buffer();
|
|
|
|
Buffer(const Buffer&) = delete;
|
|
Buffer& operator=(const Buffer&) = delete;
|
|
Buffer(Buffer&&) = delete;
|
|
Buffer& operator=(Buffer&&) = delete;
|
|
|
|
/// Returns the size of the buffer in bytes.
|
|
virtual uint64_t size() const = 0;
|
|
|
|
/// Returns the ordering of the underlying buffer.
|
|
virtual Ordering ordering() const = 0;
|
|
|
|
/// Returns whether the buffer is pinned (fixed location) or not.
|
|
bool pinned() const;
|
|
|
|
/// Returns a const pointer to the underlying raw buffer.
|
|
ConstPtr ptr() const;
|
|
|
|
/// Returns a non-const pointer to the underlying raw buffer.
|
|
Ptr ptr();
|
|
|
|
protected:
|
|
Buffer();
|
|
|
|
virtual const BufferOperations& bufferOperations() const = 0;
|
|
virtual BufferOperations& bufferOperations() = 0;
|
|
};
|
|
|
|
/// Represents an external memory descriptor for an aiWare memory tensor.
|
|
///
|
|
/// This object is used to store the export or import descriptors ofthe physical memory of
|
|
/// a given tensor. The descriptor can be used by other accelerators to access the memory area.
|
|
///
|
|
/// The descriptor is hardware platform and operating system specific.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT ExternalMemoryDescriptor final
|
|
{
|
|
public:
|
|
ExternalMemoryDescriptor();
|
|
|
|
ExternalMemoryDescriptor(const ExternalMemoryDescriptor&);
|
|
ExternalMemoryDescriptor(ExternalMemoryDescriptor&&) noexcept;
|
|
|
|
ExternalMemoryDescriptor& operator=(const ExternalMemoryDescriptor&);
|
|
ExternalMemoryDescriptor& operator=(ExternalMemoryDescriptor&&) noexcept;
|
|
|
|
/// Returns the status of the descriptor, true if it's valid, false otherwise.
|
|
operator bool() const;
|
|
/// @brief Returns the descriptor to be used by other accelerators.
|
|
/// @return The platform-dependent descriptor object.
|
|
void* descriptor() const;
|
|
|
|
private:
|
|
ExternalMemoryDescriptor(bool status, void* descriptor = nullptr);
|
|
|
|
friend class Tensor;
|
|
|
|
private:
|
|
bool _status = false;
|
|
void* _descriptor = nullptr;
|
|
};
|
|
|
|
/// Represents a tensor of the neural network.
|
|
///
|
|
/// Tensor interface has 3 group of member functions: the first group
|
|
/// is the property getter functions. Note that some of these functions
|
|
/// may not implemented by the derived classes.
|
|
///
|
|
/// The second group of method provide an easy way to get or set the
|
|
/// tensor's data. These methods take an int8/uint8 or float buffer in
|
|
/// NCHW order and copy the data from it into the internal buffer or
|
|
/// vice versa, regardless of the internal data representation or layout.
|
|
/// All subclass must properly implement these methods.
|
|
///
|
|
/// The subclasses also shall provide access to these internal buffers,
|
|
/// where the data is actually stored. The third group of methods serve
|
|
/// this purpose.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT Tensor
|
|
{
|
|
public:
|
|
enum class DataType : std::uint8_t
|
|
{
|
|
Int8,
|
|
Float32
|
|
};
|
|
|
|
public:
|
|
virtual ~Tensor();
|
|
|
|
Tensor(const Tensor&) = delete;
|
|
Tensor(Tensor&&) = delete;
|
|
Tensor& operator=(const Tensor&) = delete;
|
|
Tensor& operator=(Tensor&&) = delete;
|
|
|
|
/// Returns the id of the buffer if implemented, otherwise returns 0.
|
|
virtual uint32_t id() const;
|
|
|
|
/// Returns the name of the buffer if implemented, otherwise returns
|
|
/// an empty string.
|
|
virtual const std::string& name() const;
|
|
|
|
/// Returns the dimension of tensor if implemented, otherwise returns
|
|
/// an invalid dimension object (all dimensions are 0).
|
|
virtual const aiwTensorDimensions& dim() const;
|
|
|
|
/// Returns the original dimension of the tensor if implemented,
|
|
/// otherwise returns an empty vector.
|
|
///
|
|
/// The original dimension is the one can be found in the original
|
|
/// network descriptor file (i.e. NNEF graph file), not affected by
|
|
/// any transformations.
|
|
virtual std::vector<uint32_t> originalDim() const;
|
|
|
|
/// Returns the data type of the buffer if implemented, otherwise
|
|
/// returns DataType::Int8.
|
|
virtual DataType dataType() const;
|
|
|
|
/// Returns the sign of the tensor, or false if it's not implemented
|
|
/// in the subclasses. It also may return dummy value if the tensor's
|
|
/// data type is float.
|
|
virtual bool sign() const;
|
|
|
|
/// Returns the exponent of the tensor, or 0 if it's not implemented
|
|
/// in the subclasses. It also may return dummy value if the tensor's
|
|
/// data type is float.
|
|
virtual int8_t exponent() const;
|
|
|
|
/// Returns the size of the elements in the tensor.
|
|
///
|
|
/// Always returns a positive number, otherwise some error happened.
|
|
/// Note that it usually doesn't return the same number as #Buffer::size.
|
|
virtual uint32_t nchwSize() const = 0;
|
|
|
|
/// Sets the tensor's data from the passed NCHW ordered buffer, and
|
|
/// returns true if it was successful.
|
|
///
|
|
/// The passed buffer can't be null and must be at least #nchwSize element
|
|
/// large.
|
|
///
|
|
/// If the tensor's data type is float, then the function should do
|
|
/// anything, just return with false.
|
|
virtual bool setDataNCHW(const uint8_t* data) = 0;
|
|
|
|
/// Sets the tensor's data from the passed NCHW ordered buffer, and
|
|
/// returns true if it was successful.
|
|
///
|
|
/// The passed buffer can't be null and must be at least #nchwSize element
|
|
/// large.
|
|
///
|
|
/// This function always shall be properly implemented. If the tensor's
|
|
/// data type is int8, then he values will be quantized, otherwise simple
|
|
/// copy will be performed.
|
|
virtual bool setDataNCHW(const float* data) = 0;
|
|
|
|
/// Same as int8 variant of #setDataNCHW except if fills the passed buffer
|
|
/// with the tensor's data.
|
|
virtual bool getDataNCHW(uint8_t* data) const = 0;
|
|
|
|
/// Same as float variant of #setDataNCHW except if fills the passed buffer
|
|
/// with the tensor's data.
|
|
virtual bool getDataNCHW(float* data) const = 0;
|
|
|
|
/// Returns the buffer to the underlying raw data.
|
|
virtual const Buffer& rawBuffer() const = 0;
|
|
|
|
/// Returns the buffer to the underlying raw data.
|
|
virtual Buffer& rawBuffer() = 0;
|
|
|
|
/// Exports the tensor's memory descriptor.
|
|
virtual ExternalMemoryDescriptor exportMemory();
|
|
|
|
/// Releases the memory descriptor, optional.
|
|
virtual bool releaseExportedMemory(ExternalMemoryDescriptor& descriptor);
|
|
|
|
protected:
|
|
Tensor();
|
|
|
|
ExternalMemoryDescriptor createExtMemDescriptor(bool status, void* desc);
|
|
|
|
private:
|
|
static const std::string _sDummyName;
|
|
static const aiwTensorDimensions _sDummyDim;
|
|
};
|
|
|
|
/// Represents a return status code of an operation where the status can differ from success or error.
|
|
/// The status code can be used to determine the result of an operation but is also compatible with
|
|
/// the "classical" SUCCESS/ERROR status codes.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT Status final
|
|
{
|
|
public:
|
|
Status();
|
|
Status(aiw_status s);
|
|
Status(bool s);
|
|
|
|
Status(const Status&);
|
|
Status(Status&&) noexcept;
|
|
|
|
Status& operator=(const Status&);
|
|
Status& operator=(Status&&) noexcept;
|
|
|
|
aiw_status statusCode() const;
|
|
|
|
bool success() const;
|
|
bool error() const;
|
|
bool timeout() const;
|
|
|
|
operator bool() const;
|
|
|
|
private:
|
|
aiw_status _s;
|
|
};
|
|
|
|
/// Represents and executable neural network workload associated
|
|
/// with a device.
|
|
class AIWARE_RUNTIME_INF_LIB_CPP_EXPORT Program
|
|
{
|
|
public:
|
|
virtual ~Program();
|
|
|
|
Program(const Program&) = delete;
|
|
Program(Program&&) = delete;
|
|
Program& operator=(const Program&) = delete;
|
|
Program& operator=(Program&&) = delete;
|
|
|
|
/// Number of the input tensor, shall return a positive number,
|
|
/// otherwise some error happened.
|
|
virtual uint32_t inputTensorCount() const = 0;
|
|
|
|
/// Returns the input tensor identified by the given index, or nullptr
|
|
/// if index is out of range.
|
|
virtual const Tensor* inputTensor(uint32_t index) const = 0;
|
|
|
|
/// Returns the input tensor identified by the given index, or nullptr
|
|
/// if index is out of range.
|
|
virtual Tensor* inputTensor(uint32_t) = 0;
|
|
|
|
/// Number of the output tensor, shall return a positive number,
|
|
/// otherwise some error happened.
|
|
virtual uint32_t outputTensorCount() const = 0;
|
|
|
|
/// Returns the output tensor identified by the given index, or nullptr
|
|
/// if index is out of range.
|
|
virtual const Tensor* outputTensor(uint32_t index) const = 0;
|
|
|
|
/// Returns the output tensor identified by the given index, or nullptr
|
|
/// if index is out of range.
|
|
virtual Tensor* outputTensor(uint32_t) = 0;
|
|
|
|
/// Executes the workload, and returns the status.
|
|
virtual Status execute() = 0;
|
|
|
|
/// Starts the execution of the workload without blocking, and returns the status.
|
|
virtual Status executeAsync();
|
|
|
|
/// Waits for the execution of the workload to finish, and returns the status.
|
|
virtual Status await();
|
|
|
|
/// Checks if program supports asynchronous execution.
|
|
virtual bool asyncExecutionSupported() const;
|
|
|
|
/// Synchronizes the input tensors data between the host and the
|
|
/// device (from the host to the device) if necessary.
|
|
virtual Status uploadInputs() = 0;
|
|
|
|
/// Synchronizes the output tensors data between the host and the
|
|
/// device (from the device to the host) if necessary.
|
|
virtual Status downloadOutputs() = 0;
|
|
|
|
/// Returns whether setting memory transfer timeout is supported.
|
|
virtual bool memoryTransferTimeoutSupported() const;
|
|
|
|
/// Returns whether setting execution timeout is supported.
|
|
virtual bool executionTimeoutSupported() const;
|
|
|
|
/// Sets the memory transfer (DMA) timeout. Timeout 0 means no timeout.
|
|
/// Returns true if the operation was successful.
|
|
/// If memory transfer timeout isn't supported, then it always return false.
|
|
virtual bool setMemoryTransferTimeout(uint32_t transferTimeoutMs);
|
|
|
|
/// Sets the program execution timeout. Timeout 0 means no timeout.
|
|
/// Returns true if the operation was successful.
|
|
/// If execution timeout isn't supported, then it always return false.
|
|
virtual bool setExecutionTimeout(uint32_t executionTimeoutMs);
|
|
|
|
/// Gets the memory transfer (DMA) timeout.
|
|
/// Returns true if the operation was successful.
|
|
/// If memory transfer timeout isn't supported, then it always return false.
|
|
virtual bool getMemoryTransferTimeout(uint32_t& transferTimeoutMs) const;
|
|
|
|
/// Gets the program execution timeout.
|
|
/// Returns true if the operation was successful.
|
|
/// If execution timeout isn't supported, then it always return false.
|
|
virtual bool getExecutionTimeout(uint32_t& executionTimeoutMs) const;
|
|
|
|
/// Fine-grained selftest control.
|
|
/// If the selftest is enabled, then it will be executed
|
|
/// according to the device policy. If it's disabled, then
|
|
/// it won't be executed, regardless the policy - useful
|
|
/// for performance reasons when multiple networks are executed for a
|
|
/// single frame. By default the selftest is enabled.
|
|
/// Returns true if the operation was successful.
|
|
|
|
virtual bool enableSelftest(bool enabled);
|
|
|
|
protected:
|
|
Program();
|
|
};
|
|
|
|
} // namespace inf
|
|
} // namespace runtime
|
|
} // namespace aiware
|
|
|
|
#endif //AIWARE_RUNTIME__RUNTIME_INF_HPP
|