# ******************************************************************************
# Copyright (c) 2022. Kneron Inc. All rights reserved.                         *
# ******************************************************************************
from typing import List, Union

from utils.ExampleEnum import *

import numpy as np
import re
import os
import sys
import cv2

PWD = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, os.path.join(PWD, '../..'))

import kp

TARGET_FW_VERSION = 'KDP2'


def get_device_usb_speed_by_port_id(usb_port_id: int) -> kp.UsbSpeed:
    device_list = kp.core.scan_devices()

    for device_descriptor in device_list.device_descriptor_list:
        if 0 == usb_port_id:
            return device_descriptor.link_speed
        elif usb_port_id == device_descriptor.usb_port_id:
            return device_descriptor.link_speed

    raise IOError('Specified USB port ID {} does not exist.'.format(usb_port_id))
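
# Usage sketch (illustrative, not part of the original example): with at least one Kneron
# device attached, query the USB link speed of the first scanned device (passing 0 returns
# the speed of the first device found by kp.core.scan_devices()):
#
#     link_speed = get_device_usb_speed_by_port_id(usb_port_id=0)
#     print('USB link speed: {}'.format(link_speed))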


def get_connect_device_descriptor(target_device: str,
                                  scan_index_list: Union[List[int], None],
                                  usb_port_id_list: Union[List[int], None]):
    print('[Check Device]')

    # scan devices
    _device_list = kp.core.scan_devices()

    # check that a Kneron device exists
    if _device_list.device_descriptor_number == 0:
        print('Error: no Kneron device !')
        exit(0)

    _index_device_descriptor_list = []

    # get device_descriptor of specified scan index
    if scan_index_list is not None:
        for _scan_index in scan_index_list:
            if _device_list.device_descriptor_number > _scan_index >= 0:
                _index_device_descriptor_list.append([_scan_index, _device_list.device_descriptor_list[_scan_index]])
            else:
                print('Error: no matched Kneron device of specified scan index !')
                exit(0)
    # get device_descriptor of specified port ID
    elif usb_port_id_list is not None:
        for _scan_index, __device_descriptor in enumerate(_device_list.device_descriptor_list):
            for _usb_port_id in usb_port_id_list:
                if __device_descriptor.usb_port_id == _usb_port_id:
                    _index_device_descriptor_list.append([_scan_index, __device_descriptor])

        if 0 == len(_index_device_descriptor_list):
            print('Error: no matched Kneron device of specified port ID !')
            exit(0)
    # get device_descriptor of all scanned devices by default
    else:
        _index_device_descriptor_list = [[_scan_index, __device_descriptor] for _scan_index, __device_descriptor in
                                         enumerate(_device_list.device_descriptor_list)]

    # check that each device_descriptor matches the specified target device
    if target_device.lower() == 'kl520':
        _target_device_product_id = kp.ProductId.KP_DEVICE_KL520
    elif target_device.lower() == 'kl720':
        _target_device_product_id = kp.ProductId.KP_DEVICE_KL720
    elif target_device.lower() == 'kl630':
        _target_device_product_id = kp.ProductId.KP_DEVICE_KL630
    elif target_device.lower() == 'kl730':
        _target_device_product_id = kp.ProductId.KP_DEVICE_KL730
    elif target_device.lower() == 'kl830':
        _target_device_product_id = kp.ProductId.KP_DEVICE_KL830
    else:
        print('Error: Unsupported target device !')
        exit(0)

    for _scan_index, __device_descriptor in _index_device_descriptor_list:
        if kp.ProductId(__device_descriptor.product_id) != _target_device_product_id:
            print('Error: no matched Kneron device of specified target device !')
            exit(0)

    for _scan_index, __device_descriptor in _index_device_descriptor_list:
        if TARGET_FW_VERSION not in __device_descriptor.firmware:
            print('Error: device is not running KDP2/KDP2 Loader firmware ...')
            print('please upload firmware first via \'kp.core.load_firmware_from_file()\'')
            exit(0)

    print(' - Success')

    return _index_device_descriptor_list
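
# Usage sketch (illustrative; assumes a KL520 dongle running KDP2 firmware is attached):
#
#     index_descriptor_list = get_connect_device_descriptor(target_device='KL520',
#                                                           scan_index_list=None,
#                                                           usb_port_id_list=None)
#     usb_port_ids = [_descriptor.usb_port_id for _, _descriptor in index_descriptor_list]
#     # usb_port_ids can then be passed to kp.core.connect_devices(usb_port_ids=usb_port_ids)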


def read_image(img_path: str, img_type: str, img_format: str):
    print('[Read Image]')
    if img_type == ImageType.GENERAL.value:
        _img = cv2.imread(filename=img_path)

        if _img is None:
            print('Error: image file \'{}\' cannot be read !'.format(img_path))
            exit(0)

        if len(_img.shape) < 3:
            channel_num = 1
        else:
            channel_num = _img.shape[2]

        if channel_num == 1:
            if img_format == ImageFormat.RGB565.value:
                color_cvt_code = cv2.COLOR_GRAY2BGR565
            elif img_format == ImageFormat.RGBA8888.value:
                color_cvt_code = cv2.COLOR_GRAY2BGRA
            elif img_format == ImageFormat.RAW8.value:
                color_cvt_code = None
            else:
                print('Error: No matched image format !')
                exit(0)
        elif channel_num == 3:
            if img_format == ImageFormat.RGB565.value:
                color_cvt_code = cv2.COLOR_BGR2BGR565
            elif img_format == ImageFormat.RGBA8888.value:
                color_cvt_code = cv2.COLOR_BGR2BGRA
            elif img_format == ImageFormat.RAW8.value:
                color_cvt_code = cv2.COLOR_BGR2GRAY
            else:
                print('Error: No matched image format !')
                exit(0)
        else:
            print('Error: Unsupported image format !')
            exit(0)

        if color_cvt_code is not None:
            _img = cv2.cvtColor(src=_img, code=color_cvt_code)

    elif img_type == ImageType.BINARY.value:
        with open(file=img_path, mode='rb') as file:
            _img = file.read()
    else:
        print('Error: Unsupported image type !')
        exit(0)

    print(' - Success')
    return _img
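
# Usage sketch (illustrative; the image path is hypothetical and the enum values come from
# utils.ExampleEnum):
#
#     img_bgr565 = read_image(img_path='path/to/input_image.bmp',
#                             img_type=ImageType.GENERAL.value,
#                             img_format=ImageFormat.RGB565.value)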


def get_kp_image_format(image_format: str) -> kp.ImageFormat:
    if image_format == ImageFormat.RGB565.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_RGB565
    elif image_format == ImageFormat.RGBA8888.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_RGBA8888
    elif image_format == ImageFormat.YUYV.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YUYV
    elif image_format == ImageFormat.CRY1CBY0.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_CRY1CBY0
    elif image_format == ImageFormat.CBY1CRY0.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_CBY1CRY0
    elif image_format == ImageFormat.Y1CRY0CB.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_Y1CRY0CB
    elif image_format == ImageFormat.Y1CBY0CR.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_Y1CBY0CR
    elif image_format == ImageFormat.CRY0CBY1.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_CRY0CBY1
    elif image_format == ImageFormat.CBY0CRY1.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_CBY0CRY1
    elif image_format == ImageFormat.Y0CRY1CB.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_Y0CRY1CB
    elif image_format == ImageFormat.Y0CBY1CR.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YCBCR422_Y0CBY1CR
    elif image_format == ImageFormat.RAW8.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_RAW8
    elif image_format == ImageFormat.YUV420p.value:
        _kp_image_format = kp.ImageFormat.KP_IMAGE_FORMAT_YUV420
    else:
        print('Error: Unsupported image format !')
        exit(0)

    return _kp_image_format


def get_kp_normalize_mode(norm_mode: str) -> kp.NormalizeMode:
    if norm_mode == NormalizeMode.NONE.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_DISABLE
    elif norm_mode == NormalizeMode.KNERON.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_KNERON
    elif norm_mode == NormalizeMode.YOLO.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_YOLO
    elif norm_mode == NormalizeMode.TENSORFLOW.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_TENSOR_FLOW
    elif norm_mode == NormalizeMode.CUSTOMIZED_DEFAULT.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_CUSTOMIZED_DEFAULT
    elif norm_mode == NormalizeMode.CUSTOMIZED_SUB128.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_CUSTOMIZED_SUB128
    elif norm_mode == NormalizeMode.CUSTOMIZED_DIV2.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_CUSTOMIZED_DIV2
    elif norm_mode == NormalizeMode.CUSTOMIZED_SUB128_DIV2.value:
        _kp_norm = kp.NormalizeMode.KP_NORMALIZE_CUSTOMIZED_SUB128_DIV2
    else:
        print('Error: Unsupported normalize mode !')
        exit(0)

    return _kp_norm


def get_kp_pre_process_resize_mode(resize_mode: str) -> kp.ResizeMode:
    if resize_mode == ResizeMode.NONE.value:
        _kp_resize_mode = kp.ResizeMode.KP_RESIZE_DISABLE
    elif resize_mode == ResizeMode.ENABLE.value:
        _kp_resize_mode = kp.ResizeMode.KP_RESIZE_ENABLE
    else:
        print('Error: Unsupported pre-process resize mode !')
        exit(0)

    return _kp_resize_mode


def get_kp_pre_process_padding_mode(padding_mode: str) -> kp.PaddingMode:
    if padding_mode == PaddingMode.NONE.value:
        _kp_padding_mode = kp.PaddingMode.KP_PADDING_DISABLE
    elif padding_mode == PaddingMode.PADDING_CORNER.value:
        _kp_padding_mode = kp.PaddingMode.KP_PADDING_CORNER
    elif padding_mode == PaddingMode.PADDING_SYMMETRIC.value:
        _kp_padding_mode = kp.PaddingMode.KP_PADDING_SYMMETRIC
    else:
        print('Error: Unsupported pre-process padding mode !')
        exit(0)

    return _kp_padding_mode


def get_ex_post_process_mode(post_proc: str) -> PostprocessMode:
    if post_proc in PostprocessMode._value2member_map_:
        _ex_post_proc = PostprocessMode(post_proc)
    else:
        print('Error: Unsupported post-process mode !')
        exit(0)

    return _ex_post_proc


def parse_crop_box_from_str(crop_box_str: str) -> List[kp.InferenceCropBox]:
    # match groups of four comma-separated integers wrapped in parentheses, e.g. "(x, y, w, h)"
    _group_list = re.compile(r'\([\s]*(\d+)[\s]*,[\s]*(\d+)[\s]*,[\s]*(\d+)[\s]*,[\s]*(\d+)[\s]*\)').findall(
        crop_box_str)
    _crop_box_list = []

    for _idx, _crop_box in enumerate(_group_list):
        _crop_box_list.append(
            kp.InferenceCropBox(
                crop_box_index=_idx,
                x=int(_crop_box[0]),
                y=int(_crop_box[1]),
                width=int(_crop_box[2]),
                height=int(_crop_box[3])
            )
        )

    return _crop_box_list
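
# Usage sketch (illustrative): parse two crop boxes from a command-line style string.
#
#     crop_boxes = parse_crop_box_from_str('(0, 0, 100, 100), (50, 60, 200, 300)')
#     # -> [kp.InferenceCropBox(crop_box_index=0, x=0, y=0, width=100, height=100),
#     #     kp.InferenceCropBox(crop_box_index=1, x=50, y=60, width=200, height=300)]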


def convert_onnx_data_to_npu_data(tensor_descriptor: kp.TensorDescriptor, onnx_data: np.ndarray) -> bytes:
    def __get_npu_ndarray(__tensor_descriptor: kp.TensorDescriptor, __npu_ndarray_dtype: np.dtype):
        assert __tensor_descriptor.tensor_shape_info.version == kp.ModelTensorShapeInformationVersion.KP_MODEL_TENSOR_SHAPE_INFO_VERSION_2

        if __tensor_descriptor.data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8B,
                                               kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8BHL]:
            """ calculate channel group stride in C language
            for (int axis = 0; axis < (int)tensor_shape_info->shape_len; axis++) {
                if (1 == tensor_shape_info->stride_npu[axis]) {
                    channel_idx = axis;
                    continue;
                }

                npu_channel_group_stride_tmp = tensor_shape_info->stride_npu[axis] * tensor_shape_info->shape[axis];
                if (npu_channel_group_stride_tmp > npu_channel_group_stride)
                    npu_channel_group_stride = npu_channel_group_stride_tmp;
            }
            """
            __shape = np.array(__tensor_descriptor.tensor_shape_info.v2.shape, dtype=int)
            __stride_npu = np.array(__tensor_descriptor.tensor_shape_info.v2.stride_npu, dtype=int)
            __channel_idx = np.where(__stride_npu == 1)[0][0]
            __dimension_stride = __stride_npu * __shape
            __dimension_stride[__channel_idx] = 0
            __npu_channel_group_stride = np.max(__dimension_stride.flatten())

            """
            __shape = __tensor_descriptor.tensor_shape_info.v2.shape
            __max_element_num += ((__shape[__channel_idx] / 16) + (0 if (__shape[__channel_idx] % 16) == 0 else 1)) * __npu_channel_group_stride
            """
            __max_element_num = ((__shape[__channel_idx] >> 4) + (0 if (__shape[__channel_idx] % 16) == 0 else 1)) * __npu_channel_group_stride
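            # Illustrative example: with 20 channels, (20 >> 4) + 1 = 2 channel groups of 16,
            # so the buffer must hold 2 * __npu_channel_group_stride elements.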
        else:
            __max_element_num = 0
            __dimension_num = len(__tensor_descriptor.tensor_shape_info.v2.shape)

            for dimension in range(__dimension_num):
                __element_num = __tensor_descriptor.tensor_shape_info.v2.shape[dimension] * __tensor_descriptor.tensor_shape_info.v2.stride_npu[dimension]
                if __element_num > __max_element_num:
                    __max_element_num = __element_num

        return np.zeros(shape=__max_element_num, dtype=__npu_ndarray_dtype).flatten()

    quantization_parameters = tensor_descriptor.quantization_parameters
    tensor_shape_info = tensor_descriptor.tensor_shape_info
    npu_data_layout = tensor_descriptor.data_layout

    quantization_max_value = 0
    quantization_min_value = 0
    radix = 0
    scale = 0
    quantization_factor = 0

    channel_idx = 0
    npu_channel_group_stride = -1

    onnx_data_shape_index = None
    onnx_data_buf_offset = 0
    npu_data_buf_offset = 0

    npu_data_element_u16b = 0
    npu_data_high_bit_offset = 16

    npu_data_dtype = np.int8

    if tensor_shape_info.version != kp.ModelTensorShapeInformationVersion.KP_MODEL_TENSOR_SHAPE_INFO_VERSION_2:
        raise AttributeError('Unsupported ModelTensorShapeInformationVersion {}'.format(tensor_descriptor.tensor_shape_info.version))

    """
    input data quantization
    """
    if npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_4W4C8B,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8B,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8B_CH_COMPACT,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_16W1C8B,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_RAW_8B]:
        quantization_max_value = np.iinfo(np.int8).max
        quantization_min_value = np.iinfo(np.int8).min
        npu_data_dtype = np.int8
    elif npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_8W1C16B,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_RAW_16B,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_4W4C8BHL,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8BHL,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8BHL_CH_COMPACT,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_16W1C8BHL]:
        quantization_max_value = np.iinfo(np.int16).max
        quantization_min_value = np.iinfo(np.int16).min
        npu_data_dtype = np.int16
    elif npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_RAW_FLOAT]:
        quantization_max_value = np.finfo(np.float32).max
        quantization_min_value = np.finfo(np.float32).min
        npu_data_dtype = np.float32
    else:
        raise AttributeError('Unsupported ModelTensorDataLayout {}'.format(npu_data_layout))

    shape = np.array(tensor_shape_info.v2.shape, dtype=np.int32)
    dimension_num = len(shape)
    quantized_axis = quantization_parameters.v1.quantized_axis
    radix = np.array([quantized_fixed_point_descriptor.radix for quantized_fixed_point_descriptor in quantization_parameters.v1.quantized_fixed_point_descriptor_list], dtype=np.int32)
    scale = np.array([quantized_fixed_point_descriptor.scale.value for quantized_fixed_point_descriptor in quantization_parameters.v1.quantized_fixed_point_descriptor_list], dtype=np.float32)

    quantization_factor = np.power(2, radix) * scale
    if 1 < len(quantization_parameters.v1.quantized_fixed_point_descriptor_list):
        quantization_factor = np.expand_dims(quantization_factor, axis=tuple([dimension for dimension in range(dimension_num) if dimension != quantized_axis]))
        quantization_factor = np.broadcast_to(array=quantization_factor, shape=shape)

    onnx_quantized_data = (onnx_data * quantization_factor).astype(np.float32)
    onnx_quantized_data = np.round(onnx_quantized_data)
    onnx_quantized_data = np.clip(onnx_quantized_data, quantization_min_value, quantization_max_value).astype(npu_data_dtype)
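
    # Worked example (illustrative figures): with radix = 7 and scale = 1.0 the quantization
    # factor is 2 ** 7 * 1.0 = 128, so an ONNX value of 0.5 becomes round(0.5 * 128) = 64,
    # which is then clipped to the int8 range [-128, 127].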

    """
    flatten onnx/npu data
    """
    onnx_quantized_data_flatten = onnx_quantized_data.flatten()
    npu_data_flatten = __get_npu_ndarray(__tensor_descriptor=tensor_descriptor, __npu_ndarray_dtype=npu_data_dtype)

    '''
    re-arrange data from onnx to npu
    '''
    onnx_data_shape_index = np.zeros(shape=(len(shape)), dtype=int)
    stride_onnx = np.array(tensor_shape_info.v2.stride_onnx, dtype=int)
    stride_npu = np.array(tensor_shape_info.v2.stride_npu, dtype=int)

    if npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_4W4C8B,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8B,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8B_CH_COMPACT,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_16W1C8B,
                           kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_RAW_8B]:
        while True:
            onnx_data_buf_offset = onnx_data_shape_index.dot(stride_onnx)
            npu_data_buf_offset = onnx_data_shape_index.dot(stride_npu)

            if npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8B]:
                if -1 == npu_channel_group_stride:
                    """ calculate channel group stride in C language
                    for (int axis = 0; axis < (int)tensor_shape_info->shape_len; axis++) {
                        if (1 == tensor_shape_info->stride_npu[axis]) {
                            channel_idx = axis;
                            continue;
                        }

                        npu_channel_group_stride_tmp = tensor_shape_info->stride_npu[axis] * tensor_shape_info->shape[axis];
                        if (npu_channel_group_stride_tmp > npu_channel_group_stride)
                            npu_channel_group_stride = npu_channel_group_stride_tmp;
                    }

                    npu_channel_group_stride -= 16;
                    """
                    channel_idx = np.where(stride_npu == 1)[0][0]
                    dimension_stride = stride_npu * shape
                    dimension_stride[channel_idx] = 0
                    npu_channel_group_stride = np.max(dimension_stride.flatten()) - 16

                """
                npu_data_buf_offset += (onnx_data_shape_index[channel_idx] / 16) * npu_channel_group_stride
                """
                npu_data_buf_offset += (onnx_data_shape_index[channel_idx] >> 4) * npu_channel_group_stride

            npu_data_flatten[npu_data_buf_offset] = onnx_quantized_data_flatten[onnx_data_buf_offset]

            '''
            update onnx_data_shape_index
            '''
            for dimension in range(dimension_num - 1, -1, -1):
                onnx_data_shape_index[dimension] += 1
                if onnx_data_shape_index[dimension] == shape[dimension]:
                    if dimension == 0:
                        break
                    onnx_data_shape_index[dimension] = 0
                    continue
                else:
                    break

            if onnx_data_shape_index[0] == shape[0]:
                break
    elif npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_8W1C16B,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_RAW_16B]:
        while True:
            onnx_data_buf_offset = onnx_data_shape_index.dot(stride_onnx)
            npu_data_buf_offset = onnx_data_shape_index.dot(stride_npu)

            npu_data_element_u16b = np.frombuffer(buffer=onnx_quantized_data_flatten[onnx_data_buf_offset].tobytes(), dtype=np.uint16)
            npu_data_flatten[npu_data_buf_offset] = np.frombuffer(buffer=(npu_data_element_u16b & 0xfffe).tobytes(), dtype=np.int16)

            '''
            update onnx_data_shape_index
            '''
            for dimension in range(dimension_num - 1, -1, -1):
                onnx_data_shape_index[dimension] += 1
                if onnx_data_shape_index[dimension] == shape[dimension]:
                    if dimension == 0:
                        break
                    onnx_data_shape_index[dimension] = 0
                    continue
                else:
                    break

            if onnx_data_shape_index[0] == shape[0]:
                break
    elif npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_4W4C8BHL,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8BHL,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8BHL_CH_COMPACT,
                             kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_16W1C8BHL]:

        npu_data_flatten = np.frombuffer(buffer=npu_data_flatten.tobytes(), dtype=np.uint8).copy()

        while True:
            onnx_data_buf_offset = onnx_data_shape_index.dot(stride_onnx)
            npu_data_buf_offset = onnx_data_shape_index.dot(stride_npu)

            if npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_1W16C8BHL]:
                if -1 == npu_channel_group_stride:
                    """ calculate channel group stride in C language
                    for (int axis = 0; axis < (int)tensor_shape_info->shape_len; axis++) {
                        if (1 == tensor_shape_info->stride_npu[axis]) {
                            channel_idx = axis;
                            continue;
                        }

                        npu_channel_group_stride_tmp = tensor_shape_info->stride_npu[axis] * tensor_shape_info->shape[axis];
                        if (npu_channel_group_stride_tmp > npu_channel_group_stride)
                            npu_channel_group_stride = npu_channel_group_stride_tmp;
                    }

                    npu_channel_group_stride -= 16;
                    """
                    channel_idx = np.where(stride_npu == 1)[0][0]
                    dimension_stride = stride_npu * shape
                    dimension_stride[channel_idx] = 0
                    npu_channel_group_stride = np.max(dimension_stride.flatten()) - 16

                """
                npu_data_buf_offset += (onnx_data_shape_index[channel_idx] / 16) * npu_channel_group_stride
                """
                npu_data_buf_offset += (onnx_data_shape_index[channel_idx] >> 4) * npu_channel_group_stride

            """
            npu_data_buf_offset = (npu_data_buf_offset / 16) * 32 + (npu_data_buf_offset % 16)
            """
            npu_data_buf_offset = ((npu_data_buf_offset >> 4) << 5) + (npu_data_buf_offset & 15)
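            # Illustrative example: a linear offset of 37 maps to ((37 >> 4) << 5) + (37 & 15)
            # = 64 + 5 = 69, i.e. every group of 16 values expands to 32 bytes so the matching
            # high-bit byte can be stored npu_data_high_bit_offset (16) bytes later.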

            npu_data_element_u16b = np.frombuffer(buffer=onnx_quantized_data_flatten[onnx_data_buf_offset].tobytes(), dtype=np.uint16)
            npu_data_element_u16b = (npu_data_element_u16b >> 1)
            npu_data_flatten[npu_data_buf_offset] = (npu_data_element_u16b & 0x007f).astype(dtype=np.uint8)
            npu_data_flatten[npu_data_buf_offset + npu_data_high_bit_offset] = ((npu_data_element_u16b >> 7) & 0x00ff).astype(dtype=np.uint8)

            '''
            update onnx_data_shape_index
            '''
            for dimension in range(dimension_num - 1, -1, -1):
                onnx_data_shape_index[dimension] += 1
                if onnx_data_shape_index[dimension] == shape[dimension]:
                    if dimension == 0:
                        break
                    onnx_data_shape_index[dimension] = 0
                    continue
                else:
                    break

            if onnx_data_shape_index[0] == shape[0]:
                break
    elif npu_data_layout in [kp.ModelTensorDataLayout.KP_MODEL_TENSOR_DATA_LAYOUT_RAW_FLOAT]:
        while True:
            onnx_data_buf_offset = onnx_data_shape_index.dot(stride_onnx)
            npu_data_buf_offset = onnx_data_shape_index.dot(stride_npu)

            npu_data_flatten[npu_data_buf_offset] = onnx_quantized_data_flatten[onnx_data_buf_offset]

            '''
            update onnx_data_shape_index
            '''
            for dimension in range(dimension_num - 1, -1, -1):
                onnx_data_shape_index[dimension] += 1
                if onnx_data_shape_index[dimension] == shape[dimension]:
                    if dimension == 0:
                        break
                    onnx_data_shape_index[dimension] = 0
                    continue
                else:
                    break

            if onnx_data_shape_index[0] == shape[0]:
                break
    else:
        raise AttributeError('Unsupported ModelTensorDataLayout {}'.format(npu_data_layout))

    return npu_data_flatten.tobytes()