chore: add virtual environment to repository

- Add the backend_service/venv virtual environment
- Includes all Python dependency packages
- Note: the virtual environment is ~393 MB and contains 12,655 files
@@ -0,0 +1,292 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import json
from argparse import ArgumentParser

import onnx
from onnx import TensorProto, helper


def graph_topological_sort(graph):
    deps_count = [0] * len(graph.node)  # dependency count of each node
    deps_to_nodes = {}  # input name to node indices
    sorted_nodes = []  # initialize sorted_nodes
    for node_idx, node in enumerate(graph.node):
        # CANNOT use len(node.input) directly because an input can be optional (empty name)
        deps_count[node_idx] = sum(1 for _ in node.input if _)
        if deps_count[node_idx] == 0:  # Constant doesn't depend on any inputs
            sorted_nodes.append(graph.node[node_idx])
            continue

        for input_name in node.input:
            if input_name not in deps_to_nodes:
                deps_to_nodes[input_name] = [node_idx]
            else:
                deps_to_nodes[input_name].append(node_idx)

    # Note: this logic only applies to the top-level graph, since a subgraph could use an initializer from its parent graph
    initializer_names = [init.name for init in graph.initializer]
    graph_input_names = [input.name for input in graph.input]
    input_names = initializer_names + graph_input_names
    input_names.sort()
    prev_input_name = None
    for input_name in input_names:
        if prev_input_name == input_name:
            continue

        prev_input_name = input_name
        if input_name in deps_to_nodes:
            for node_idx in deps_to_nodes[input_name]:
                deps_count[node_idx] = deps_count[node_idx] - 1
                if deps_count[node_idx] == 0:
                    sorted_nodes.append(graph.node[node_idx])

    start = 0
    end = len(sorted_nodes)

    while start < end:
        for output in sorted_nodes[start].output:
            if output in deps_to_nodes:
                for node_idx in deps_to_nodes[output]:
                    deps_count[node_idx] = deps_count[node_idx] - 1
                    if deps_count[node_idx] == 0:
                        sorted_nodes.append(graph.node[node_idx])
                        end = end + 1
        start = start + 1

    assert end == len(graph.node), "Graph is not a DAG"
    graph.ClearField("node")
    graph.node.extend(sorted_nodes)


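# A minimal sketch (illustrative only, not part of the original tool): build a two-node graph
# whose nodes are listed consumer-first, then let graph_topological_sort restore
# producer-before-consumer order. All tensor and node names here are hypothetical.
def _demo_graph_topological_sort():
    relu = helper.make_node("Relu", inputs=["add_out"], outputs=["y"], name="relu")
    add = helper.make_node("Add", inputs=["x", "x"], outputs=["add_out"], name="add")
    graph = helper.make_graph(
        [relu, add],  # intentionally out of order: the consumer is listed first
        "demo",
        [helper.make_tensor_value_info("x", TensorProto.FLOAT, [1])],
        [helper.make_tensor_value_info("y", TensorProto.FLOAT, [1])],
    )
    graph_topological_sort(graph)
    assert [n.name for n in graph.node] == ["add", "relu"]

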
class QnnTensorStruct:
    def __init__(self):
        self.name = ""
        self.onnx_data_type = TensorProto.FLOAT
        self.dim = []


def qnn_data_type_to_onnx_data_type(qnn_data_type):
    # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UINT_8
    if qnn_data_type == 0x0408 or qnn_data_type == 0x0108:
        return TensorProto.UINT8
    # QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_UINT_16
    elif qnn_data_type == 0x0416 or qnn_data_type == 0x0116:
        return TensorProto.UINT16
    # QNN_DATATYPE_UFIXED_POINT_32 QNN_DATATYPE_UINT_32
    elif qnn_data_type == 0x0432 or qnn_data_type == 0x0132:
        return TensorProto.UINT32
    # QNN_DATATYPE_UINT_64
    elif qnn_data_type == 0x0164:
        return TensorProto.UINT64
    # QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_INT_8
    elif qnn_data_type == 0x0308 or qnn_data_type == 0x0008:
        return TensorProto.INT8
    # QNN_DATATYPE_FIXED_POINT_16 QNN_DATATYPE_INT_16
    elif qnn_data_type == 0x0316 or qnn_data_type == 0x0016:
        return TensorProto.INT16
    # QNN_DATATYPE_FIXED_POINT_32 QNN_DATATYPE_INT_32
    elif qnn_data_type == 0x0332 or qnn_data_type == 0x0032:
        return TensorProto.INT32
    # QNN_DATATYPE_INT_64
    elif qnn_data_type == 0x0064:
        return TensorProto.INT64
    # QNN_DATATYPE_FLOAT_16
    elif qnn_data_type == 0x0216:
        return TensorProto.FLOAT16
    # QNN_DATATYPE_FLOAT_32
    elif qnn_data_type == 0x0232:
        return TensorProto.FLOAT
    # QNN_DATATYPE_BOOL_8
    elif qnn_data_type == 0x0508:
        return TensorProto.BOOL
    else:
        return TensorProto.UNDEFINED


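# A small sanity check (illustrative only, not part of the original script): 0x0216 and 0x0232
# are the QNN_DATATYPE_FLOAT_16 / QNN_DATATYPE_FLOAT_32 codes listed above, and any
# unrecognized code falls through to UNDEFINED.
def _demo_qnn_data_type_mapping():
    assert qnn_data_type_to_onnx_data_type(0x0216) == TensorProto.FLOAT16
    assert qnn_data_type_to_onnx_data_type(0x0232) == TensorProto.FLOAT
    assert qnn_data_type_to_onnx_data_type(0x9999) == TensorProto.UNDEFINED

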
def parse_qnn_json_file(qnn_json_file_path, qnn_input_output_tensor_dic):
    with open(qnn_json_file_path) as qnn_json_file:
        qnn_json = json.load(qnn_json_file)
        assert "graph" in qnn_json, "QNN converted json file not valid. Can't find graph."
        assert "tensors" in qnn_json["graph"], "QNN converted json file not valid. Can't find tensors."
        for qnn_tensor_name, qnn_tensor_attribute in qnn_json["graph"]["tensors"].items():
            # type:0 - QNN input tensor, type:1 - QNN output tensor
            assert (
                "type" in qnn_tensor_attribute
                and "data_type" in qnn_tensor_attribute
                and "dims" in qnn_tensor_attribute
            ), "QNN converted json file not valid. Can't find some keys in tensors."
            if qnn_tensor_attribute["type"] == 0 or qnn_tensor_attribute["type"] == 1:
                qnn_tensor = QnnTensorStruct()
                qnn_tensor.name = qnn_tensor_name
                qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
                qnn_tensor.dim = qnn_tensor_attribute["dims"]
                qnn_input_output_tensor_dic[qnn_tensor_name] = qnn_tensor

    assert len(qnn_input_output_tensor_dic) > 1, (
        "Converted QNN model not valid. It should have at least 1 input & 1 output."
    )


def compare_onnx_shape_with_qnn_shape(onnx_dims, qnn_dims):
    assert len(onnx_dims) == len(qnn_dims), "Onnx shape and Qnn shape have different ranks."
    return all(onnx_dims[i].dim_value == qnn_dims[i] for i in range(len(onnx_dims)))


def gen_to_channel_first_perm(rank):
    assert rank > 2, "Shape rank should be > 2 for the Transpose node."
    return [0, rank - 1] + list(range(1, rank - 1))


def gen_to_channel_last_perm(rank):
    assert rank > 2, "Shape rank should be > 2 for the Transpose node."
    return [0] + list(range(2, rank)) + [1]


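# Illustrative check (not part of the original script): for a rank-4 tensor the channel-first
# permutation maps NHWC -> NCHW and the channel-last permutation maps NCHW -> NHWC.
def _demo_layout_perms():
    assert gen_to_channel_first_perm(4) == [0, 3, 1, 2]
    assert gen_to_channel_last_perm(4) == [0, 2, 3, 1]

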
# Onnxruntime QNN EP supports context binary files generated by the QNN tool chain. However, a QNN generated
# context binary uses channel-last data layout and 8-bit or 16-bit input and output.
# This script gets the QNN model input & output information from the QNN converted model_net.json file, compares
# it with the Onnx model, and inserts Cast and Transpose nodes into the Onnx model where required.
def main():
    parser = ArgumentParser(
        "Insert Cast, Transpose nodes into Onnx model to make it aligned with QNN generated context binary."
    )
    parser.add_argument("-m", "--onnx_model", help="Required. Path to Onnx model file.", required=True, type=str)
    parser.add_argument(
        "-q", "--qnn_json", help="Required. Path to Qnn converted model_net.json file.", required=True, type=str
    )
    args = parser.parse_args()

    # Parse Qnn model_net.json file to get the graph input output information
    qnn_input_output_tensor_dic = {}
    parse_qnn_json_file(args.qnn_json, qnn_input_output_tensor_dic)

    model = onnx.load(args.onnx_model)

    nodes_to_add = []
    # Track the tensor name changes to update the consumer nodes
    graph_input_output_name_dic = {}
    for graph_input in model.graph.input:
        if graph_input.name in qnn_input_output_tensor_dic:
            input_name_after_node_insert = graph_input.name
            qnn_input_tensor = qnn_input_output_tensor_dic[graph_input.name]
            # Insert a Cast node if the Onnx input and the Qnn input have different data types
            if graph_input.type.tensor_type.elem_type != qnn_input_tensor.onnx_data_type:
                # Insert Cast node
                cast_input_name = input_name_after_node_insert
                cast_output_name = cast_input_name + "_qnn_cast"
                input_cast_node = helper.make_node(
                    "Cast",
                    name=cast_output_name,
                    inputs=[cast_input_name],
                    outputs=[cast_output_name],
                    to=graph_input.type.tensor_type.elem_type,
                )
                # Change input data type to Qnn input data type
                graph_input.type.tensor_type.elem_type = qnn_input_tensor.onnx_data_type
                nodes_to_add.extend([input_cast_node])
                input_name_after_node_insert = cast_output_name
                graph_input_output_name_dic[graph_input.name] = cast_output_name

            if not compare_onnx_shape_with_qnn_shape(graph_input.type.tensor_type.shape.dim, qnn_input_tensor.dim):
                # Add Transpose node (channel last to channel first)
                transpose_perm = gen_to_channel_first_perm(len(graph_input.type.tensor_type.shape.dim))
                transpose_input_name = input_name_after_node_insert
                transpose_output_name = transpose_input_name + "_qnn_trans"
                input_transpose_node = helper.make_node(
                    "Transpose",
                    name=transpose_output_name,
                    inputs=[transpose_input_name],
                    outputs=[transpose_output_name],
                    perm=transpose_perm,
                )
                nodes_to_add.extend([input_transpose_node])
                graph_input_output_name_dic[graph_input.name] = transpose_output_name

                # Change input shape to Qnn input shape
                for i in range(len(graph_input.type.tensor_type.shape.dim)):
                    graph_input.type.tensor_type.shape.dim[i].dim_value = qnn_input_tensor.dim[i]
        else:
            raise AssertionError("Error: Onnx model input: " + graph_input.name + " does not exist in the QNN model inputs.")

    for graph_output in model.graph.output:
        if graph_output.name in qnn_input_output_tensor_dic:
            output_name_after_node_insert = graph_output.name
            # Insert a Cast node if the Onnx output and the Qnn output have different data types
            qnn_output_tensor = qnn_input_output_tensor_dic[graph_output.name]
            if graph_output.type.tensor_type.elem_type != qnn_output_tensor.onnx_data_type:
                # Insert Cast node
                cast_output_name = output_name_after_node_insert
                cast_input_name = cast_output_name + "_qnn_cast"
                output_cast_node = helper.make_node(
                    "Cast",
                    name=cast_input_name,
                    inputs=[cast_input_name],
                    outputs=[cast_output_name],
                    to=qnn_output_tensor.onnx_data_type,
                )
                # Change output data type to Qnn output data type
                graph_output.type.tensor_type.elem_type = qnn_output_tensor.onnx_data_type
                nodes_to_add.extend([output_cast_node])
                output_name_after_node_insert = cast_input_name
                graph_input_output_name_dic[graph_output.name] = cast_input_name

            if not compare_onnx_shape_with_qnn_shape(graph_output.type.tensor_type.shape.dim, qnn_output_tensor.dim):
                # Add Transpose node (channel first to channel last)
                transpose_perm = gen_to_channel_last_perm(len(graph_output.type.tensor_type.shape.dim))
                transpose_output_name = output_name_after_node_insert
                transpose_input_name = transpose_output_name + "_qnn_trans"
                output_transpose_node = helper.make_node(
                    "Transpose",
                    name=transpose_input_name,
                    inputs=[transpose_input_name],
                    outputs=[transpose_output_name],
                    perm=transpose_perm,
                )
                nodes_to_add.extend([output_transpose_node])
                graph_input_output_name_dic[graph_output.name] = transpose_input_name

                # Change output shape to Qnn output shape
                for i in range(len(graph_output.type.tensor_type.shape.dim)):
                    graph_output.type.tensor_type.shape.dim[i].dim_value = qnn_output_tensor.dim[i]
        else:
            raise AssertionError("Error: Onnx model output: " + graph_output.name + " does not exist in the QNN model outputs.")

    for node in model.graph.node:
        for node_input_index, node_input in enumerate(node.input):
            # update consumer nodes of graph inputs to connect to the inserted node
            if node_input in graph_input_output_name_dic:
                node.input[node_input_index] = graph_input_output_name_dic[node_input]

        for node_output_index, node_output in enumerate(node.output):
            # update producer nodes of graph outputs to connect to the inserted node
            if node_output in graph_input_output_name_dic:
                node.output[node_output_index] = graph_input_output_name_dic[node_output]

    model.graph.node.extend(nodes_to_add)
    graph_topological_sort(model.graph)

    # Add the extra parameters all_tensors_to_one_file=False, size_threshold=5000 if the model exceeds the protobuf 2GB limit, e.g.:
    # onnx.save(model, args.onnx_model.replace(".onnx", "_add_trans.onnx"), all_tensors_to_one_file=False, size_threshold=5000)
    onnx.save(model, args.onnx_model.replace(".onnx", "_add_trans.onnx"))


if __name__ == "__main__":
    main()
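
# Example invocation (the script file name is a placeholder; paths are hypothetical):
#     python insert_trans_cast.py -m model.onnx -q model_net.json
# The adjusted model is saved next to the input as model_add_trans.onnx.
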
@@ -0,0 +1,364 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import json
from argparse import ArgumentParser

import onnx
from onnx import TensorProto, helper


class QnnTensorStruct:
    def __init__(
        self, name="", onnx_data_type=TensorProto.FLOAT, is_quantized=False, scale=0.0, offset=0, dim=None, id=None
    ):
        self.name = name
        self.onnx_data_type = onnx_data_type
        self.is_quantized = is_quantized
        self.scale = scale
        self.offset = offset
        self.dim = [] if dim is None else dim
        self.id = id


def is_quantized_data_type(qnn_data_type, is_converter_json):
    if is_converter_json:
        # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_FIXED_POINT_16
        return qnn_data_type in (0x0408, 0x0416, 0x0308, 0x0316)
    else:
        return qnn_data_type in (
            "QNN_DATATYPE_UFIXED_POINT_8",
            "QNN_DATATYPE_UFIXED_POINT_16",
            "QNN_DATATYPE_FIXED_POINT_8",
            "QNN_DATATYPE_FIXED_POINT_16",
        )


def qnn_data_type_to_onnx_data_type(qnn_data_type, is_converter_json):
    if is_converter_json:
        # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UINT_8
        if qnn_data_type == 0x0408 or qnn_data_type == 0x0108:
            return TensorProto.UINT8
        # QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_UINT_16
        elif qnn_data_type == 0x0416 or qnn_data_type == 0x0116:
            return TensorProto.UINT16
        # QNN_DATATYPE_UFIXED_POINT_32 QNN_DATATYPE_UINT_32
        elif qnn_data_type == 0x0432 or qnn_data_type == 0x0132:
            return TensorProto.UINT32
        # QNN_DATATYPE_UINT_64
        elif qnn_data_type == 0x0164:
            return TensorProto.UINT64
        # QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_INT_8
        elif qnn_data_type == 0x0308 or qnn_data_type == 0x0008:
            return TensorProto.INT8
        # QNN_DATATYPE_FIXED_POINT_16 QNN_DATATYPE_INT_16
        elif qnn_data_type == 0x0316 or qnn_data_type == 0x0016:
            return TensorProto.INT16
        # QNN_DATATYPE_FIXED_POINT_32 QNN_DATATYPE_INT_32
        elif qnn_data_type == 0x0332 or qnn_data_type == 0x0032:
            return TensorProto.INT32
        # QNN_DATATYPE_INT_64
        elif qnn_data_type == 0x0064:
            return TensorProto.INT64
        # QNN_DATATYPE_FLOAT_16
        elif qnn_data_type == 0x0216:
            return TensorProto.FLOAT16
        # QNN_DATATYPE_FLOAT_32
        elif qnn_data_type == 0x0232:
            return TensorProto.FLOAT
        # QNN_DATATYPE_BOOL_8
        elif qnn_data_type == 0x0508:
            return TensorProto.BOOL
        else:
            return TensorProto.UNDEFINED
    else:
        if qnn_data_type == "QNN_DATATYPE_UFIXED_POINT_8" or qnn_data_type == "QNN_DATATYPE_UINT_8":
            return TensorProto.UINT8
        elif qnn_data_type == "QNN_DATATYPE_UFIXED_POINT_16" or qnn_data_type == "QNN_DATATYPE_UINT_16":
            return TensorProto.UINT16
        elif qnn_data_type == "QNN_DATATYPE_UFIXED_POINT_32" or qnn_data_type == "QNN_DATATYPE_UINT_32":
            return TensorProto.UINT32
        elif qnn_data_type == "QNN_DATATYPE_UINT_64":
            return TensorProto.UINT64
        elif qnn_data_type == "QNN_DATATYPE_FIXED_POINT_8" or qnn_data_type == "QNN_DATATYPE_INT_8":
            return TensorProto.INT8
        elif qnn_data_type == "QNN_DATATYPE_FIXED_POINT_16" or qnn_data_type == "QNN_DATATYPE_INT_16":
            return TensorProto.INT16
        elif qnn_data_type == "QNN_DATATYPE_FIXED_POINT_32" or qnn_data_type == "QNN_DATATYPE_INT_32":
            return TensorProto.INT32
        elif qnn_data_type == "QNN_DATATYPE_INT_64":
            return TensorProto.INT64
        elif qnn_data_type == "QNN_DATATYPE_FLOAT_16":
            return TensorProto.FLOAT16
        elif qnn_data_type == "QNN_DATATYPE_FLOAT_32":
            return TensorProto.FLOAT
        elif qnn_data_type == "QNN_DATATYPE_BOOL_8":
            return TensorProto.BOOL
        else:
            return TensorProto.UNDEFINED


def parse_qnn_converter_json_file(qnn_convert_json, qnn_input_tensor_dic, qnn_output_tensor_dic):
    is_qnn_converter_json = True
    for qnn_tensor_name, qnn_tensor_attribute in qnn_convert_json["graph"]["tensors"].items():
        # type:0 - QNN input tensor, type:1 - QNN output tensor
        assert (
            "type" in qnn_tensor_attribute
            and "data_type" in qnn_tensor_attribute
            and "dims" in qnn_tensor_attribute
            and "id" in qnn_tensor_attribute
            and "quant_params" in qnn_tensor_attribute
        ), "QNN converted json file not valid. Can't find some keys in tensors."

        # If the tensor is not a graph input or output, ignore it
        if qnn_tensor_attribute["type"] not in [0, 1]:
            continue

        # Get all graph inputs & outputs
        qnn_tensor = QnnTensorStruct(
            name=qnn_tensor_name,
            onnx_data_type=qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"], is_qnn_converter_json),
            is_quantized=is_quantized_data_type(qnn_tensor_attribute["data_type"], is_qnn_converter_json),
            dim=qnn_tensor_attribute["dims"],
            id=qnn_tensor_attribute["id"],
        )

        if (
            qnn_tensor_attribute["quant_params"]["definition"] == 1
            and qnn_tensor_attribute["quant_params"]["encoding"] == 0
        ):
            qnn_tensor.scale = qnn_tensor_attribute["quant_params"]["scale_offset"]["scale"]
            qnn_tensor.offset = -qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"]

        if qnn_tensor_attribute["type"] == 0:
            qnn_input_tensor_dic[qnn_tensor_name] = qnn_tensor
        else:
            qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor

    assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, (
        "Converted QNN model not valid. It should have at least 1 input & 1 output."
    )


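# Note on the offset negation above (an assumption inferred from this script, not from QNN
# documentation): QNN's scale/offset convention appears to be float = (quantized + offset) * scale,
# while ONNX zero-points follow float = (quantized - zero_point) * scale, so the QNN offset is
# negated to obtain the ONNX zero-point.

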
def generate_wrapper_onnx_file(
    graph_name,
    model_file_name,
    qnn_input_tensor_dic,
    qnn_output_tensor_dic,
    disable_embed_mode,
    qnn_ctx_file,
    quantized_IO,
    qnn_sdk_version="unknown",
):
    graph_nodes = []
    ini_list = []
    value_infos = []

    model_inputs = []
    for qnn_input in sorted(qnn_input_tensor_dic.values(), key=lambda inp: inp.id):
        if qnn_input.is_quantized and not quantized_IO:
            q_scale_input_name = qnn_input.name + "_scale"
            q_offset_input_name = qnn_input.name + "_zp"
            q_scale = helper.make_tensor(q_scale_input_name, TensorProto.FLOAT, [], [qnn_input.scale])
            ini_list.append(q_scale)
            q_offset = helper.make_tensor(q_offset_input_name, qnn_input.onnx_data_type, [], [qnn_input.offset])
            ini_list.append(q_offset)
            input_name = qnn_input.name + "_dq"

            q_node = helper.make_node(
                "QuantizeLinear",
                name=qnn_input.name,
                inputs=[input_name, q_scale_input_name, q_offset_input_name],
                outputs=[qnn_input.name],
            )

            graph_nodes.append(q_node)
            model_inputs.append(helper.make_tensor_value_info(input_name, TensorProto.FLOAT, qnn_input.dim))
            value_infos.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))
        else:
            model_inputs.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))

    if disable_embed_mode:
        ep_cache_context_content = qnn_ctx_file
        ctx_embed_mode = 0
    else:
        with open(qnn_ctx_file, "rb") as file:
            ep_cache_context_content = file.read()
        ctx_embed_mode = 1

    qnn_ep_context_node = helper.make_node(
        "EPContext",
        name=graph_name,
        inputs=list(qnn_input_tensor_dic.keys()),
        outputs=list(qnn_output_tensor_dic.keys()),
        ep_cache_context=ep_cache_context_content,
        embed_mode=ctx_embed_mode,
        ep_sdk_version=qnn_sdk_version,
        source="Qnn",
        domain="com.microsoft",
    )
    graph_nodes.append(qnn_ep_context_node)

    model_outputs = []
    for qnn_output in sorted(qnn_output_tensor_dic.values(), key=lambda out: out.id):
        if qnn_output.is_quantized and not quantized_IO:
            dq_scale_input_name = qnn_output.name + "_scale"
            dq_offset_input_name = qnn_output.name + "_zp"
            dq_scale = helper.make_tensor(dq_scale_input_name, TensorProto.FLOAT, [], [qnn_output.scale])
            ini_list.append(dq_scale)
            dq_offset = helper.make_tensor(dq_offset_input_name, qnn_output.onnx_data_type, [], [qnn_output.offset])
            ini_list.append(dq_offset)
            output_name = qnn_output.name + "_dq"

            dq_node = helper.make_node(
                "DequantizeLinear",
                name=output_name,
                inputs=[qnn_output.name, dq_scale_input_name, dq_offset_input_name],
                outputs=[output_name],
            )

            graph_nodes.append(dq_node)
            model_outputs.append(helper.make_tensor_value_info(output_name, TensorProto.FLOAT, qnn_output.dim))
            value_infos.append(
                helper.make_tensor_value_info(qnn_output.name, qnn_output.onnx_data_type, qnn_output.dim)
            )
        else:
            model_outputs.append(
                helper.make_tensor_value_info(qnn_output.name, qnn_output.onnx_data_type, qnn_output.dim)
            )

    graph_def = helper.make_graph(graph_nodes, "qnn-onnx-model", model_inputs, model_outputs, ini_list, "", value_infos)

    model_def = helper.make_model(graph_def, producer_name="MS")

    onnx.save(model_def, model_file_name)


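# Structure of the generated wrapper model (a sketch; "x" and "y" are hypothetical tensor names)
# when quantized_IO is False and the QNN graph I/O is quantized:
#     x_dq (float32 graph input) -> QuantizeLinear -> x -> EPContext -> y -> DequantizeLinear -> y_dq (float32 graph output)
# With quantized_IO=True the Q/DQ nodes are omitted and the EPContext node connects directly to
# the quantized graph inputs and outputs.

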
# Parse a Qnn graph from the json file that is extracted from the context binary file
def parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic):
    is_qnn_converter_json = False
    graph_name = qnn_graph["info"]["graphName"]
    raw_inputs = qnn_graph["info"]["graphInputs"]
    raw_outputs = qnn_graph["info"]["graphOutputs"]

    for raw_input in raw_inputs:
        tensor_info = raw_input["info"]
        qnn_tensor = QnnTensorStruct()
        qnn_tensor.name = tensor_info["name"]
        qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(tensor_info["dataType"], is_qnn_converter_json)
        qnn_tensor.is_quantized = is_quantized_data_type(tensor_info["dataType"], is_qnn_converter_json)
        qnn_tensor.dim = tensor_info["dimensions"]
        if (
            tensor_info["quantizeParams"]["definition"] == "QNN_DEFINITION_DEFINED"
            and tensor_info["quantizeParams"]["quantizationEncoding"] == "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET"
        ):
            qnn_tensor.scale = tensor_info["quantizeParams"]["scaleOffset"]["scale"]
            qnn_tensor.offset = 0 - tensor_info["quantizeParams"]["scaleOffset"]["offset"]
        qnn_input_tensor_dic[qnn_tensor.name] = qnn_tensor

    for raw_output in raw_outputs:
        tensor_info = raw_output["info"]
        qnn_tensor = QnnTensorStruct()
        qnn_tensor.name = tensor_info["name"]
        qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(tensor_info["dataType"], is_qnn_converter_json)
        qnn_tensor.is_quantized = is_quantized_data_type(tensor_info["dataType"], is_qnn_converter_json)
        qnn_tensor.dim = tensor_info["dimensions"]
        if (
            tensor_info["quantizeParams"]["definition"] == "QNN_DEFINITION_DEFINED"
            and tensor_info["quantizeParams"]["quantizationEncoding"] == "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET"
        ):
            qnn_tensor.scale = tensor_info["quantizeParams"]["scaleOffset"]["scale"]
            qnn_tensor.offset = 0 - tensor_info["quantizeParams"]["scaleOffset"]["offset"]
        qnn_output_tensor_dic[qnn_tensor.name] = qnn_tensor

    assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, (
        "Converted QNN model not valid. It should have at least 1 input & 1 output."
    )

    return graph_name


# Onnxruntime QNN EP supports context binary files generated by the QNN tool chain.
# This script reads the QNN graph input & output information either from the QNN converted
# model_net.json file or from a json file extracted from a QNN context binary, and generates a
# wrapper Onnx model that carries the context binary in an EPContext node (embedded, or by file path).
def main():
    parser = ArgumentParser("Generate Onnx model which includes the QNN context binary.")
    parser.add_argument("-b", "--qnn_bin", help="Required. Path to Qnn context binary file.", required=True, type=str)
    parser.add_argument(
        "-q", "--qnn_json", help="Required. Path to Qnn converted model_net.json file.", required=True, type=str
    )
    parser.add_argument(
        "--disable_embed_mode",
        action="store_true",
        default=False,
        help="Set embed_mode=0, which stores the path to the Qnn context binary file in the onnx model instead of embedding the binary content (default: embed the binary content).",
    )
    parser.add_argument(
        "--quantized_IO",
        action="store_true",
        default=False,
        help="The QNN converted context binary uses quantized data as graph inputs and outputs. Keep them quantized if quantized_IO is set; otherwise, Q and DQ nodes are inserted accordingly to make the graph inputs & outputs float32.",
    )
    args = parser.parse_args()

    # Parse Qnn model_net.json file to get the graph input output information

    with open(args.qnn_json) as qnn_json_file:
        qnn_json_obj = json.load(qnn_json_file)
        if "graph" in qnn_json_obj and "tensors" in qnn_json_obj["graph"]:
            print("This json file is from the Qnn converter")
            qnn_input_tensor_dic = {}
            qnn_output_tensor_dic = {}
            parse_qnn_converter_json_file(qnn_json_obj, qnn_input_tensor_dic, qnn_output_tensor_dic)

            generate_wrapper_onnx_file(
                "QnnContext",
                args.qnn_json.replace(".json", "_qnn_ctx.onnx"),
                qnn_input_tensor_dic,
                qnn_output_tensor_dic,
                args.disable_embed_mode,
                args.qnn_bin,
                args.quantized_IO,
            )
        elif "info" in qnn_json_obj and "graphs" in qnn_json_obj["info"]:
            print("This json file is extracted from a QNN context binary file")
            qnn_version = qnn_json_obj["info"]["buildId"]
            for qnn_graph in qnn_json_obj["info"]["graphs"]:
                qnn_input_tensor_dic = {}
                qnn_output_tensor_dic = {}
                graph_name = parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic)

                ctx_file_name = graph_name + "_qnn_ctx.onnx"
                if not args.quantized_IO:
                    ctx_file_name = ctx_file_name.replace(".onnx", "_fp32_io.onnx")

                generate_wrapper_onnx_file(
                    graph_name,
                    ctx_file_name,
                    qnn_input_tensor_dic,
                    qnn_output_tensor_dic,
                    args.disable_embed_mode,
                    args.qnn_bin,
                    args.quantized_IO,
                    qnn_version,
                )
        else:
            print("json file unrecognized.")


if __name__ == "__main__":
    main()
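
# Example invocation (the script file name is a placeholder; paths are hypothetical):
#     python gen_qnn_ctx_onnx_model.py -b model.serialized.bin -q model_net.json
# For a converter json this writes model_net_qnn_ctx.onnx; for a json extracted from a context
# binary it writes one <graph_name>_qnn_ctx.onnx (or <graph_name>_qnn_ctx_fp32_io.onnx) per graph.
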
@@ -0,0 +1,165 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""Provide an entry point to preprocess an ONNX model, especially for QNN."""

import argparse
import pathlib

import onnx

from onnxruntime.quantization.execution_providers import qnn


def _parse_arguments():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(description="Arguments for QNN model preprocessing.")

    parser.add_argument("--input_model_path", "-i", required=True, help="Path to the input ONNX model.")
    parser.add_argument("--output_model_path", "-o", required=True, help="Path to the output ONNX model.")

    # Save preprocessed model with external data.
    parser.add_argument(
        "--save_as_external_data",
        action="store_true",
        help="Whether the output model should be saved with external data.",
    )
    parser.add_argument(
        "--all_tensors_to_one_file",
        action="store_true",
        help="Whether to save all external data in one file or save each tensor to a file named with the tensor name.",
    )
    parser.add_argument(
        "--external_data_location",
        help="Filename of the external file where all tensors are saved. The path is relative to the model path.",
    )
    parser.add_argument(
        "--external_data_size_threshold",
        default=1024,
        type=int,
        help="Tensors with a data size larger than this threshold are converted to external data.",
    )
    parser.add_argument(
        "--external_data_convert_attribute",
        action="store_true",
        help="Whether to save all tensors, including attribute tensors, to external data.",
    )

    # Preprocess options.
    parser.add_argument(
        "--fuse_layernorm",
        action="store_true",
        help="Whether to fuse matched sequences into LayerNormalization nodes if possible.",
    )

    # I/O layouts.
    parser.add_argument(
        "--inputs_to_make_channel_last",
        nargs="+",
        default=None,
        help="List of graph input names to be transposed into channel-last.",
    )

    parser.add_argument(
        "--outputs_to_make_channel_last",
        nargs="+",
        default=None,
        help="List of graph output names to be transposed into channel-last.",
    )

    # Fix dynamic input shapes.
    parser.add_argument(
        "--dynamic_input_shapes",
        nargs=2,
        action="append",
        type=str,
        default=None,
        help="Model input name and desired static shape in comma separated format, for example: 'input' 1,3,256,256",
    )

    # Exclude initializers from input.
    parser.add_argument(
        "--exclude_initializer_from_input",
        action="store_true",
        help="Whether to exclude initializers from model inputs if model.ir_version >= 4.",
    )

    return parser.parse_args()


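# Example invocation (the script file name is a placeholder; paths are hypothetical):
#     python qnn_preprocess.py -i model.onnx -o model_preprocessed.onnx --fuse_layernorm \
#         --dynamic_input_shapes input 1,3,256,256

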
def qnn_preprocess_model(
    model_input: str | pathlib.Path | onnx.ModelProto,
    model_output: str | pathlib.Path,
    fuse_layernorm: bool = False,
    save_as_external_data: bool = False,
    all_tensors_to_one_file: bool = False,
    external_data_location: str | None = None,
    external_data_size_threshold: int = 1024,
    external_data_convert_attribute: bool = False,
    inputs_to_make_channel_last: list[str] | None = None,
    outputs_to_make_channel_last: list[str] | None = None,
    dynamic_input_shapes: list[tuple[str, str]] | None = None,
    exclude_initializer_from_input: bool = False,
) -> bool:
    """Preprocess an ONNX model for QNN.

    Args:
        model_input: A path or ONNX ModelProto specifying the model to be preprocessed.
        model_output: A path specifying where the preprocessed model is to be saved.
        fuse_layernorm: A bool specifying whether to fuse matched sequences into single LayerNormalization nodes.
            Defaults to False.
        save_as_external_data: A bool specifying whether to save the model with external data. Defaults to False.
        all_tensors_to_one_file: A bool specifying whether to save all external data in one file or save each tensor to
            a file named with the tensor name. This argument is effective only when `save_as_external_data` is True.
            Defaults to False.
        external_data_location: A str specifying where to save the external data. The path is relative to the model
            path. This argument is effective only when `save_as_external_data` is True. Defaults to the model name.
        external_data_size_threshold: An int specifying the data size threshold above which tensors are saved as
            external data. This argument is effective only when `save_as_external_data` is True. Defaults to 1024.
        external_data_convert_attribute: A bool specifying whether to save all tensors, including attribute tensors, as
            external data. This argument is effective only when `save_as_external_data` is True. Defaults to False.
        inputs_to_make_channel_last: A list of strs specifying graph input names to be transposed into channel-last.
            Defaults to None.
        outputs_to_make_channel_last: A list of strs specifying graph output names to be transposed into channel-last.
            Defaults to None.
        dynamic_input_shapes: A list of tuples specifying a model input name and its static shape in comma separated
            format, for example: [('input', '1,3,256,256')]. Defaults to None.
        exclude_initializer_from_input: A bool specifying whether to exclude initializers from model inputs.
            Defaults to False.

    Returns:
        A bool indicating whether the model was modified.
    """
    return qnn.qnn_preprocess_model(
        model_input,
        model_output,
        fuse_layernorm=fuse_layernorm,
        save_as_external_data=save_as_external_data,
        all_tensors_to_one_file=all_tensors_to_one_file,
        external_data_location=external_data_location,
        external_data_size_threshold=external_data_size_threshold,
        external_data_convert_attribute=external_data_convert_attribute,
        inputs_to_make_channel_last=inputs_to_make_channel_last,
        outputs_to_make_channel_last=outputs_to_make_channel_last,
        dynamic_input_shapes=dynamic_input_shapes,
        exclude_initializer_from_input=exclude_initializer_from_input,
    )


if __name__ == "__main__":
    args = _parse_arguments()
    qnn_preprocess_model(
        args.input_model_path,
        args.output_model_path,
        fuse_layernorm=args.fuse_layernorm,
        save_as_external_data=args.save_as_external_data,
        all_tensors_to_one_file=args.all_tensors_to_one_file,
        external_data_location=args.external_data_location,
        external_data_size_threshold=args.external_data_size_threshold,
        external_data_convert_attribute=args.external_data_convert_attribute,
        inputs_to_make_channel_last=args.inputs_to_make_channel_last,
        outputs_to_make_channel_last=args.outputs_to_make_channel_last,
        dynamic_input_shapes=args.dynamic_input_shapes,
        exclude_initializer_from_input=args.exclude_initializer_from_input,
    )