# Adapted from the TensorRT UFF MNIST sample: builds a TensorRT inference engine from a UFF model
from random import randint
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import time
import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
batch_size = 128
class ModelData(object):
    MODEL_FILE = os.path.join(os.path.dirname(__file__), "model2/frozen_model.uff")
    INPUT_NAME = "input_1"
    INPUT_SHAPE = (3, 256, 256)
    OUTPUT_NAME = "predictions/Softmax"
    DTYPE = trt.float32
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        # Parse the UFF network.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)
def load_normalized_test_case(data_path, pagelocked_buffer, case_num=randint(0, 9)):
    # Flatten the image into a 1D array, normalize, and copy to pagelocked memory.
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE))
    test_case_path = "lena.jpg"
    img = normalize_image(Image.open(test_case_path))
    # Replicate the single test image across the whole batch.
    img_array = []
    for i in range(batch_size):
        img_array.append(img)
    img_array = np.array(img_array, dtype=trt.nptype(ModelData.DTYPE))
    img_array = img_array.ravel()
    np.copyto(pagelocked_buffer, img_array)
    return case_num
def main():
    data_path = "/home/bjxiangboren/tools/TensorRT-5.0.2.6/data/mnist/"
    model_file = ModelData.MODEL_FILE
    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        with open("inception_batch.engine", "wb") as f:
            f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            while True:
                start_time = time.time()
                [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size)
                end_time = time.time()
                print("inference time: %s s" % (end_time - start_time))

if __name__ == '__main__':
    main()
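
The `common` module imported above is not shown here; in NVIDIA's Python samples it provides `GiB`, `allocate_buffers`, and `do_inference`. The sketch below is a minimal reconstruction of that pattern (pagelocked host buffers paired with device allocations, async copies around `execute_async`), not the exact file shipped with TensorRT:

```python
# Hedged sketch of the common.py helpers used above (follows NVIDIA's sample pattern;
# the copy bundled with your TensorRT install may differ).
import pycuda.driver as cuda
import tensorrt as trt

def GiB(val):
    return val * 1 << 30

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem      # pagelocked numpy array
        self.device = device_mem  # device allocation

def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        (inputs if engine.binding_is_input(binding) else outputs).append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Copy inputs to the GPU, run the engine asynchronously, copy results back, then synchronize.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]
```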
1. First, convert the .pb model to UFF format:
python /usr/lib/python3.5/dist-packages/uff/bin/convert_to_uff.py --input_file models/lenet5.pb
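
The same conversion can also be done from Python. A minimal sketch, assuming the `uff` package that ships with TensorRT and reusing the node and file names from the script above (the frozen-graph path is hypothetical):

```python
# Minimal sketch of the pb -> UFF conversion via the Python uff API.
import uff

uff.from_tensorflow_frozen_model(
    frozen_file="model2/frozen_model.pb",        # hypothetical path to the frozen TensorFlow graph
    output_nodes=["predictions/Softmax"],        # output node also registered with the UFF parser
    output_filename="model2/frozen_model.uff",   # UFF file loaded as ModelData.MODEL_FILE
)
```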
2. Accelerate inference with the TensorRT engine
The speedup is quite noticeable, but the converted model can no longer be served with TF Serving; it can only run with TensorRT's own engine.
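
Once `inception_batch.engine` has been written, later runs can skip parsing and building entirely and deserialize the engine directly; a minimal sketch, reusing the `common` helpers from above:

```python
# Minimal sketch: reload the serialized engine and run inference without rebuilding it.
import tensorrt as trt
import common

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

with open("inception_batch.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        # Fill inputs[0].host as load_normalized_test_case() does, then run:
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs,
                                       outputs=outputs, stream=stream, batch_size=128)
```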
https://blog.csdn.net/zong596568821xp/article/details/86077553