TensorRT之UFF
阅读原文时间:2023年07月10日阅读:1

# This sample uses a UFF MNIST model to create a TensorRT Inference Engine
from random import randint
from PIL import Image
import numpy as np

import pycuda.driver as cuda

# This import causes pycuda to automatically manage CUDA context creation and cleanup.

import pycuda.autoinit

import tensorrt as trt
import time

import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common

# You can set the logger severity higher to suppress messages (or lower to display more messages).

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)  # logger instance handed to trt.Builder in build_engine

# Number of copies of the test image packed into one inference call; also
# assigned to builder.max_batch_size when the engine is built.
batch_size = 128

class ModelData(object):
    """Static description of the UFF model used by this script."""

    # Path of the frozen UFF model, resolved relative to this script's directory.
    MODEL_FILE = os.path.join(os.path.dirname(__file__), "model2/frozen_model.uff")
    # Name of the input node registered with the UFF parser.
    INPUT_NAME = "input_1"
    # Input shape, unpacked as (channels, height, width) when resizing the image.
    INPUT_SHAPE = (3, 256, 256)
    # Name of the output node registered with the UFF parser.
    OUTPUT_NAME = 'predictions/Softmax'
    # TensorRT data type; converted to a NumPy dtype via trt.nptype for host buffers.
    DTYPE = trt.float32

def build_engine(model_file):
    """Parse a UFF model file and build a TensorRT engine from it.

    Args:
        model_file: Path to the ``.uff`` model to parse.

    Returns:
        Whatever ``builder.build_cuda_engine(network)`` returns for the
        parsed network.

    Raises:
        RuntimeError: If the UFF parser reports that parsing failed.
    """
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # parse() signals failure through its return value; surface it here
        # instead of going on to build an engine from an unpopulated network.
        if not parser.parse(model_file, network):
            raise RuntimeError("Failed to parse UFF model: %s" % model_file)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer.

def load_normalized_test_case(data_path, pagelocked_buffer, case_num=None):
    """Fill ``pagelocked_buffer`` with ``batch_size`` copies of the test image.

    The image is resized to the model's input height/width, transposed from
    HWC to CHW, cast to the model dtype, flattened, repeated ``batch_size``
    times and copied into ``pagelocked_buffer``.

    Args:
        data_path: Directory of sample data. Currently unused because the
            image path is hard-coded to ``"lena.jpg"`` below.
        pagelocked_buffer: Destination array; it must be able to receive
            ``batch_size`` flattened images via ``np.copyto``.
        case_num: Test-case number to report back. When omitted, a random
            integer in ``[0, 9]`` is drawn on each call (previously the
            default was drawn once, when the function was defined).

    Returns:
        ``case_num`` (the supplied or randomly drawn value). It does not
        influence which image is loaded.
    """
    if case_num is None:
        case_num = randint(0, 9)

    # NOTE(review): this line appears to have been commented out in the
    # original script; it is overridden by the hard-coded path below.
    # test_case_path = os.path.join(data_path, str(case_num) + ".pgm")

    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        _, h, w = ModelData.INPUT_SHAPE
        # Image.LANCZOS is the same filter as the ANTIALIAS alias, which
        # newer Pillow releases no longer provide.
        resized = image.resize((w, h), Image.LANCZOS)
        return np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE))

    test_case_path = "lena.jpg"
    # Close the image file once the pixel data has been extracted.
    with Image.open(test_case_path) as image:
        img = normalize_image(image)

    # Flatten the image into a 1D array and repeat it once per batch entry,
    # then copy the whole batch to pagelocked memory.
    batch = np.tile(img.ravel(), batch_size)
    np.copyto(pagelocked_buffer, batch)
    return case_num

def main():
    """Build the engine, save it to disk, and time batched inference forever.

    Steps:
      1. Build a TensorRT engine from ``ModelData.MODEL_FILE``.
      2. Serialize it to ``inception_batch.engine``.
      3. Allocate buffers, load the test batch into the first input buffer.
      4. Run inference in an endless loop, printing the wall-clock time of
         each call. The loop has no exit condition; stop it by interrupting
         the process.

    NOTE(review): several lines below are kept as comments. They appear to
    have been commented out in the original script (they conflict with the
    live code next to them) and lost their ``#`` marker when the post was
    exported. Confirm against the author's original if available.
    """
    # data_path = common.find_sample_data(description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    data_path = "/home/bjxiangboren/tools/TensorRT-5.0.2.6/data/mnist/"
    model_file = ModelData.MODEL_FILE

    # Alternative: load a previously serialized engine instead of rebuilding.
    # with open("inception_batch.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    #     engine = runtime.deserialize_cuda_engine(f.read())

    with build_engine(model_file) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        with open("inception_batch.engine", "wb") as f:
            f.write(engine.serialize())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            while True:
                start_time = time.time()
                [output] = common.do_inference(
                    context,
                    bindings=bindings,
                    inputs=inputs,
                    outputs=outputs,
                    stream=stream,
                    batch_size=batch_size,
                )
                end_time = time.time()
                print("time dis is %s" % (end_time - start_time))

                # Result inspection, disabled in the original script:
                # output = output.reshape((30,1001))
                # print output
                # print output.shape
                # print np.argmax(output, axis=1)
                # pred = np.argmax(output)
                # print("Test Case: " + str(case_num))
                # print("Prediction: " + str(pred))

# Run the benchmark only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

1、首先将pb转为uff格式的模型

python  /usr/lib/python3.5/dist-packages/uff/bin/convert_to_uff.py --input_file models/lenet5.pb

2、使用trt engine加速

这个加速还是挺明显的,但转换后的模型无法使用tfserving了,只能用tensorrt自己的engine。

参考:https://devtalk.nvidia.com/default/topic/1044466/tensorrt/uff-inference-time-large-than-pb-time-when-process-vgg-19/

https://blog.csdn.net/zong596568821xp/article/details/86077553

https://blog.csdn.net/g11d111/article/details/92061884

https://mp.weixin.qq.com/s/Ps49ZTfJprcOYrc6xo-gLg?