
Batch processing on tao engine


Dear @Morganh,

I have customized an inference script for the MobileNet_V1 classification model trained with TAO.

I have converted the .etlt file into an engine using tao-converter for batch size 1, and it is working fine.
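
For reference, the conversion command was along these lines (illustrative only: the .etlt filename, encryption key, output node name, and engine path are placeholders; -d matches input_shape in the script, and -m sets the maximum batch size the engine will accept):

tao-converter -k <encryption_key> \
              -d 3,354,354 \
              -o predictions/Softmax \
              -m 1 \
              -t fp16 \
              -e <engine_path> \
              mobilenet_v1.etlt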

Below is the working code for batch size 1.

import os
import time

import cv2
#import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image, ImageDraw
import pdb
import codecs
import glob
import datetime
import shutil
import matplotlib.pyplot as plt



# input_shape = (3,236,236)
input_shape = (3,354,354)
fallen_label = ["Fallen","Normal"]

# Input Params



class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine


# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
# def allocate_buffers(engine, batch_size=-1):
def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        # pdb.set_trace()
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream



def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    # TensorRT logger singleton
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    # trt_engine_path = "/opt/smarg/surveillance_gateway_prod/surveillance_ai_model/x86_64/Secondary_NumberPlateClassification/lpr_us_onnx_b16.engine"

    trt_runtime = trt.Runtime(TRT_LOGGER)
    # pdb.set_trace()
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # Execution context is needed for inference
    context = trt_engine.create_execution_context()
    # input shape
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context

def preprocess_res18(image):
    # image = np.asarray(image.resize((224, 224), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(trt.float32)).ravel()
    image = np.asarray(image.resize((input_shape[1], input_shape[2]), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(trt.float32)).ravel()
    return image



trt_engine_path = "./Models/V2.2/MobileNet_V1/MobileNetV1_ReTrain_Ep70_ZeroConfAug2_SIZE_416X416_FallenObjectClassification_V2.2_fp16_b1.engine"
camera_frames_path = "./TestSampleFrame/73_43/"
output_folder_path = "./output/73_430_output/"
acc_threshold = 95

cropped_images = "./output/Accuracy_wise_analysis_73_430_output/"

if not os.path.exists(output_folder_path):
    os.mkdir(output_folder_path)

if not os.path.exists(cropped_images):
    os.mkdir(cropped_images)

inputs, outputs, bindings, stream, context = model_loading(trt_engine_path)

cropped_img_count = 1
image_count = 0


# Function to perform classification on each cropped area and draw rectangles

def classify_and_draw(image, fallen_area, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold):
    try:
        # image = Image.open(image_path)
        draw = ImageDraw.Draw(image)
        
        global cropped_img_count
        global image_count

        box_coordinates = []  # List to store box coordinates

        for area_coordinates in fallen_area:
            x1, y1, x2, y2 = area_coordinates['XMIN'], area_coordinates['YMIN'], area_coordinates['XMAX'], area_coordinates['YMAX']
            if (x2-x1)>60 and (y2-y1)>60:
                
                area_image_cropped = image.crop((x1, y1, x2, y2))
                area_image = preprocess_res18(area_image_cropped)  # Preprocess the cropped area
                # print(area_image.shape)
                np.copyto(inputs[0].host, area_image)  # Copy preprocessed image to input buffer
                
                output = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)  # Perform inference

                max_index_row = np.argmax(output[0], axis=0)
                fallen_normal_acc = int('{:.0f}'.format(output[0][max_index_row]*100))
                fallen_label_info = fallen_label[max_index_row]

                color = "green"
                if fallen_label_info == "Fallen":
                    # color = "yellow"
                    color = "red"
                    if fallen_normal_acc>20:
                        image_name = f"{image_count}.jpg"
                        # crop_img_name = cropped_images + "/" + image_name + "_" + str(cropped_img_count)+"_"+str(fallen_normal_acc)+".jpg"
                        crop_img_name = cropped_images + "/"+str(fallen_normal_acc)+"_" + image_name + "_" + str(cropped_img_count)+".jpg"
                        cropped_img_count += 1
                        area_image_cropped.save(crop_img_name)

                box_coordinates.append((x1, y1, x2, y2, color, fallen_label_info, fallen_normal_acc))

        # Draw all boxes after the loop
        for box_info in box_coordinates:
            x1, y1, x2, y2, color, fallen_label_info, fallen_normal_acc = box_info
            border_width=10
            draw.rectangle([x1, y1, x2, y2], outline=color)
            
            # inner_rect = [x1 + border_width, y1 + border_width, x2 - border_width, y2 - border_width]
            # draw.rectangle(inner_rect,outline=color, fill=None)

            if fallen_normal_acc > acc_threshold:
                draw.text((x1, y1), f"{fallen_label_info} ({fallen_normal_acc}%)", fill=color)

            
        image.save(f"{output_folder_path}/{image_count}.jpg")
        image_count+=1
        print(f"processed images are : {image_count}")
    except Exception as e:
        print("exception as : ",e)
        pass
    

# Wrong violation analysis

# for camera_dir in glob.glob(camera_frames_path+"/*"):
#     camera_name = camera_dir.split("/")[-1]
#     fallen_area_data = fallen_area[100 + int(camera_name_mapping[camera_name])]

#     for image_path in glob.glob(camera_dir+"/*"):
#         classify_and_draw(image_path, fallen_area_data, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold)
    
camera_image_mapping = {
    131:31,
    132:32,
    133:33,
    134:34,
    135:35,
    136:36,
    137:37,
    138:38,
    139:39,
    140:40,
    141:41,
    142:42,
    143:43
}

camera_name_mapping={
    '59_330':31,
    '60_570':32,
    '61_070':33,
    '62_210':34,
    '65_220':35,
    '66_550':36,
    '73_430':37,
    '74_820':38,
    '75_420':39,
    '76_590':40,
    '83_860':41,
    '85_650':43
    
}


# next 59_330 : 31

area = fallen_area[137]
video_path = "./TestSampleFrame/fallen_22March_16_1630/22march_4_430/73+430_DT_2024-03-22_16:00:01.550223_DT_003d.mkv"
frame_interval = 10
frame_count = 0
# Initialize OpenCV video capture
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Unable to open video.")
    exit()

while cap.isOpened():
    ret, frame = cap.read()

    if not ret:
        break  # stop when there are no more frames
    try:
        frame_count+=1
        print(frame_count)
        frame = cv2.resize(frame, (1920, 1080))
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_pil = Image.fromarray(image_rgb)
        
        if frame_count%frame_interval==0:
            classify_and_draw(image_pil, area, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold)
        else:
            image_pil.save(f"{output_folder_path}/{image_count}.jpg")
    
    except Exception as e:
        print("---",e)

    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

print(cap.isOpened())

Can you please suggest how I can modify it for batch size n (n > 1)?
Also, please suggest how to process a batch of inputs before passing them to the model.

I have tried, but I am getting size mismatch issues while feeding the batch input; the direction I tried is roughly sketched below.
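
To make the question concrete, this is the kind of batching I am trying to do (illustrative sketch only, not working code: it assumes the engine is rebuilt with tao-converter -m <max_batch> and that allocate_buffers(trt_engine, batch_size=max_batch) is used inside model_loading instead of the default of 1; infer_batch, pil_crops, and max_batch are names I am using only for illustration):

max_batch = 8  # placeholder: must match the -m value used when building the engine

def infer_batch(pil_crops):
    # Run inference on a list of PIL crops; len(pil_crops) must be <= max_batch.
    n = len(pil_crops)
    # preprocess_res18 already returns a flat CHW float32 vector, so concatenating
    # n of them gives one contiguous batch of size n * 3 * 354 * 354.
    batch = np.concatenate([preprocess_res18(img) for img in pil_crops])
    # Copy the batch into the front of the page-locked host input buffer.
    np.copyto(inputs[0].host[: batch.size], batch)
    # Execute with the actual number of images in this batch.
    out = do_inference(context, bindings=bindings, inputs=inputs,
                       outputs=outputs, stream=stream, batch_size=n)
    # The flat host output holds one score vector per image; split it per image.
    num_classes = len(fallen_label)
    return out[0][: n * num_classes].reshape(n, num_classes)

If the engine instead has an explicit (dynamic) batch dimension, I understand the shape would have to be set per call with context.set_binding_shape(0, (n,) + input_shape) and the inference run with execute_async_v2, but I am not sure which case applies here.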

Thanks.
