
Batch processing on tao engine


Dear @Morganh,

I have customized an inference script for the MobileNet_V1 classification model trained with TAO.

I have converted the .etlt file into an engine using tao-converter for batch size 1, and it is working fine.
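
For reference, the conversion command was along these lines (illustrative only: the .etlt filename, encryption key, output node name, and engine path are placeholders; -d matches input_shape in the script, and -m sets the maximum batch size the engine will accept):

tao-converter -k <encryption_key> \
              -d 3,354,354 \
              -o predictions/Softmax \
              -m 1 \
              -t fp16 \
              -e <engine_path> \
              mobilenet_v1.etlt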

Below is the working code for batch size 1.

import os
import time

import cv2
#import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image, ImageDraw
import pdb
import codecs
import glob
import datetime
import shutil
import matplotlib.pyplot as plt



# input_shape = (3,236,236)
input_shape = (3,354,354)
fallen_label = ["Fallen","Normal"]

# Input Params



class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine


# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
# def allocate_buffers(engine, batch_size=-1):
def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        # pdb.set_trace()
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream



def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    # TensorRT logger singleton
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    # trt_engine_path = "/opt/smarg/surveillance_gateway_prod/surveillance_ai_model/x86_64/Secondary_NumberPlateClassification/lpr_us_onnx_b16.engine"

    trt_runtime = trt.Runtime(TRT_LOGGER)
    # pdb.set_trace()
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # Execution context is needed for inference
    context = trt_engine.create_execution_context()
    # input shape
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context

def preprocess_res18(image):
    # image = np.asarray(image.resize((224, 224), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(trt.float32)).ravel()
    image = np.asarray(image.resize((input_shape[1], input_shape[2]), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(trt.float32)).ravel()
    return image



trt_engine_path = "./Models/V2.2/MobileNet_V1/MobileNetV1_ReTrain_Ep70_ZeroConfAug2_SIZE_416X416_FallenObjectClassification_V2.2_fp16_b1.engine"
camera_frames_path = "./TestSampleFrame/73_43/"
output_folder_path = "./output/73_430_output/"
acc_threshold = 95

cropped_images = "./output/Accuracy_wise_analysis_73_430_output/"

if not os.path.exists(output_folder_path):
    os.mkdir(output_folder_path)

if not os.path.exists(cropped_images):
    os.mkdir(cropped_images)

inputs, outputs, bindings, stream, context = model_loading(trt_engine_path)

cropped_img_count = 1
image_count = 0


# Function to perform classification on each cropped area and draw rectangles

def classify_and_draw(image, fallen_area, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold):
    try:
        # image = Image.open(image_path)
        draw = ImageDraw.Draw(image)
        
        global cropped_img_count
        global image_count

        box_coordinates = []  # List to store box coordinates

        for area_coordinates in fallen_area:
            x1, y1, x2, y2 = area_coordinates['XMIN'], area_coordinates['YMIN'], area_coordinates['XMAX'], area_coordinates['YMAX']
            if (x2-x1)>60 and (y2-y1)>60:
                
                area_image_cropped = image.crop((x1, y1, x2, y2))
                area_image = preprocess_res18(area_image_cropped)  # Preprocess the cropped area
                # print(area_image.shape)
                np.copyto(inputs[0].host, area_image)  # Copy preprocessed image to input buffer
                
                output = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)  # Perform inference

                max_index_row = np.argmax(output[0], axis=0)
                fallen_normal_acc = int('{:.0f}'.format(output[0][max_index_row]*100))
                fallen_label_info = fallen_label[max_index_row]

                color = "green"
                if fallen_label_info == "Fallen":
                    # color = "yellow"
                    color = "red"
                    if fallen_normal_acc>20:
                        image_name = f"{image_count}.jpg"
                        # crop_img_name = cropped_images + "/" + image_name + "_" + str(cropped_img_count)+"_"+str(fallen_normal_acc)+".jpg"
                        crop_img_name = cropped_images + "/"+str(fallen_normal_acc)+"_" + image_name + "_" + str(cropped_img_count)+".jpg"
                        cropped_img_count += 1
                        area_image_cropped.save(crop_img_name)

                box_coordinates.append((x1, y1, x2, y2, color, fallen_label_info, fallen_normal_acc))

        # Draw all boxes after the loop
        for box_info in box_coordinates:
            x1, y1, x2, y2, color, fallen_label_info, fallen_normal_acc = box_info
            border_width=10
            draw.rectangle([x1, y1, x2, y2], outline=color)
            
            # inner_rect = [x1 + border_width, y1 + border_width, x2 - border_width, y2 - border_width]
            # draw.rectangle(inner_rect,outline=color, fill=None)

            if fallen_normal_acc > acc_threshold:
                draw.text((x1, y1), f"{fallen_label_info} ({fallen_normal_acc}%)", fill=color)

            
        image.save(f"{output_folder_path}/{image_count}.jpg")
        image_count+=1
        print(f"processed images are : {image_count}")
    except Exception as e:
        print("exception as : ",e)
        pass
    

# Wrong violation analysis

# for camera_dir in glob.glob(camera_frames_path+"/*"):
#     camera_name = camera_dir.split("/")[-1]
#     fallen_area_data = fallen_area[100 + int(camera_name_mapping[camera_name])]

#     for image_path in glob.glob(camera_dir+"/*"):
#         classify_and_draw(image_path, fallen_area_data, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold)
    
camera_image_mapping = {
    131:31,
    132:32,
    133:33,
    134:34,
    135:35,
    136:36,
    137:37,
    138:38,
    139:39,
    140:40,
    141:41,
    142:42,
    143:43
}

camera_name_mapping={
    '59_330':31,
    '60_570':32,
    '61_070':33,
    '62_210':34,
    '65_220':35,
    '66_550':36,
    '73_430':37,
    '74_820':38,
    '75_420':39,
    '76_590':40,
    '83_860':41,
    '85_650':43
    
}


# next 59_330 : 31

area = fallen_area[137]
video_path = "./TestSampleFrame/fallen_22March_16_1630/22march_4_430/73+430_DT_2024-03-22_16:00:01.550223_DT_003d.mkv"
frame_interval = 10
frame_count = 0
# Initialize OpenCV video capture
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Unable to open video.")
    exit()

while cap.isOpened():
    ret, frame = cap.read()

    if not ret:
        break  # stop when there are no more frames
    try:
        frame_count+=1
        print(frame_count)
        frame = cv2.resize(frame, (1920, 1080))
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_pil = Image.fromarray(image_rgb)
        
        if frame_count%frame_interval==0:
            classify_and_draw(image_pil, area, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold)
        else:
            image_pil.save(f"{output_folder_path}/{image_count}.jpg")
    
    except Exception as e:
        print("---",e)

    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

print(cap.isOpened())

Can you please suggest how I can modify it for batch size n (n > 1)?
Also, please suggest how to process a batch of inputs before passing them to the model.

I have tried, but I am getting size mismatch issues while feeding the batch input; the direction I tried is roughly sketched below.
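
To make the question concrete, this is the kind of batching I am trying to do (illustrative sketch only, not working code: it assumes the engine is rebuilt with tao-converter -m <max_batch> and that allocate_buffers(trt_engine, batch_size=max_batch) is used inside model_loading instead of the default of 1; infer_batch, pil_crops, and max_batch are names I am using only for illustration):

max_batch = 8  # placeholder: must match the -m value used when building the engine

def infer_batch(pil_crops):
    # Run inference on a list of PIL crops; len(pil_crops) must be <= max_batch.
    n = len(pil_crops)
    # preprocess_res18 already returns a flat CHW float32 vector, so concatenating
    # n of them gives one contiguous batch of size n * 3 * 354 * 354.
    batch = np.concatenate([preprocess_res18(img) for img in pil_crops])
    # Copy the batch into the front of the page-locked host input buffer.
    np.copyto(inputs[0].host[: batch.size], batch)
    # Execute with the actual number of images in this batch.
    out = do_inference(context, bindings=bindings, inputs=inputs,
                       outputs=outputs, stream=stream, batch_size=n)
    # The flat host output holds one score vector per image; split it per image.
    num_classes = len(fallen_label)
    return out[0][: n * num_classes].reshape(n, num_classes)

If the engine instead has an explicit (dynamic) batch dimension, I understand the shape would have to be set per call with context.set_binding_shape(0, (n,) + input_shape) and the inference run with execute_async_v2, but I am not sure which case applies here.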

Thanks.
