
Wrong results when running a multi-task classification model with custom code


Dear @Morganh and Team,

I have trained a multi-task classification model with a ResNet-10 backbone.

Below are the classes:
{"tasks": ["helmet", "shoes", "vest"], "class_mapping": {"helmet": {"0": "Available", "1": "None"}, "shoes": {"0": "Available", "1": "None"}, "vest": {"0": "Available", "1": "None"}}}

I am getting good accuracy on the validation data:

Total Val Loss: 0.9775297530404814
Tasks: ['helmet', 'shoes', 'vest']
Val loss per task: [0.05684200308674558, 0.01992518341385692, 0.023936687715554805]
Val acc per task: [0.9862637362637363, 0.9937205651491365, 0.9929356357927787]
Execution status: PASS

I converted the model using tao-converter so that I can use the engine file in a custom script.

Below is the .etlt model conversion command:

./tao-converter ./model/PPE_KIT_MultiTask_TrainedEp080_Resnet10_V1.8.etlt -k actual_key  -o helmet/Softmax,shoes/Softmax,vest/Softmax -d 3,224,224 -i nchw -e ./model/PPE_KIT_MultiTask_TrainedEp080_Resnet10_V1.8_fp16.engine -m 1 -t fp16 -b 1
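
To sanity-check the converted engine, I can list its bindings with a short snippet (a minimal sketch using the same TensorRT binding API as the script below; I expect one input binding and the three Softmax output bindings):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
engine_path = "./model/PPE_KIT_MultiTask_TrainedEp080_Resnet10_V1.8_fp16.engine"

with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    kind = "input " if engine.binding_is_input(i) else "output"
    print(kind, engine.get_binding_name(i), engine.get_binding_shape(i), engine.get_binding_dtype(i))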

Below is the inference script:

import numpy as np
import cv2
import pycuda.driver as cuda
import pycuda.autoinit  # This is needed for initializing CUDA driver
import tensorrt as trt
import glob
import os

# Function to load and preprocess an image
def preprocess_image(image_path):
    # Load image
    image = cv2.imread(image_path)
    # Resize image to match the input size of the model
    resized_image = cv2.resize(image, (224, 224))
    # Scale pixel values to [0, 1] (the image stays in BGR order as loaded by cv2.imread)
    normalized_image = resized_image.astype(np.float32) / 255.0
    # Change image layout from HWC to CHW
    chw_image = np.transpose(normalized_image, (2, 0, 1))
    # Add batch dimension
    batched_image = np.expand_dims(chw_image, axis=0)
    return batched_image


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream

def model_loading(trt_engine_path):
    # TensorRT logger singleton
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    trt_runtime = trt.Runtime(TRT_LOGGER)
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # Execution context is needed for inference
    context = trt_engine.create_execution_context()
    # Model input shape (batch, channels, height, width)
    input_shape = (1,3,224,224)
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context


def do_model_2_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]


trt_engine_path = "/root/data/TAO-MODEL-CONVERTER/model/PPE_KIT_MultiTask_TrainedEp080_Resnet10_V1.8_fp16.engine"

inputs_model_2, outputs_model_2, bindings_model_2, stream_model_2, context_model_2 = model_loading(trt_engine_path)


folder_path = '/root/data/Pritam/Script/data_ppekit/croppedImages_data/halmet1/'
image_paths = glob.glob(folder_path + '/*.jpg')  # Change the extension if needed

for image_path in image_paths:
    # Load and preprocess an image
    input_data = preprocess_image(image_path)
    # image = cv2.imread(image_path)
    # image = [image]
    # image = np.array([(cv2.resize(img, ( 224 , 224 )))/ 255.0 for img in image], dtype=np.float32)
    # image= image.transpose( 0 , 3 , 1 , 2 )
    
    np.copyto(inputs_model_2[0].host, input_data.ravel())

    outputs = do_model_2_inference(context_model_2, bindings=bindings_model_2, inputs=inputs_model_2, outputs=outputs_model_2, stream=stream_model_2)

    print(outputs)

    # outputs_model_2.clear()

    # predicted_class = np.argmax(outputs)
    # print("Predicted class for", image_path, ":", predicted_class)

The above script gives almost the same values for every image: the argmax for each task is always index 1, i.e. "None" for helmet, shoes, and vest. Please suggest where the problem is: in the model conversion or in the image preprocessing before the data is passed to the model.

Below is the output:

[array([9.2172623e-04, 9.9902344e-01], dtype=float32), array([0.12335205, 0.87646484], dtype=float32), array([0.21118164, 0.7890625 ], dtype=float32)]
[array([9.2172623e-04, 9.9902344e-01], dtype=float32), array([0.12347412, 0.87646484], dtype=float32), array([0.21179199, 0.78808594], dtype=float32)]
[array([8.8977814e-04, 9.9902344e-01], dtype=float32), array([0.12011719, 0.8798828 ], dtype=float32), array([0.2088623, 0.7910156], dtype=float32)]
[array([9.1457367e-04, 9.9902344e-01], dtype=float32), array([0.12390137, 0.87597656], dtype=float32), array([0.20825195, 0.7915039 ], dtype=float32)]
[array([9.1123581e-04, 9.9902344e-01], dtype=float32), array([0.12231445, 0.8779297 ], dtype=float32), array([0.21044922, 0.7895508 ], dtype=float32)]
[array([8.1682205e-04, 9.9902344e-01], dtype=float32), array([0.11376953, 0.88623047], dtype=float32), array([0.19836426, 0.8017578 ], dtype=float32)]
[array([8.8977814e-04, 9.9902344e-01], dtype=float32), array([0.11993408, 0.8798828 ], dtype=float32), array([0.2088623, 0.7910156], dtype=float32)]
[array([8.7261200e-04, 9.9902344e-01], dtype=float32), array([0.11889648, 0.88134766], dtype=float32), array([0.20605469, 0.7939453 ], dtype=float32)]
[array([8.7594986e-04, 9.9902344e-01], dtype=float32), array([0.11938477, 0.8803711 ], dtype=float32), array([0.20568848, 0.7944336 ], dtype=float32)]
[array([9.2887878e-04, 9.9902344e-01], dtype=float32), array([0.12408447, 0.87597656], dtype=float32), array([0.21191406, 0.78808594], dtype=float32)]
[array([9.0026855e-04, 9.9902344e-01], dtype=float32), array([0.12072754, 0.87939453], dtype=float32), array([0.21044922, 0.7895508 ], dtype=float32)]
[array([8.8310242e-04, 9.9902344e-01], dtype=float32), array([0.11999512, 0.8798828 ], dtype=float32), array([0.20800781, 0.7919922 ], dtype=float32)]
[array([9.1266632e-04, 9.9902344e-01], dtype=float32), array([0.1237793 , 0.87597656], dtype=float32), array([0.21105957, 0.7890625 ], dtype=float32)]
[array([9.1648102e-04, 9.9902344e-01], dtype=float32), array([0.12261963, 0.8774414 ], dtype=float32), array([0.21154785, 0.7885742 ], dtype=float32)]
[array([8.7594986e-04, 9.9902344e-01], dtype=float32), array([0.11932373, 0.8808594 ], dtype=float32), array([0.20629883, 0.79345703], dtype=float32)]
[array([9.1838837e-04, 9.9902344e-01], dtype=float32), array([0.12219238, 0.8779297 ], dtype=float32), array([0.2121582 , 0.78808594], dtype=float32)]
[array([9.1457367e-04, 9.9902344e-01], dtype=float32), array([0.12249756, 0.8774414 ], dtype=float32), array([0.21179199, 0.78808594], dtype=float32)]
[array([9.0408325e-04, 9.9902344e-01], dtype=float32), array([0.12176514, 0.87841797], dtype=float32), array([0.20861816, 0.7915039 ], dtype=float32)]
[array([8.2492828e-04, 9.9902344e-01], dtype=float32), array([0.11376953, 0.88623047], dtype=float32), array([0.2006836, 0.7993164], dtype=float32)]
[array([9.0742111e-04, 9.9902344e-01], dtype=float32), array([0.12200928, 0.8779297 ], dtype=float32), array([0.21105957, 0.7890625 ], dtype=float32)]
[array([8.7451935e-04, 9.9902344e-01], dtype=float32), array([0.11877441, 0.88134766], dtype=float32), array([0.2076416 , 0.79248047], dtype=float32)]
[array([8.9693069e-04, 9.9902344e-01], dtype=float32), array([0.11981201, 0.8803711 ], dtype=float32), array([0.21105957, 0.7890625 ], dtype=float32)]
[array([8.6593628e-04, 9.9902344e-01], dtype=float32), array([0.1184082 , 0.88183594], dtype=float32), array([0.20666504, 0.79345703], dtype=float32)]
[array([8.8977814e-04, 9.9902344e-01], dtype=float32), array([0.11920166, 0.8808594 ], dtype=float32), array([0.20935059, 0.79052734], dtype=float32)]
[array([9.3460083e-04, 9.9902344e-01], dtype=float32), array([0.12493896, 0.875     ], dtype=float32), array([0.21289062, 0.7871094 ], dtype=float32)]
[array([8.8977814e-04, 9.9902344e-01], dtype=float32), array([0.11938477, 0.8803711 ], dtype=float32), array([0.2088623, 0.7910156], dtype=float32)]
[array([9.0742111e-04, 9.9902344e-01], dtype=float32), array([0.12158203, 0.87841797], dtype=float32), array([0.21081543, 0.7890625 ], dtype=float32)]
[array([9.0551376e-04, 9.9902344e-01], dtype=float32), array([0.12115479, 0.87890625], dtype=float32), array([0.21081543, 0.7890625 ], dtype=float32)]
[array([8.7118149e-04, 9.9902344e-01], dtype=float32), array([0.11950684, 0.8803711 ], dtype=float32), array([0.20532227, 0.7944336 ], dtype=float32)]
[array([8.9168549e-04, 9.9902344e-01], dtype=float32), array([0.12176514, 0.87841797], dtype=float32), array([0.20715332, 0.79296875], dtype=float32)]
[array([8.9168549e-04, 9.9902344e-01], dtype=float32), array([0.12145996, 0.87841797], dtype=float32), array([0.20703125, 0.79296875], dtype=float32)]
[array([9.2697144e-04, 9.9902344e-01], dtype=float32), array([0.12322998, 0.8769531 ], dtype=float32), array([0.21289062, 0.7871094 ], dtype=float32)]
[array([8.6259842e-04, 9.9902344e-01], dtype=float32), array([0.11877441, 0.88134766], dtype=float32), array([0.20605469, 0.7939453 ], dtype=float32)]
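
In case the problem is the preprocessing rather than the conversion: the script above only scales the BGR pixels by 1/255, and I am not sure this matches the preprocessing used during training. One variant I am considering is caffe-style preprocessing (keep BGR order, subtract the ImageNet channel means, no 1/255 scaling). This is only an assumption on my side, not something I have confirmed from the TAO documentation:

import cv2
import numpy as np

def preprocess_image_caffe(image_path):
    # Assumption: caffe-style preprocessing (BGR order, ImageNet mean subtraction, no 1/255 scaling)
    image = cv2.imread(image_path)
    resized = cv2.resize(image, (224, 224)).astype(np.float32)
    resized -= np.array([103.939, 116.779, 123.68], dtype=np.float32)  # per-channel BGR means
    chw = np.transpose(resized, (2, 0, 1))
    return np.expand_dims(chw, axis=0)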

Please suggest where the gaps are.

Thanks…
