<a href="https://colab.research.google.com/github/sanyam83/learnopencv/blob/master/ALPR/ALPR_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!nvidia-smi

In [None]:
# Cloning DeepSORT
!git clone https://github.com/nwojke/deep_sort.git

The original DeepSORT repo uses a deprecated sklearn function called `linear_assignment`, which must be replaced with its scipy equivalent for the code to run without errors.



1.   Open ./deep_sort/deep_sort/linear_assignment.py
2.   Replace `from sklearn.utils.linear_assignment_ import linear_assignment` in line 4 with `from scipy.optimize import linear_sum_assignment`.

3.   Replace `indices = linear_assignment(cost_matrix)` in line 58 with the following lines of code:
```
  indices = linear_sum_assignment(cost_matrix)
  indices = np.asarray(indices)
  indices = np.transpose(indices)
```





Also, rename ./deep_sort/tools to ./deep_sort/tools_deepsort to avoid any naming conflicts.

In [None]:
# DeepSORT imports.
%cd ./deep_sort
from application_util import preprocessing
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools_deepsort import generate_detections as gdet
import uuid

## OCR

## Installing requirements

In [None]:
%cd ../

In [None]:
# No need to install if already installed requirements.txt
!pip install paddlepaddle-gpu
!pip install "paddleocr>=2.0.1"

## Importing libraries

In [None]:
# The OCR engine is created once here; the inference functions further down
# use this module-level `ocr` object through `ocr.ocr(...)`.
from paddleocr import PaddleOCR
ocr = PaddleOCR(lang='en',rec_algorithm='CRNN')

## Utility functions

## Detector

Continuing from the License plate detection [notebook](https://colab.research.google.com/github/sanyam83/learnopencv/blob/master/ALPR/License_plate_detection_YOLOv4.ipynb). This assumes that the files and weights produced there have already been created.

In [None]:
# Importing libraries and required functionalities.
%cd ./darknet
import os
import glob
import random
import time
import cv2
import numpy as np
import darknet
import subprocess
import uuid

import sys
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Darknet object detector imports.
from darknet_images import load_images
from darknet_images import image_detection

In [4]:
%cd ../
# Add absolute paths according to your folder structure.
# Declaring important variables.
# These names are read as notebook-level globals by the inference functions.
# Path of Configuration file of YOLOv4.
config_file = '/content/gdrive/MyDrive/yolov4-darknet/darknet/cfg/yolov4-obj.cfg'
# Path of obj.data file.
data_file = '/content/gdrive/MyDrive/yolov4-darknet/darknet/data/obj.data'
# Batch size of data passed to the detector.
batch_size = 1
# Path to trained YOLOv4 weights.
weights = '/content/gdrive/MyDrive/yolov4-darknet/checkpoint/yolov4-obj_best.weights'
# Confidence threshold.
thresh = 0.6

In [5]:
# Variables storing colors and fonts.
# The color tuples are passed to OpenCV drawing calls, which read them in
# (blue, green, red) channel order.
font = cv2.FONT_HERSHEY_SIMPLEX
blue_color = (255,0,0)
white_color = (255,255,255)
black_color = (0,0,0)
green_color = (0,255,0)
yellow_color = (178, 247, 218)


## Utility functions

In [6]:
def crop(image, coord):
  """Return the region of `image` bounded by `coord`.

  `coord` is read as (x1, y1, x2, y2). Each of the four values is truncated
  with int() and the image is sliced as rows y1:y2, columns x1:x2.
  """
  x1, y1, x2, y2 = (int(coord[idx]) for idx in range(4))
  return image[y1:y2, x1:x2]

In [26]:
def resize_bbox(detections, out_size, in_size):
  """Rescale darknet detections from the network input size to another image size.

  Args:
    detections: iterable of detections; element [1] of each is read as the
      confidence and element [2] as the box handed to darknet.bbox2points.
    out_size: (height, width) of the image the boxes should be mapped onto.
    in_size: (height, width) of the image the detections were computed on.

  Returns:
    (coord, scores): `coord` holds one [xmin, ymin, width, height] list per
    detection (top-left corner plus size) and `scores` the matching
    confidences, both in the order of `detections`.
  """
  # The scale factors do not depend on the detection, so compute them once.
  y_scale = float(out_size[0]) / in_size[0]
  x_scale = float(out_size[1]) / in_size[1]

  coord = []
  scores = []
  for det in detections:
    xmin, ymin, xmax, ymax = darknet.bbox2points(list(det[2]))
    # Clamp the top-left corner to the image on both axes: a negative start
    # index would wrap around when the box is later used for slicing.
    xmin = max(0, int(x_scale * xmin))
    ymin = max(0, int(y_scale * ymin))
    xmax = int(x_scale * xmax)
    ymax = int(y_scale * ymax)
    coord.append([xmin, ymin, xmax - xmin, ymax - ymin])
    scores.append(det[1])
  return coord, scores

In [20]:
def yolo_det(frame, config_file, data_file, batch_size, weights, threshold, output, network, class_names, class_colors, save = False, out_path = ''):
  """Run the darknet detector on one frame and return boxes in frame coordinates.

  Args:
    frame: BGR image; it is converted with cv2.COLOR_BGR2RGB and resized to
      the network input size before detection.
    config_file, data_file, batch_size, weights: not used by this function;
      kept in the signature so existing callers keep working.
    threshold: confidence threshold forwarded to darknet.detect_image.
    output: directory the annotated image is written to when `save` is true.
    network, class_names, class_colors: the darknet network and its class
      names / colors (callers in this notebook get them from darknet.load_network).
    save: when true, draw the detections on the network-sized image and save it.
    out_path: prefix prepended to the saved file name.

  Returns:
    (coord, scores, det_time): boxes as [xmin, ymin, width, height] scaled to
    `frame`, their confidences, and the elapsed time in seconds. When nothing
    is detected, `coord` and `scores` are both empty lists.
  """
  prev_time = time.time()

  # Preprocessing the input image.
  width = darknet.network_width(network)
  height = darknet.network_height(network)
  image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
  image_resized = cv2.resize(image_rgb, (width, height))

  # Passing the image to the detector and storing the detections.
  # The darknet image is freed even if detection raises.
  darknet_image = darknet.make_image(width, height, 3)
  try:
    darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
    detections = darknet.detect_image(network, class_names, darknet_image, thresh=threshold)
  finally:
    darknet.free_image(darknet_image)

  print(detections)
  if save:
    # Plotting the detections using darknet in-built functions.
    # Only needed when the annotated image is actually written out.
    image = darknet.draw_boxes(detections, image_resized, class_colors)
    im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    file_name = out_path + str(uuid.uuid4()) +'-det.jpg'
    cv2.imwrite(os.path.join(output, file_name), im)

  # Calculating time taken for detection.
  det_time = time.time() - prev_time
  print("Detection time: {}".format(det_time))

  # Same (list, list, float) shape whether or not anything was detected.
  if not detections:
    return [], [], det_time

  # Resizing predicted bounding boxes from network resolution to input image resolution.
  out_size = frame.shape[:2]
  in_size = image_resized.shape[:2]
  coord, scores = resize_bbox(detections, out_size, in_size)
  return coord, scores, det_time

In [21]:
def get_best_ocr(preds, rec_conf, ocr_res, track_id):
  """Keep the highest-confidence OCR reading for `track_id`.

  The first entry of `preds` whose 'track_id' equals `track_id` is compared
  with the new reading. If the new confidence is strictly higher, the entry is
  updated in place and the new reading is returned; otherwise the stored
  reading is returned. With no matching entry the inputs are returned as is.

  Returns:
    (preds, rec_conf, ocr_res): the same `preds` list plus the confidence and
    text that won the comparison.
  """
  stored = next((entry for entry in preds if entry['track_id'] == track_id), None)
  if stored is None:
    return preds, rec_conf, ocr_res

  if stored['ocr_conf'] < rec_conf:
    # The new reading wins: remember it for this track.
    stored['ocr_conf'] = rec_conf
    stored['ocr_txt'] = ocr_res
    return preds, rec_conf, ocr_res

  # The stored reading is at least as confident: hand it back.
  return preds, stored['ocr_conf'], stored['ocr_txt']

# Inference

## Function for inferencing on images

In [10]:
def test_img(input, config_file, weights, out_path):
  """Detect license plates in one image, OCR each plate and save the annotated image.

  Args:
    input: path of the image to read.
    config_file: YOLOv4 configuration file passed to darknet.load_network.
    weights: trained weights file passed to darknet.load_network.
    out_path: directory for the result, saved as 'out_<input file name>'.

  Relies on the notebook-level `data_file`, `batch_size`, `thresh`, `ocr`,
  `font`, `blue_color` and `white_color`.

  Raises:
    FileNotFoundError: if the image cannot be read.
  """
  # Reading the image first so that a bad path fails before the network is loaded.
  img = cv2.imread(input)
  if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(input))

  # Loading darknet network and classes along with the bbox colors.
  network, class_names, class_colors = darknet.load_network(
            config_file,
            data_file,
            weights,
            batch_size= batch_size
        )

  # Performing YOLOv4 detection.
  bboxes, scores, det_time = yolo_det(img, config_file, data_file, batch_size, weights, thresh, out_path, network, class_names, class_colors)

  # Extracting or cropping the license plate and applying the OCR.
  for x, y, w, h in bboxes:
    # yolo_det returns boxes as [xmin, ymin, width, height]; crop() and
    # cv2.rectangle need corner coordinates, so add the size to the origin.
    bbox = [int(x), int(y), int(x + w), int(y + h)]
    cr_img = crop(img, bbox)
    if cr_img.size == 0:
      # A degenerate box leaves nothing to recognise.
      continue
    result = ocr.ocr(cr_img, cls=False, det=False)
    ocr_res = result[0][0]
    print(result)

    # Plotting the predictions using OpenCV.
    (label_width, label_height), baseline = cv2.getTextSize(ocr_res, font, 2, 3)
    top_left = (bbox[0], bbox[1] - (label_height + baseline))
    top_right = (bbox[0] + label_width, bbox[1])
    org = (bbox[0], bbox[1] - baseline)

    cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), blue_color, 2)
    cv2.rectangle(img, top_left, top_right, blue_color, -1)
    cv2.putText(img, ocr_res, org, font, 2, white_color, 3)

  # Writing output image.
  file_name = os.path.join(out_path, 'out_' + os.path.basename(input))
  cv2.imwrite(file_name, img)

In [None]:
# Run detection + OCR on a single sample image; the annotated copy is written to /content/.
test_img('/content/72b88b2f-33b4-49e6-84ba-32b7df5a181c.jpg', config_file, weights, '/content/')

## Function for inferencing on videos

In [17]:
def test_vid(vid_dir, config_file, weights,out_path):
  """Detect and OCR license plates on each frame of a video and save the annotated video.

  Args:
    vid_dir: path of the input video.
    config_file: YOLOv4 configuration file passed to darknet.load_network.
    weights: trained weights file passed to darknet.load_network.
    out_path: directory for the result, saved as 'out_<video file name>' with
      the XVID codec at the source frame rate and size.

  Relies on the notebook-level `data_file`, `batch_size`, `thresh`, `ocr`,
  `font`, `blue_color` and `white_color`.

  Every frame that is read is written to the output, whether or not a plate
  was found on it, so the output keeps the length of the source video.
  """
  # Declaring variables for video processing.
  cap = cv2.VideoCapture(vid_dir)
  codec = cv2.VideoWriter_fourcc(*'XVID')
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  fps = int(cap.get(cv2.CAP_PROP_FPS))
  file_name = os.path.join(out_path, 'out_' + os.path.basename(vid_dir))
  out = cv2.VideoWriter(file_name, codec, fps, (width, height))

  try:
    # Loading darknet network and classes along with the bbox colors.
    network, class_names, class_colors = darknet.load_network(
            config_file,
            data_file,
            weights,
            batch_size= batch_size
        )

    # Frame count variable.
    ct = 0

    # Reading video frame by frame.
    while cap.isOpened():
      ret, img = cap.read()
      if not ret:
        break
      print(ct)

      # Noting time for calculating FPS.
      prev_time = time.time()

      # Performing the YOLOv4 detection.
      bboxes, scores, det_time = yolo_det(img, config_file, data_file, batch_size, weights, thresh, out_path, network, class_names, class_colors)

      # Extracting or cropping the license plate and applying the OCR.
      for x, y, w, h in bboxes:
        # yolo_det returns boxes as [xmin, ymin, width, height]; crop() and
        # cv2.rectangle need corner coordinates, so add the size to the origin.
        bbox = [int(x), int(y), int(x + w), int(y + h)]
        cr_img = crop(img, bbox)
        if cr_img.size == 0:
          # A degenerate box leaves nothing to recognise.
          continue

        result = ocr.ocr(cr_img, cls=False, det=False)
        print(result)
        ocr_res = result[0][0]

        # Plotting the predictions using OpenCV.
        (label_width, label_height), baseline = cv2.getTextSize(ocr_res, font, 2, 3)
        top_left = (bbox[0], bbox[1] - (label_height + baseline))
        top_right = (bbox[0] + label_width, bbox[1])
        org = (bbox[0], bbox[1] - baseline)

        cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), blue_color, 2)
        cv2.rectangle(img, top_left, top_right, blue_color, -1)
        cv2.putText(img, ocr_res, org, font, 2, white_color, 3)

      # Calculating time taken and FPS for the whole process.
      tot_time = time.time() - prev_time
      proc_fps = 1/tot_time

      # Writing information onto the frame and saving it to be processed in a video.
      cv2.putText(img, 'frame: %d fps: %s' % (ct, proc_fps),
              (0, 100), cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 255), thickness=2)
      out.write(img)

      ct = ct + 1
  finally:
    # Releasing the writer finalises the output file; releasing the capture
    # frees the input video.
    cap.release()
    out.release()

In [None]:
# Run detection + OCR (no tracking) on the sample video; the annotated video
# is written into out_path.
input_dir = 'Pexels Videos 2103099.mp4'
out_path = '/content/'
test_vid(input_dir, config_file, weights,out_path)

## Function for integrating Tracker

Download the pretrained deep association metric model `mars-small128.pb` from [here](https://drive.google.com/drive/folders/1n0jB3zwJysi6YDi4n0HVKz5yOZ0eNA2B?usp=sharing) and place it at ./model_data/mars-small128.pb

In [23]:
def tracker_test_vid(vid_dir, config_file, weights,out_path):
  """Detect, track and OCR license plates in a video, keeping the best OCR text per track.

  Args:
    vid_dir: path of the input video.
    config_file: YOLOv4 configuration file passed to darknet.load_network.
    weights: trained weights file passed to darknet.load_network.
    out_path: directory for the results: the annotated video is saved as
      'out_<video file name>' and each annotated frame as '<frame index>.jpg'.

  Relies on the notebook-level `data_file`, `batch_size`, `thresh`, `ocr`,
  the color/font constants and the DeepSORT imports (`gdet`, `nn_matching`,
  `Tracker`, `Detection`). The appearance model is loaded from
  './model_data/mars-small128.pb'.

  The tracker is only stepped on frames that have detections, but every frame
  that is read is written to the output together with the information box.
  """
  # Declaring variables for video processing.
  cap = cv2.VideoCapture(vid_dir)
  codec = cv2.VideoWriter_fourcc(*'XVID')
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  fps = int(cap.get(cv2.CAP_PROP_FPS))
  file_name = os.path.join(out_path, 'out_' + os.path.basename(vid_dir))

  out = cv2.VideoWriter(file_name, codec, fps, (width, height))

  try:
    # Declaring variables for tracker.
    max_cosine_distance = 0.4
    nn_budget = None

    # Initializing tracker.
    model_filename = './model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Initializing some helper variables.
    ct = 0
    preds = []
    rec_tot_time = 1
    alpha = 0.5

    # Loading darknet network and classes along with the bbox colors.
    network, class_names, class_colors = darknet.load_network(
            config_file,
            data_file,
            weights,
            batch_size= batch_size
        )

    # Reading video frame by frame.
    while cap.isOpened():
      ret, img = cap.read()
      if not ret:
        break

      h, w = img.shape[:2]
      print(ct)

      w_scale = w/1.55
      h_scale = h/17

      # Blending a black box into a copy of the frame to get a transparent
      # information box. `img` itself stays untouched so that detection, the
      # appearance encoder and the OCR crop all see the original pixels.
      hud_layer = img.copy()
      cv2.rectangle(hud_layer, (int(w_scale), 0), (w, int(h_scale*3.4)), (0,0,0), -1)
      overlay_img = cv2.addWeighted(hud_layer, alpha, img, 1 - alpha, 0)

      # Noting time for calculating FPS.
      prev_time = time.time()

      # Performing the YOLOv4 detection.
      bboxes, scores, det_time = yolo_det(img, config_file, data_file, batch_size, weights, thresh, out_path, network, class_names, class_colors)

      if list(bboxes):
        # Getting appearance features of the object.
        features = encoder(img, bboxes)
        # Storing all the required info in a list.
        detections = [Detection(bbox, score, feature) for bbox, score, feature in zip(bboxes, scores, features)]

        # Applying tracker.
        # The tracker code flow: kalman filter -> target association(using hungarian algorithm) and appearance descriptor.
        tracker.predict()
        tracker.update(detections)

        # Checking if tracks exist.
        for track in tracker.tracks:
          if not track.is_confirmed() or track.time_since_update > 1:
            continue

          # Changing track bbox to top left, bottom right coordinates,
          # with negative values replaced by 0.
          bbox = [0 if value < 0 else value for value in track.to_tlbr()]

          # Extracting or cropping the license plate and applying the OCR.
          cr_img = crop(img, bbox)
          if cr_img.size == 0:
            # The track box lies outside the frame: nothing to recognise.
            continue

          rec_pre_time = time.time()
          result = ocr.ocr(cr_img, cls=False, det=False)
          rec_tot_time = time.time() - rec_pre_time

          ocr_res = result[0][0]
          print(result)
          rec_conf = result[0][1]

          # A float NaN never compares equal to the string 'nan', so test the
          # value itself. A stored NaN would never be replaced, because every
          # comparison against it is False.
          if rec_conf == 'nan' or np.isnan(rec_conf):
            rec_conf = 0

          if not any(ele['track_id'] == track.track_id for ele in preds):
            # First reading for this track id: store it.
            preds.append({"track_id":track.track_id, "ocr_txt":ocr_res, "ocr_conf":rec_conf})
          else:
            # Looking for the current track in the list and updating the highest confidence of it.
            preds, rec_conf, ocr_res = get_best_ocr(preds, rec_conf, ocr_res, track.track_id)

          # Plotting the predictions using OpenCV.
          txt = str(track.track_id) + '. ' + ocr_res
          (label_width, label_height), baseline = cv2.getTextSize(txt, font, 2, 3)
          top_left = (int(bbox[0]), int(bbox[1]) - (label_height + baseline))
          top_right = (int(bbox[0]) + label_width, int(bbox[1]))
          org = (int(bbox[0]), int(bbox[1]) - baseline)

          cv2.rectangle(overlay_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), blue_color, 2)
          cv2.rectangle(overlay_img, top_left, top_right, blue_color, -1)
          cv2.putText(overlay_img, txt, org, font, 2, white_color, 3)

      # Calculating time taken and FPS for the whole process.
      tot_time = time.time() - prev_time
      proc_fps = 1/tot_time

      # Writing information onto the frame with titles and values in different colors.
      size = 1 if w < 2000 else 2
      hud_x = int(w_scale) + 10
      row1_y = int(h_scale)

      # Plotting frame count information on the frame.
      cv2.putText(overlay_img, 'Frame count:', (hud_x, row1_y), font, size, green_color, thickness=2)
      label_width = cv2.getTextSize('Frame count:', font, size, 2)[0][0]
      cv2.putText(overlay_img, '%d ' % (ct), (hud_x + label_width, row1_y), font, size, yellow_color, thickness=2)

      # Plotting total FPS on the same row, right after the frame count.
      label_width = cv2.getTextSize('Frame count:' + ' ' + str(ct), font, size, 2)[0][0]
      cv2.putText(overlay_img, 'Total FPS:', (hud_x + label_width, row1_y), font, size, green_color, thickness=2)

      label_width = cv2.getTextSize('Frame count:' + ' ' + str(ct) + 'Total FPS:', font, size, 2)[0][0]
      cv2.putText(overlay_img, '%s' % (int(proc_fps)), (hud_x + label_width, row1_y), font, size, yellow_color, thickness=2)

      # Plotting detection FPS on the frame.
      cv2.putText(overlay_img, 'Detection FPS:', (hud_x, int(h_scale*1.7)), font, size, green_color, thickness=2)
      label_width = cv2.getTextSize('Detection FPS:', font, size, 2)[0][0]
      cv2.putText(overlay_img, '%d' % (int(1/det_time)), (hud_x + label_width, int(h_scale*1.7)), font, size, yellow_color, thickness=2)

      # Plotting Recognition/OCR FPS on the frame (value of the latest OCR call).
      cv2.putText(overlay_img, 'Recognition FPS:', (hud_x, int(h_scale*2.42)), font, size, green_color, thickness=2)
      label_width = cv2.getTextSize('Recognition FPS:', font, size, 2)[0][0]
      cv2.putText(overlay_img, '%s' % (int(1/rec_tot_time)), (hud_x + label_width, int(h_scale*2.42)), font, size, yellow_color, thickness=2)

      # Saving the annotated frame as an image and appending it to the video.
      cv2.imwrite(os.path.join(out_path, '{}.jpg'.format(ct)), overlay_img)
      out.write(overlay_img)

      # Increasing frame count.
      ct = ct + 1
  finally:
    # Releasing the writer finalises the output file; releasing the capture
    # frees the input video.
    cap.release()
    out.release()

**Test Vid**

In [24]:
# Input video and output directory for the tracker run.
input_dir = 'Pexels Videos 2103099.mp4'
out_path = '/content/'

/content/gdrive/.shortcut-targets-by-id/10yv_NIHNqOqSr7E7W2L29BzNZWVT1kPR/ALPR-ocr


In [None]:
# Run detection + tracking + OCR on the sample video; results go to out_path.
tracker_test_vid(input_dir, config_file, weights,out_path)