In [None]:
import os
import glob as glob
import cv2
import matplotlib.pyplot as plt
import requests
import zipfile

## Constant/Config Setup

In [None]:
# Whether to carry out training or not.
# If `False`, the training cell is skipped and the rest of the notebook
# runs inference with the most recent existing results directory.
TRAIN = True
# Number of epochs to train for.
EPOCHS = 5

## Ultralytics Setup

In [None]:
#### RUN THIS ONCE TO CLONE THE YOLOV5 REPO ####
# The existence check is relative to the current working directory, so the
# clone is skipped whenever a `yolov5` folder is already present there.
if not os.path.exists('yolov5'):
    !git clone https://github.com/ultralytics/yolov5.git

Cloning into 'yolov5'...
remote: Enumerating objects: 10239, done.[K
remote: Total 10239 (delta 0), reused 0 (delta 0), pack-reused 10239[K
Receiving objects: 100% (10239/10239), 10.50 MiB | 20.91 MiB/s, done.
Resolving deltas: 100% (7078/7078), done.


In [None]:
# Every relative path used from here on is resolved from inside `yolov5/`.
# NOTE(review): re-running this cell (or the clone cell above) without
# restarting the kernel presumably targets a nested `yolov5/yolov5` — confirm.
%cd yolov5/

/content/yolov5


In [None]:
!pwd

/content/yolov5


## Dataset Download

Here, we will download the Snowman dataset images and labels. The two folders that will be downloaded will contain all the images and labels that we need.

In [None]:
def download_file(url, save_name):
    """Download `url` to `save_name` unless that file already exists.

    Args:
        url: Address of the file to fetch.
        save_name: Destination path on disk. An existing file at this path
            is treated as an already-completed download and left untouched.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Nothing is written in that case, so an error page is never
            cached under `save_name`.
        requests.RequestException: On connection problems or timeout.
    """
    if os.path.exists(save_name):
        return

    # Fetch the full payload before opening the destination, so a failed
    # request cannot leave a truncated file that later runs would skip over.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    with open(save_name, 'wb') as out_file:
        out_file.write(response.content)
    
download_file('https://learnopencv.s3.us-west-2.amazonaws.com/snowman-dataset.zip', 'snowman_dataset.zip')

In [None]:
# Name of the dataset archive expected in the current working directory.
zip_file = 'snowman_dataset.zip'

# Extraction is skipped when the dataset folder is already present.
if not os.path.exists('JPEGImages_and_Labels'):
    try:
        with zipfile.ZipFile(zip_file) as z:
            z.extractall("./")
        print("Extracted all")
    except zipfile.BadZipFile:
        # The file exists but is not a valid zip archive.
        print("Invalid file")
    except FileNotFoundError:
        # Reported separately so a missing download is not mistaken for a
        # corrupt archive.
        print(f"Archive not found: {zip_file}")

Extracted all


## Prepare Final Dataset in YOLOv5 Format

In [None]:
# Lay out the `snowman_data` root together with its `labels` and `images`
# subfolders; existing folders are left as they are.
for dataset_dir in ('snowman_data', 'snowman_data/labels', 'snowman_data/images'):
    os.makedirs(dataset_dir, exist_ok=True)

In [None]:
# One `train` and one `val` folder under both the image and the label tree.
for split_dir in (
    'snowman_data/images/train',
    'snowman_data/images/val',
    'snowman_data/labels/train',
    'snowman_data/labels/val',
):
    os.makedirs(split_dir, exist_ok=True)

### Train-Test Split

In [None]:
%%writefile splitTrainAndTest.py
import random
import os
import subprocess
import sys

def split_data_set(image_dir, val_fraction=0.1, seed=None):
    """Randomly split the `.jpg` files of `image_dir` into train and val lists.

    Writes one image path per line (`image_dir + '/' + file_name`) to
    `snowman_test.txt` (validation) and `snowman_train.txt` (training) in
    the current working directory, overwriting any previous content.

    Args:
        image_dir: Directory whose `.jpg` files are split.
        val_fraction: Share of the images sent to the validation list.
            The count is truncated to an integer.
        seed: Optional seed for a reproducible split. `None` gives a
            different split on every run.
    """
    # Only regular files ending in exactly `.jpg` are considered. Using
    # splitext instead of split(".")[1] keeps dot-less and multi-dot names
    # from raising or being misclassified.
    image_names = sorted(
        name for name in os.listdir(image_dir)
        if os.path.splitext(name)[1] == ".jpg"
        and os.path.isfile(os.path.join(image_dir, name))
    )

    # Sample 0-based positions over the images themselves, so the
    # validation list always holds exactly `val_count` entries and every
    # image is eligible.
    val_count = int(val_fraction * len(image_names))
    rng = random.Random(seed)
    val_indices = set(rng.sample(range(len(image_names)), k=val_count))

    with open("snowman_test.txt", 'w') as f_val, \
            open("snowman_train.txt", 'w') as f_train:
        for index, name in enumerate(image_names):
            target = f_val if index in val_indices else f_train
            target.write(image_dir + '/' + name + '\n')


split_data_set(sys.argv[1])

Writing splitTrainAndTest.py


In [None]:
!python splitTrainAndTest.py JPEGImages_and_Labels/JPEGImages

### Put the Images and Labels in the Respective Directories

In [None]:
import shutil

# For training data.
def copy_image_and_txt_train(path_text_file):
    """Copy each listed image and its label file into the training folders.

    Args:
        path_text_file: Text file with one image path per line. Only the
            file name without its extension is used; blank lines are ignored.

    Raises:
        FileNotFoundError: If a listed image or its label file is missing.
    """
    with open(path_text_file, 'r') as f:
        # basename + splitext keeps names that contain extra dots intact,
        # where splitting on the first '.' would truncate them.
        stems = [
            os.path.splitext(os.path.basename(line.strip()))[0]
            for line in f
            if line.strip()
        ]

    for stem in stems:
        # Image first, then the label file that shares its stem.
        shutil.copy(
            f"JPEGImages_and_Labels/JPEGImages/{stem}.jpg",
            f"snowman_data/images/train/{stem}.jpg"
        )
        shutil.copy(
            f"JPEGImages_and_Labels/labels/{stem}.txt",
            f"snowman_data/labels/train/{stem}.txt"
        )

copy_image_and_txt_train('snowman_train.txt')

# For validation data.
def copy_image_and_txt_test(path_text_file):
    """Copy each listed image and its label file into the validation folders.

    Args:
        path_text_file: Text file with one image path per line. Only the
            file name without its extension is used; blank lines are ignored.

    Raises:
        FileNotFoundError: If a listed image or its label file is missing.
    """
    with open(path_text_file, 'r') as f:
        # basename + splitext keeps names that contain extra dots intact,
        # where splitting on the first '.' would truncate them.
        stems = [
            os.path.splitext(os.path.basename(line.strip()))[0]
            for line in f
            if line.strip()
        ]

    for stem in stems:
        # Image first, then the label file that shares its stem.
        shutil.copy(
            f"JPEGImages_and_Labels/JPEGImages/{stem}.jpg",
            f"snowman_data/images/val/{stem}.jpg"
        )
        shutil.copy(
            f"JPEGImages_and_Labels/labels/{stem}.txt",
            f"snowman_data/labels/val/{stem}.txt"
        )

copy_image_and_txt_test('snowman_test.txt')

### Prepare the YAML File Containing the Paths for YOLOv5 Training

In [None]:
!ls data

Argoverse.yaml	GlobalWheat2020.yaml  Objects365.yaml  VisDrone.yaml
coco128.yaml	hyps		      scripts	       VOC.yaml
coco.yaml	images		      SKU-110K.yaml    xView.yaml


In [None]:
%%writefile data/snowman.yaml

path: snowman_data # dataset root dir
train: images/train  # train images (relative to 'path') 
val: images/val  # val images (relative to 'path')
test:  # test images (optional)

# Classes
nc: 1  # number of classes
names: ['snowman']  # class names

Writing data/snowman.yaml


In [None]:
!ls data

Argoverse.yaml	      hyps	       SKU-110K.yaml  xView.yaml
coco128.yaml	      images	       snowman.yaml
coco.yaml	      Objects365.yaml  VisDrone.yaml
GlobalWheat2020.yaml  scripts	       VOC.yaml


## Start the Training

In [None]:
#### CHOOSE BETWEEN THE Nano (n), Small (s), Medium (m), Large (l), AND Extra-Large (x) MODELS.
# Lists the model configuration files shipped in `models/`.
!ls models/*.yaml

models/yolov5l.yaml  models/yolov5n.yaml  models/yolov5x.yaml
models/yolov5m.yaml  models/yolov5s.yaml


In [None]:
# Directory to store results
res_dir_count = len(glob.glob('runs/train/*'))
print(f"Current number of result directories: {res_dir_count}")

Current number of result directories: 0


In [None]:
# A training run gets the next free index; otherwise the index of the latest
# existing directory is reused. The name is echoed only when training.
RES_DIR = f"results{res_dir_count + (1 if TRAIN else 0)}"
if TRAIN:
    print(RES_DIR)

results1


In [None]:
!pip install -r requirements.txt

Collecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 596 kB 5.3 MB/s 
Collecting thop
  Downloading thop-0.0.31.post2005241907-py3-none-any.whl (8.7 kB)
Installing collected packages: thop, PyYAML
  Attempting uninstall: PyYAML
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-6.0 thop-0.0.31.post2005241907


In [None]:
### TRAINING A Small MODEL ###
# The chosen pretrained model will be downloaded automatically.
# `{EPOCHS}` and `{RES_DIR}` are filled in from the notebook's Python
# variables; the whole step is skipped when `TRAIN` is False.
if TRAIN:
    !python train.py --img 640 --batch 16 --epochs {EPOCHS} --data snowman.yaml --weights yolov5s.pt --name {RES_DIR}

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=snowman.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=5, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=results1, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 âœ…
YOLOv5 ðŸš€ v6.0-151-gabbdd48 torch 1.10.0+cu111 CUDA:0 (Tesla K80, 11441MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw

## Check Out the Validation Predictions Saved During Training

In [None]:
!ls runs/train/{RES_DIR}

confusion_matrix.png				   results.csv
events.out.tfevents.1639890713.0a0fe6886d38.152.0  results.png
F1_curve.png					   train_batch0.jpg
hyp.yaml					   train_batch1.jpg
labels_correlogram.jpg				   train_batch2.jpg
labels.jpg					   val_batch0_labels.jpg
opt.yaml					   val_batch0_pred.jpg
P_curve.png					   val_batch1_labels.jpg
PR_curve.png					   val_batch1_pred.jpg
R_curve.png					   weights


**Each experiment will be stored in a `results<num>` directory. The most recent experiment is stored in the `results<highest_number>` folder. For example, `results1`, `results2`, `results3`, and so on**. 

In [None]:
!ls runs/train/{RES_DIR}

confusion_matrix.png				   results.csv
events.out.tfevents.1639890713.0a0fe6886d38.152.0  results.png
F1_curve.png					   train_batch0.jpg
hyp.yaml					   train_batch1.jpg
labels_correlogram.jpg				   train_batch2.jpg
labels.jpg					   val_batch0_labels.jpg
opt.yaml					   val_batch0_pred.jpg
P_curve.png					   val_batch1_labels.jpg
PR_curve.png					   val_batch1_pred.jpg
R_curve.png					   weights


In [None]:
# Gather the `*_pred.jpg` images saved in the selected run directory.
EXP_PATH = "runs/train/" + RES_DIR
validation_pred_images = glob.glob(EXP_PATH + "/*_pred.jpg")
print(validation_pred_images)

['runs/train/results1/val_batch1_pred.jpg', 'runs/train/results1/val_batch0_pred.jpg']


In [None]:
# Show each validation prediction image at large size, without axes.
for pred_image in validation_pred_images:
    image = cv2.imread(pred_image)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; report it and move on rather than failing on the slice.
        print(f"Could not read image: {pred_image}")
        continue
    plt.figure(figsize=(19, 16))
    # OpenCV loads BGR; reversing the channel axis gives the RGB order
    # matplotlib expects.
    plt.imshow(image[:, :, ::-1])
    plt.axis('off')
    plt.show()

Output hidden; open in https://colab.research.google.com to view.

## Inference
In this section, we will carry out inference on unseen images and videos from the internet. 

The images for inference are in the `inference_images` directory.

The videos for inference are in the `inference_videos` directory.

### Download the Images and Videos
Let's download the images and videos that we will run inference on.

In [None]:
# Folders that will hold the downloaded inference inputs.
for inference_dir in ('inference_images', 'inference_videos'):
    os.makedirs(inference_dir, exist_ok=True)

In [None]:
# Fetch two test images and one test video into the inference folders.
# NOTE(review): these look like signed, time-limited links (the video URL
# embeds `exp=` and `hmac=` values) and may no longer resolve — confirm, or
# replace them with stable hosting.
download_file('https://pixabay.com/get/g56ba59637d3bbfcf738798be157376e22985aa2233f2b67b22f7909190729874e797ade2a566ba708b812db9c3e0cfdbbceba1ce77c779f829e798695883ed91e26a338d7ad478074ae5c8878d5cb331_1920.jpg'
              , 'inference_images/images_1.jpg')
download_file('https://pixabay.com/get/g200d6c9606544f56332644d0b5eeff69bb601f222ba4ac4e120da6f827cd5d0470aa04cad84e704c4ac71427732cb9616d418a18fb704445027ac2caef968101f156fe1d6079b0292133def33cc910e5_1920.jpg'
              , 'inference_images/images_2.jpg')

download_file('https://vod-progressive.akamaized.net/exp=1639902174~acl=%2Fvimeo-prod-skyfire-std-us%2F01%2F152%2F20%2F500762856%2F2280358008.mp4~hmac=c4a268ba04e78571201a8f605985e5afe7143b48b51ec65699a0d839cabb21f3/vimeo-prod-skyfire-std-us/01/152/20/500762856/2280358008.mp4?filename=Snowman+-+55410.mp4'
              , 'inference_videos/video_1.mp4')

### Inference on Images

In [None]:
# Directory to store inference results
infer_dir_count = len(glob.glob('runs/detect/*'))
print(f"Current number of inference detection directories: {infer_dir_count}")
INFER_DIR = f"inference{infer_dir_count+1}"
print(INFER_DIR)

Current number of inference detection directories: 0
inference1


**For inference on images, we can just provide the directory path where all the images are stored, and inference will happen on all images automatically**.

In [None]:
# Inference on images.
!python detect.py --weights runs/train/{RES_DIR}/weights/best.pt \
--source inference_images --name {INFER_DIR}

[34m[1mdetect: [0mweights=['runs/train/results1/weights/best.pt'], source=inference_images, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=inference1, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 ðŸš€ v6.0-151-gabbdd48 torch 1.10.0+cu111 CUDA:0 (Tesla K80, 11441MiB)

Fusing layers... 
Model Summary: 213 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/2 /content/yolov5/inference_images/images_1.jpg: 384x640 1 snowman, Done. (0.022s)
image 2/2 /content/yolov5/inference_images/images_2.jpg: 352x640 4 snowmans, Done. (0.020s)
Speed: 0.4ms pre-process, 21.0ms inference, 3.2ms NMS per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/inference1[0m


In [None]:
# Visualize infernece images.
INFER_PATH = f"runs/detect/{INFER_DIR}"
infer_images = glob.glob(f"{INFER_PATH}/*.jpg")
print(infer_images)

['runs/detect/inference1/images_1.jpg', 'runs/detect/inference1/images_2.jpg']


In [None]:
# Show each annotated inference image at large size, without axes.
for pred_image in infer_images:
    image = cv2.imread(pred_image)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; report it and move on rather than failing on the slice.
        print(f"Could not read image: {pred_image}")
        continue
    plt.figure(figsize=(19, 16))
    # OpenCV loads BGR; reversing the channel axis gives the RGB order
    # matplotlib expects.
    plt.imshow(image[:, :, ::-1])
    plt.axis('off')
    plt.show()

Output hidden; open in https://colab.research.google.com to view.

### Inference on Videos

In [None]:
# Directory to store inference results
infer_dir_count = len(glob.glob('runs/detect/*'))
print(f"Current number of inference detection directories: {infer_dir_count}")
INFER_DIR = f"inference{infer_dir_count+1}"
print(INFER_DIR)

Current number of inference detection directories: 1
inference2


In [None]:
# Inference on images.
!python detect.py --weights runs/train/{RES_DIR}/weights/best.pt \
--source inference_videos --name {INFER_DIR}

[34m[1mdetect: [0mweights=['runs/train/results1/weights/best.pt'], source=inference_videos, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=inference2, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 ðŸš€ v6.0-151-gabbdd48 torch 1.10.0+cu111 CUDA:0 (Tesla K80, 11441MiB)

Fusing layers... 
Model Summary: 213 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
video 1/1 (1/300) /content/yolov5/inference_videos/video_1.mp4: 384x640 2 snowmans, Done. (0.020s)
video 1/1 (2/300) /content/yolov5/inference_videos/video_1.mp4: 384x640 1 snowman, Done. (0.018s)
video 1/1 (3/300) /content/yolov5/inference_videos/video_1.mp4: 384x640 2 snowmans, Done. (0.018s)
video 1/1 (4/300) /content/yolov5/inference_videos/video_1.mp4: 384x640 1 sn