TensorFlow Lite Object Detection
This document describes how to set up and run an object detection model using TensorFlow Lite on the BeagleY-AI platform.
To run the object detection model on the BeagleY-AI, you will need the following:
BeagleY-AI Board: Refer to the BeagleY-AI standalone connection guide for proper setup.
USB Webcam: The model has been tested with the Logitech Webcam C270, but it should work with other webcams too.
Active Internet Connection: Required for installing the modules. See the WiFi connection guide for setting up the network.
Step 1: Installation of Conda
In this step, we will install Mambaforge, a lightweight Conda distribution.
wget https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-aarch64.sh
bash Mambaforge-24.3.0-0-Linux-aarch64.sh
After accepting the license terms, verify the installation by running:
conda --version
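If the conda command is not found right away, the shell may not yet have picked up the changes made by the installer's initialization step. A common fix (assuming the default bash shell) is to open a new terminal or re-source your shell configuration:
source ~/.bashrc
conda --version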
Step 2: Create Virtual Environment
Create a virtual environment with Python 3.9.
conda create --name myenv python=3.9
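You can optionally confirm that the environment was created by listing the available Conda environments:
conda env list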
Step 3: Activate the Virtual Environment
Activate the virtual environment created in the previous step.
conda activate myenv
Step 4: Install Necessary Modules
Install the required Python modules.
pip install https://github.com/google-coral/pycoral/releases/download/v2.0.0/tflite_runtime-2.5.0.post1-cp39-cp39-linux_aarch64.whl
pip install numpy==1.26.4
pip install opencv-python
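As an optional sanity check, you can verify that the modules import correctly inside the activated environment (a quick sketch; the versions printed will depend on your installation):
python3 -c "import tflite_runtime.interpreter, cv2, numpy; print(cv2.__version__, numpy.__version__)"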
Step 5: Load Necessary Pretrained Models
Create a directory for the object recognition models and download a pretrained model.
mkdir object-recognition
cd object-recognition
wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip
unzip coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip -d TFLite_model
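After unzipping, the TFLite_model directory should contain the detect.tflite model and the labelmap.txt label file, which are the default file names expected by the script in Step 7:
ls TFLite_model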
Tip
You can also train your own model using TensorFlow Lite; the TensorFlow Lite documentation provides resources for custom model training.
Step 6: Connect Your USB Webcam
Connect your USB webcam to one of the board's USB ports, then list the available video devices:
ls -l /dev | grep video
Note
Use the command above to check which video devices are present. In this example, the webcam shows up as /dev/video3, so the device ID used in the script is 3.
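If several video devices are listed and it is unclear which one belongs to the webcam, you can optionally install v4l-utils (assuming it is available in your distribution's package feed) and list the devices by name:
sudo apt install v4l-utils
v4l2-ctl --list-devices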
Step 7: Create the Code File
Create a Python file for running object detection.
nano object-detection.py
Paste the following code into the file:
import os
import argparse
import cv2
import numpy as np
import time
from threading import Thread
import importlib.util
from typing import List
import sys

from tflite_runtime.interpreter import Interpreter, load_delegate

video_driver_id = 3


class VideoStream:
    """Handles video streaming from the webcam."""

    def __init__(self, resolution=(640, 480), framerate=30):
        self.stream = cv2.VideoCapture(video_driver_id)
        self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        self.stream.set(3, resolution[0])
        self.stream.set(4, resolution[1])
        self.grabbed, self.frame = self.stream.read()
        self.stopped = False

    def start(self):
        """Starts the thread that reads frames from the video stream."""
        Thread(target=self.update, args=()).start()
        return self

    def update(self):
        """Continuously updates the frame from the video stream."""
        while True:
            if self.stopped:
                self.stream.release()
                return
            self.grabbed, self.frame = self.stream.read()

    def read(self):
        """Returns the most recent frame."""
        return self.frame

    def stop(self):
        """Stops the video stream and closes resources."""
        self.stopped = True


def load_labels(labelmap_path: str) -> List[str]:
    """Loads labels from a label map file."""
    try:
        with open(labelmap_path, 'r') as f:
            labels = [line.strip() for line in f.readlines()]
        if labels[0] == '???':
            labels.pop(0)
        return labels
    except IOError as e:
        print(f"Error reading label map file: {e}")
        sys.exit()


def main():
    # Argument parsing
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir', required=True, help='Folder the .tflite file is located in')
    parser.add_argument('--graph', default='detect.tflite', help='Name of the .tflite file')
    parser.add_argument('--labels', default='labelmap.txt', help='Name of the labelmap file')
    parser.add_argument('--threshold', default='0.5', help='Minimum confidence threshold')
    parser.add_argument('--resolution', default='1280x720', help='Desired webcam resolution')
    args = parser.parse_args()

    # Configuration
    model_path = os.path.join(os.getcwd(), args.modeldir, args.graph)
    labelmap_path = os.path.join(os.getcwd(), args.modeldir, args.labels)
    min_conf_threshold = float(args.threshold)
    resW, resH = map(int, args.resolution.split('x'))

    # Load labels and interpreter
    labels = load_labels(labelmap_path)
    interpreter = Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height, width = input_details[0]['shape'][1:3]
    floating_model = (input_details[0]['dtype'] == np.float32)

    outname = output_details[0]['name']
    boxes_idx, classes_idx, scores_idx = (1, 3, 0) if 'StatefulPartitionedCall' in outname else (0, 1, 2)

    # Initialize video stream
    videostream = VideoStream(resolution=(resW, resH), framerate=30).start()
    time.sleep(1)

    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    while True:
        t1 = cv2.getTickCount()
        frame = videostream.read()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        if floating_model:
            input_data = (np.float32(input_data) - 127.5) / 127.5

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]
        classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]
        scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]

        for i in range(len(scores)):
            if min_conf_threshold < scores[i] <= 1.0:
                ymin, xmin, ymax, xmax = [int(coord) for coord in (boxes[i] * [resH, resW, resH, resW])]
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                object_name = labels[int(classes[i])]
                label = f'{object_name}: {int(scores[i] * 100)}%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                label_ymin = max(ymin, labelSize[1] + 10)
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)
                cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

        cv2.putText(frame, f'FPS: {frame_rate_calc:.2f}', (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('Object detector', frame)

        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()
    videostream.stop()


if __name__ == "__main__":
    main()
Note
Make sure to change the video_driver_id value at the top of the script to match your webcam's video device. Here, the video device ID is set to 3.
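If you are unsure which index to use, a minimal sketch like the following (assuming OpenCV is installed in the active environment) probes the first few device indices and reports which ones can be opened:
import cv2

# Probe the first few /dev/video* indices and report which ones open successfully.
for idx in range(8):
    cap = cv2.VideoCapture(idx)
    if cap.isOpened():
        print(f"/dev/video{idx} can be opened")
    cap.release()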
Step 8: Run the Object Detection Script
To run the object detection script, use the following command. Replace TFLite_model with the path to your model directory if it differs:
python3 object-detection.py --modeldir=TFLite_model
A window will open, displaying the object detection model in action.
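The script also accepts the optional arguments defined in its argument parser, so you can adjust the confidence threshold or the requested webcam resolution, for example:
python3 object-detection.py --modeldir=TFLite_model --threshold=0.6 --resolution=640x480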