ESP32-Cam Video Support

ESP32-Cam Video Support

This post will cover the ESP32 Cam module’s support for video capture.  The module comes with a microSD slot and Omnivision camera interface.  Earlier examples demonstrated time lapse video capture, but this post will focus more on real time video.

This topic was found accidentally while researching I2S audio support.  This Github repo by James Zahary, with contribution from Alexey Khachatryan, provided a realtime video capture process, https://github.com/jameszah/ESP32-CAM-Video-Recorder-junior.  Unlike the more common triggered still or time lapse image captures demonstrated online, this method claimed native video with the ESP32 Cam module.  Below is a video demonstrating the method.

There are several dependencies of the repo that are not needed if the sole purpose is to capture video to the microSD card.  In attempts to use the repo, the IDE would fail to compile the code due to these dependencies.  Reverse engineering the code would be a labor-intensive process.  There were other repos, such as https://github.com/s60sc/ESP32-CAM_MJPEG2SD by s60sc.  However, the extra features were beyond the scope of simply capturing video.  So the question of capturing video with the ESP32 Cam module was posed to AI.  There was back and forth with the prompts as to the limitations of the module, what the goal expectations were, and what it reasonably supported.  Ultimately, the following code was generated.

/*

ESP32-CAM_Video_ver2c

This program captures video on the ESP32 Cam module at its maximum FPS
Debugging and log files have been removed to optimize FPS rates
Images are captured with millis values for rate references
Images are batched into 1 minute folders

FPS drift follows this formula:
for x in milliseconds
y = 36.54 (x^-0.5) in FPS

The code was developed using ChatGPT

Edited by Patrick Gilfeather May 20, 2025

This is Arduino code, with standard setup for ESP32-CAM
- Board AI Thinker ESP32-CAM
- Partition Scheme Huge APP (3MB No OTA)

Hardware used
- Espressif ESP32-Cam module
- Omnivision DCX-OV-5640-E 5MP 180 degree camera module
- SanDisk Ultra 64GB microSD class 10 format FAT (32-bit version)

*/

#include "esp_camera.h"
#include "FS.h"
#include "SD_MMC.h"

#define PWDN_GPIO_NUM 32
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 0
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 21
#define Y4_GPIO_NUM 19
#define Y3_GPIO_NUM 18
#define Y2_GPIO_NUM 5
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22

// Upper bound on the number of frames loop() saves before it stops capturing.
#define MAX_FRAMES 1000000
// Not referenced by the sketch code shown here; kept as an optional setting.
#define CAPTURE_WINDOW_MS 300000 // Optional: 5 minutes capture window

// Added to millis() in loop(); setup() sets it to one past the highest
// timestamp found on the SD card so new file names do not repeat old ones.
unsigned long millisOffset = 0; // Adjusted millis() to avoid overlaps

unsigned long findMaxTimestamp() {
unsigned long maxTimestamp = 0;

File root = SD_MMC.open("/");
if (!root || !root.isDirectory()) return 0;

File folder = root.openNextFile();
while (folder) {
if (folder.isDirectory() && String(folder.name()).startsWith("/min_")) {
File file = folder.openNextFile();
while (file) {
String fname = String(file.name());
int idx1 = fname.indexOf("img_");
int idx2 = fname.indexOf(".jpg");
if (idx1 >= 0 && idx2 > idx1) {
String tsStr = fname.substring(idx1 + 4, idx2);
unsigned long ts = tsStr.toInt();
if (ts > maxTimestamp) maxTimestamp = ts;
}
file.close();
file = folder.openNextFile();
}
}
folder.close();
folder = root.openNextFile();
}
return maxTimestamp;
}

void setup() {
camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 20000000;
config.pixel_format = PIXFORMAT_JPEG;
config.frame_size = FRAMESIZE_QVGA;
config.jpeg_quality = 12;
config.fb_count = 2;

esp_camera_init(&config);
SD_MMC.begin();

// Scan SD card and find the highest timestamp to continue from
unsigned long maxTS = findMaxTimestamp();
millisOffset = maxTS + 1; // Start after the latest found timestamp
}

/*
 * Grabs one camera frame and stores it as
 * /min_<minute>/img_<timestamp>.jpg on the SD card.
 *
 * The timestamp is millisOffset + millis(); the minute index is that value
 * divided by 60000. Stops saving once MAX_FRAMES frames have been counted.
 */
void loop() {
  static unsigned long frameCount = 0;
  if (frameCount >= MAX_FRAMES) return;

  const unsigned long now = millisOffset + millis();
  const unsigned long minuteIndex = now / 60000UL; // 60000 ms = 1 minute

  char folderPath[32];
  snprintf(folderPath, sizeof(folderPath), "/min_%03lu", minuteIndex);

  // Only query/create the folder when the minute changes (or the previous
  // attempt failed) instead of hitting the card on every frame.
  static bool folderReady = false;
  static unsigned long readyMinute = 0;
  if (!folderReady || readyMinute != minuteIndex) {
    folderReady = SD_MMC.exists(folderPath) || SD_MMC.mkdir(folderPath);
    readyMinute = minuteIndex;
  }

  camera_fb_t* fb = esp_camera_fb_get();
  if (!fb) return;

  char fullPath[64];
  snprintf(fullPath, sizeof(fullPath), "%s/img_%08lu.jpg", folderPath, now);

  File file = SD_MMC.open(fullPath, FILE_WRITE);
  if (file) {
    // Write the JPEG once; the earlier zero-fill pass followed by seek(0)
    // produced the same file contents at twice the write cost.
    file.write(fb->buf, fb->len);
    file.close();
  }

  esp_camera_fb_return(fb);
  frameCount++;
}

The module captures images and writes them to the microSD media as fast as possible.  This isn’t native video capture, but rather a series of images captured in rapid succession that are later assembled into a video with FFmpeg.  Images are saved in separate folders for each minute of capture.  The following bash script was used to create the video file.

#!/bin/bash
# make_video.sh
#
# Builds min_<SEQUENCE>.mp4 from the JPEG frames in ./min_<SEQUENCE>.

# Directory containing image frames
SEQUENCE="000" # <-- Change to your desired folder
INPUT_DIR="./min_${SEQUENCE}"
OUTPUT_VIDEO="min_${SEQUENCE}.mp4"
FRAMERATE=7.5 # <-- Set desired FPS for the output video

# Check if the input directory exists
if [ ! -d "$INPUT_DIR" ]; then
    echo "Error: Directory $INPUT_DIR not found." >&2
    exit 1
fi

# Create video from ordered image sequence; only report success if ffmpeg succeeded
if ! ffmpeg -framerate "$FRAMERATE" -pattern_type glob -i "${INPUT_DIR}/img_*.jpg" \
    -c:v libx264 -pix_fmt yuv420p -crf 23 -preset medium "$OUTPUT_VIDEO"; then
    echo "Error: ffmpeg failed to create $OUTPUT_VIDEO" >&2
    exit 1
fi

echo "Video saved to $OUTPUT_VIDEO"

Then I would combine all of the videos together with this bash script.

#!/bin/bash
# combine.sh
#
# Combine all min_*.mp4 into output.mp4

LIST_FILE="filelist.txt"

# With nullglob an unmatched pattern expands to nothing instead of the
# literal string "min_*.mp4".
shopt -s nullglob
segments=(min_*.mp4)
if [ "${#segments[@]}" -eq 0 ]; then
    echo "Error: no min_*.mp4 files found." >&2
    exit 1
fi

# Start from an empty list so entries left by an earlier run are not reused
: > "$LIST_FILE"
for f in "${segments[@]}"; do echo "file '$f'" >> "$LIST_FILE"; done

ffmpeg -f concat -safe 0 -i "$LIST_FILE" -c copy output.mp4
status=$?

rm -f "$LIST_FILE"
exit "$status"

One thing to note about the image sequence is the increase in time it takes for additional images to be written to the folder.  Below is a plot of the time drift of FPS (frames per second) over time.

The plot is based on any number of specific conditions, such as microSD media, camera module, video resolution, and jpeg quality.  The following video attempts to compensate for this drift by matching the duration of the video with the actual capture time.

The video has a slow motion appearance at the beginning and fast forward at the end, less than ideal.  Something to consider is clamping the FPS rate with shorter video segments to reduce the likelihood of noticeable drift.

I found another Github repo by Flávio Luiz Puhl Jr., that wrote a text overlay on video, https://github.com/flaviopuhl/ESP32-Cam-Text-overlay.  So I asked AI if pixel data could be written to the video buffer without any camera module present.  To my surprise it could.  The following code is a demonstration of how pixel graphics can be drawn to represent data, much like a HUD (Heads Up Display) shows telemetry information for drone FPV operators.

/*

ESP32-CAM_Video_ver12

This program captures data only to the video frame
The code was developed using ChatGPT

Edited by Patrick Gilfeather May 21, 2025

This is Arduino code, with standard setup for ESP32-CAM
- Board AI Thinker ESP32-CAM
- Partition Scheme Huge APP (3MB No OTA)

Hardware used
- Espressif ESP32-Cam module
- SanDisk Ultra 64GB microSD class 10 format FAT (32-bit version) 

Serial Debug enabled


*/

#include "FS.h"
#include "SD_MMC.h"
#include "esp_camera.h"
#include "img_converters.h"

// Dimensions, in pixels, of the synthetic frame that loop() draws and saves.
#define FRAME_WIDTH 320
#define FRAME_HEIGHT 240
// Minimum number of milliseconds between two generated frames.
#define FRAME_INTERVAL_MS 500 // Frame delay

// Running index used in the output file name; incremented once per frame.
uint32_t frameCounter = 0;
// millis() value recorded when the most recent frame was started.
unsigned long lastFrameTime = 0;

// ------------------- SETUP -------------------
/*
 * Opens the serial port at 115200 baud, waits one second, then mounts the
 * SD card and reports the outcome over serial.
 */
void setup() {
  Serial.begin(115200);
  delay(1000);

  const bool cardMounted = SD_MMC.begin();
  if (cardMounted) {
    Serial.println("SD Card Initialized. Starting simulated animation...");
  } else {
    Serial.println("SD Card Mount Failed");
  }
}

// ------------------- LOOP -------------------
/*
 * Every FRAME_INTERVAL_MS milliseconds: allocates a black frame, draws the
 * rotating cross and the millis() label on it, saves it as
 * /frame_sim_<index>.jpg and releases the frame.
 */
void loop() {
  // Read the clock once so the interval check and the stored time agree.
  const unsigned long now = millis();
  if (now - lastFrameTime < FRAME_INTERVAL_MS) return;
  lastFrameTime = now;

  uint16_t* buffer = createBlackFrameRGB565();
  if (!buffer) return;

  drawRotatingCross(buffer, FRAME_WIDTH, FRAME_HEIGHT);
  drawMillisText(buffer, FRAME_WIDTH, FRAME_HEIGHT);

  char filename[32];
  // frameCounter is uint32_t; cast so the argument matches %lu on every
  // toolchain, and bound the write to the buffer size.
  snprintf(filename, sizeof(filename), "/frame_sim_%05lu.jpg",
           static_cast<unsigned long>(frameCounter++));
  saveAsJPEG(buffer, FRAME_WIDTH, FRAME_HEIGHT, filename);

  free(buffer);
  Serial.printf("Saved: %s\n", filename);
}

// ------------------- FRAME CREATION -------------------
/*
 * Allocates a FRAME_WIDTH x FRAME_HEIGHT buffer of 16-bit pixels with
 * ps_malloc() and clears every byte to zero.
 *
 * Returns the buffer, or nullptr (after logging over serial) when the
 * allocation fails. The caller releases the buffer with free().
 */
uint16_t* createBlackFrameRGB565() {
  const size_t byteCount = FRAME_WIDTH * FRAME_HEIGHT * sizeof(uint16_t);

  void* raw = ps_malloc(byteCount);
  if (raw == nullptr) {
    Serial.println("Failed to allocate frame buffer");
    return nullptr;
  }

  // memset returns its destination pointer, i.e. the cleared buffer.
  return static_cast<uint16_t*>(memset(raw, 0x00, byteCount));
}

// ------------------- OVERLAYS -------------------
/*
 * Draws a green circle of radius 40 centred in the frame, plus two
 * perpendicular green lines through the centre whose angle follows millis().
 *
 * buffer: w*h 16-bit pixels, row-major.
 * w, h:   frame width and height in pixels.
 */
void drawRotatingCross(uint16_t* buffer, int w, int h) {
  const int centerX = w / 2;
  const int centerY = h / 2;
  const int radius = 40;
  const uint16_t green = 0x07E0;

  // Circle outline: one sample per degree, skipping samples outside the frame.
  int deg = 0;
  while (deg < 360) {
    const float rad = deg * DEG_TO_RAD;
    const int px = centerX + cos(rad) * radius;
    const int py = centerY + sin(rad) * radius;
    const bool outside = px < 0 || py < 0 || px >= w || py >= h;
    if (!outside) {
      buffer[py * w + px] = green;
    }
    deg++;
  }

  // 0.00010472 is about 2*pi/60000, so the angle sweeps one full turn
  // for every 60000 ms of millis().
  const float spin = (millis() % 60000) * 0.00010472;
  const int armLen = radius - 2;

  // First arm: from one side of the centre to the opposite side.
  const int ax = centerX + cos(spin) * armLen;
  const int ay = centerY + sin(spin) * armLen;
  const int bx = centerX - cos(spin) * armLen;
  const int by = centerY - sin(spin) * armLen;

  // Second arm: same construction, a quarter turn further on.
  const int cxp = centerX + cos(spin + PI / 2) * armLen;
  const int cyp = centerY + sin(spin + PI / 2) * armLen;
  const int dxp = centerX - cos(spin + PI / 2) * armLen;
  const int dyp = centerY - sin(spin + PI / 2) * armLen;

  drawLineRGB565(buffer, w, h, ax, ay, bx, by, green);
  drawLineRGB565(buffer, w, h, cxp, cyp, dxp, dyp, green);
}

/*
 * Draws a straight line from (x0, y0) to (x1, y1), both endpoints included,
 * using integer error accumulation. Points outside the w x h frame are
 * stepped over without being written.
 *
 * pixels: w*h 16-bit pixels, row-major.
 * color:  value stored into every pixel on the line.
 */
void drawLineRGB565(uint16_t* pixels, int w, int h, int x0, int y0, int x1, int y1, uint16_t color) {
  const int deltaX = abs(x1 - x0);
  const int deltaY = -abs(y1 - y0);
  const int stepX = (x0 < x1) ? 1 : -1;
  const int stepY = (y0 < y1) ? 1 : -1;
  int error = deltaX + deltaY;

  for (;;) {
    const bool inside = x0 >= 0 && x0 < w && y0 >= 0 && y0 < h;
    if (inside) {
      pixels[y0 * w + x0] = color;
    }

    // Stop once the end point itself has been handled.
    if (x0 == x1 && y0 == y1) {
      return;
    }

    const int doubled = 2 * error;
    if (doubled >= deltaY) {
      error += deltaY;
      x0 += stepX;
    }
    if (doubled <= deltaX) {
      error += deltaX;
      y0 += stepY;
    }
  }
}

// ------------------- TEXT -------------------

/*
 * Draws one 5x7 glyph with its top-left corner at (x, y).
 *
 * Supported codes are ASCII 32..126 plus 176 (degree), 181 (micro) and
 * 177 (plus-minus); any other code draws nothing. Each glyph is five column
 * bytes, bit 0 being the top row. Pixels outside the w x h frame are skipped.
 *
 * buf:   w*h 16-bit pixels, row-major.
 * c:     character code; treated as unsigned so codes above 127 match even
 *        where plain char is signed.
 * color: value stored into every set pixel of the glyph.
 */
void drawChar(uint16_t* buf, int w, int h, int x, int y, char c, uint16_t color) {
  static const uint8_t font5x7[][5] = {
    // ASCII 32 to 126 (printable characters), four glyphs per row
    {0x00,0x00,0x00,0x00,0x00},{0x00,0x00,0x5F,0x00,0x00},{0x00,0x07,0x00,0x07,0x00},{0x14,0x7F,0x14,0x7F,0x14},
    {0x24,0x2A,0x7F,0x2A,0x12},{0x23,0x13,0x08,0x64,0x62},{0x36,0x49,0x55,0x22,0x50},{0x00,0x05,0x03,0x00,0x00},
    {0x00,0x1C,0x22,0x41,0x00},{0x00,0x41,0x22,0x1C,0x00},{0x14,0x08,0x3E,0x08,0x14},{0x08,0x08,0x3E,0x08,0x08},
    {0x00,0x50,0x30,0x00,0x00},{0x08,0x08,0x08,0x08,0x08},{0x00,0x60,0x60,0x00,0x00},{0x20,0x10,0x08,0x04,0x02},
    {0x3E,0x51,0x49,0x45,0x3E},{0x00,0x42,0x7F,0x40,0x00},{0x42,0x61,0x51,0x49,0x46},{0x21,0x41,0x45,0x4B,0x31},
    {0x18,0x14,0x12,0x7F,0x10},{0x27,0x45,0x45,0x45,0x39},{0x3C,0x4A,0x49,0x49,0x30},{0x01,0x71,0x09,0x05,0x03},
    {0x36,0x49,0x49,0x49,0x36},{0x06,0x49,0x49,0x29,0x1E},{0x00,0x36,0x36,0x00,0x00},{0x00,0x56,0x36,0x00,0x00},
    {0x08,0x14,0x22,0x41,0x00},{0x14,0x14,0x14,0x14,0x14},{0x00,0x41,0x22,0x14,0x08},{0x02,0x01,0x51,0x09,0x06},
    {0x32,0x49,0x79,0x41,0x3E},{0x7E,0x11,0x11,0x11,0x7E},{0x7F,0x49,0x49,0x49,0x36},{0x3E,0x41,0x41,0x41,0x22},
    {0x7F,0x41,0x41,0x22,0x1C},{0x7F,0x49,0x49,0x49,0x41},{0x7F,0x09,0x09,0x09,0x01},{0x3E,0x41,0x49,0x49,0x7A},
    {0x7F,0x08,0x08,0x08,0x7F},{0x00,0x41,0x7F,0x41,0x00},{0x20,0x40,0x41,0x3F,0x01},{0x7F,0x08,0x14,0x22,0x41},
    {0x7F,0x40,0x40,0x40,0x40},{0x7F,0x02,0x0C,0x02,0x7F},{0x7F,0x04,0x08,0x10,0x7F},{0x3E,0x41,0x41,0x41,0x3E},
    {0x7F,0x09,0x09,0x09,0x06},{0x3E,0x41,0x51,0x21,0x5E},{0x7F,0x09,0x19,0x29,0x46},{0x46,0x49,0x49,0x49,0x31},
    {0x01,0x01,0x7F,0x01,0x01},{0x3F,0x40,0x40,0x40,0x3F},{0x1F,0x20,0x40,0x20,0x1F},{0x3F,0x40,0x38,0x40,0x3F},
    {0x63,0x14,0x08,0x14,0x63},{0x07,0x08,0x70,0x08,0x07},{0x61,0x51,0x49,0x45,0x43},{0x00,0x7F,0x41,0x41,0x00},
    {0x02,0x04,0x08,0x10,0x20},{0x00,0x41,0x41,0x7F,0x00},{0x04,0x02,0x01,0x02,0x04},{0x40,0x40,0x40,0x40,0x40},
    {0x00,0x01,0x02,0x04,0x00},{0x20,0x54,0x54,0x54,0x78},{0x7F,0x48,0x44,0x44,0x38},{0x38,0x44,0x44,0x44,0x20},
    {0x38,0x44,0x44,0x48,0x7F},{0x38,0x54,0x54,0x54,0x18},{0x08,0x7E,0x09,0x01,0x02},{0x0C,0x52,0x52,0x52,0x3E},
    {0x7F,0x08,0x04,0x04,0x78},{0x00,0x44,0x7D,0x40,0x00},{0x20,0x40,0x44,0x3D,0x00},{0x7F,0x10,0x28,0x44,0x00},
    {0x00,0x41,0x7F,0x40,0x00},{0x7C,0x04,0x18,0x04,0x78},{0x7C,0x08,0x04,0x04,0x78},{0x38,0x44,0x44,0x44,0x38},
    {0x7C,0x14,0x14,0x14,0x08},{0x08,0x14,0x14,0x18,0x7C},{0x7C,0x08,0x04,0x04,0x08},{0x48,0x54,0x54,0x54,0x20},
    {0x04,0x3F,0x44,0x40,0x20},{0x3C,0x40,0x40,0x20,0x7C},{0x1C,0x20,0x40,0x20,0x1C},{0x3C,0x40,0x30,0x40,0x3C},
    {0x44,0x28,0x10,0x28,0x44},{0x0C,0x50,0x50,0x50,0x3C},{0x44,0x64,0x54,0x4C,0x44},
    // '{', '|', '}', '~' (123..126): previously absent, which shifted the
    // extra symbols below and let '~' index past the end of the table.
    {0x00,0x08,0x36,0x41,0x00},{0x00,0x00,0x7F,0x00,0x00},{0x00,0x41,0x36,0x08,0x00},{0x10,0x08,0x08,0x10,0x08},

    // Extra symbols, stored at indices 95, 96 and 97
    {0x06,0x09,0x09,0x06,0x00}, // degree (176)
    {0x38,0x44,0x44,0x3C,0x40}, // micro (181)
    {0x10,0x54,0x7C,0x54,0x10}  // plus-minus (177)
  };

  // 95 printable ASCII glyphs + 3 extras; the index arithmetic below relies on it.
  static_assert(sizeof(font5x7) / sizeof(font5x7[0]) == 98,
                "font5x7 must hold ASCII 32..126 followed by three extra glyphs");

  const unsigned char code = static_cast<unsigned char>(c);

  const uint8_t* chr = nullptr;
  if (code >= 32 && code <= 126)
    chr = font5x7[code - 32];
  else if (code == 176) // degree
    chr = font5x7[95];
  else if (code == 181) // micro
    chr = font5x7[96];
  else if (code == 177) // plus-minus
    chr = font5x7[97];
  else
    return;

  for (int col = 0; col < 5; col++) {
    const uint8_t line = chr[col];
    for (int row = 0; row < 7; row++) {
      if (line & (1 << row)) {
        const int px = x + col;
        const int py = y + row;
        if (px >= 0 && px < w && py >= 0 && py < h)
          buf[py * w + px] = color;
      }
    }
  }
}


/*
 * Draws a NUL-terminated string starting at (x, y), one drawChar() call per
 * byte, advancing 6 pixels horizontally after each byte.
 */
void drawText(uint16_t* buf, int w, int h, int x, int y, const char* text, uint16_t color) {
  for (const char* cursor = text; *cursor != '\0'; ++cursor, x += 6) {
    drawChar(buf, w, h, x, y, *cursor, color);
  }
}

/*
 * Draws "Time: <millis()>" in green, right-aligned 5 pixels from the right
 * edge and 5 pixels below the top of the frame.
 */
void drawMillisText(uint16_t* buf, int w, int h) {
  const uint16_t kGreen = 0x07E0;
  const int kCharPitch = 6; // horizontal advance per character used by drawText()
  const int kMargin = 5;

  char label[32];
  sprintf(label, "Time: %lu", millis());

  const int charCount = strlen(label);
  const int startX = w - charCount * kCharPitch - kMargin;
  drawText(buf, w, h, startX, kMargin, label, kGreen); // Green
}


// ------------------- JPEG SAVE -------------------
/*
 * Encodes a w x h RGB565 frame as a quality-90 JPEG and writes it to `path`
 * on the SD card.
 *
 * The frame is first expanded to 3 bytes per pixel, encoded with fmt2jpg(),
 * and only then is the file opened, so a failed allocation or encode does
 * not leave an empty file behind. Failures are reported over serial; the
 * function returns nothing.
 *
 * rgb565_buf: w*h 16-bit pixels, row-major (5 bits red, 6 green, 5 blue).
 * path:       destination file path passed to SD_MMC.open().
 */
void saveAsJPEG(uint16_t* rgb565_buf, int w, int h, const char *path) {
  if (!rgb565_buf || !path || w <= 0 || h <= 0) {
    Serial.println("JPEG encode failed");
    return;
  }

  const size_t pixelCount = static_cast<size_t>(w) * static_cast<size_t>(h);
  const size_t rgbBytes = pixelCount * 3;

  // Convert RGB565 to RGB888
  uint8_t* rgb888_buf = (uint8_t*)ps_malloc(rgbBytes);
  if (!rgb888_buf) {
    Serial.println("RGB888 alloc failed");
    return;
  }

  // NOTE(review): bytes are stored as R,G,B; confirm this is the order
  // fmt2jpg expects for PIXFORMAT_RGB888. The pure-green overlays drawn by
  // this sketch look the same either way.
  for (size_t i = 0; i < pixelCount; i++) {
    const uint16_t pixel = rgb565_buf[i];
    // Scale each 5/6-bit channel to the full 0..255 range.
    const uint8_t r = ((pixel >> 11) & 0x1F) * 255 / 31;
    const uint8_t g = ((pixel >> 5) & 0x3F) * 255 / 63;
    const uint8_t b = (pixel & 0x1F) * 255 / 31;

    rgb888_buf[i * 3 + 0] = r;
    rgb888_buf[i * 3 + 1] = g;
    rgb888_buf[i * 3 + 2] = b;
  }

  uint8_t* jpg_buf = NULL;
  size_t jpg_len = 0;

  const bool encoded = fmt2jpg(rgb888_buf, rgbBytes, w, h, PIXFORMAT_RGB888, 90, &jpg_buf, &jpg_len);
  free(rgb888_buf); // no longer needed whether or not encoding succeeded
  if (!encoded) {
    Serial.println("JPEG encode failed");
    return;
  }

  File file = SD_MMC.open(path, FILE_WRITE);
  if (!file) {
    Serial.println("Failed to open file");
    free(jpg_buf);
    return;
  }

  if (file.write(jpg_buf, jpg_len) != jpg_len) {
    Serial.println("JPEG write incomplete");
  }
  file.close();
  free(jpg_buf);
}

The video was generated from the images captured using the following bash script.

#!/bin/bash
# make_video.sh
#
# Builds the telemetry demo video from the frame_sim_*.jpg frames in INPUT_DIR.

# Directory containing image frames
INPUT_DIR="./microSD" # <-- Change to your desired folder
OUTPUT_VIDEO="ESP32-Cam Telemetry Demo.mp4"
FRAMERATE=5 # Set desired FPS for the output video

# Check if the input directory exists
if [ ! -d "$INPUT_DIR" ]; then
    echo "Error: Directory $INPUT_DIR not found." >&2
    exit 1
fi

# Create video from ordered image sequence; only report success if ffmpeg succeeded
if ! ffmpeg -framerate "$FRAMERATE" -pattern_type glob -i "${INPUT_DIR}/frame_sim_*.jpg" \
    -c:v libx264 -pix_fmt yuv420p -crf 23 -preset medium "$OUTPUT_VIDEO"; then
    echo "Error: ffmpeg failed to create $OUTPUT_VIDEO" >&2
    exit 1
fi

echo "Video saved to $OUTPUT_VIDEO"

Here is the demo video of the telemetry overlay.

This is likely going to be a topic covered in future posts.  Hopefully the experience won’t be like ordering a side of wheat toast.

Comments are closed.