Add motion tracking functionality to VideoEditor

This commit introduces a new MotionTracker class for handling motion tracking during video editing. The VideoEditor class has been updated to integrate motion tracking features, including adding and removing tracking points, interpolating positions, and applying tracking offsets during cropping. The user can toggle motion tracking and clear tracking points via keyboard shortcuts. Additionally, the state management has been enhanced to save and load motion tracking data, improving the overall editing experience.
This commit is contained in:
2025-09-16 19:56:58 +02:00
parent 66b23834fd
commit 85891a5f99
2 changed files with 406 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ import cv2
import argparse
import numpy as np
from pathlib import Path
from typing import List
from typing import List, Tuple
import time
import re
import json
@@ -12,6 +12,7 @@ import threading
import queue
import subprocess
import ctypes
from tracking import MotionTracker
class Cv2BufferedCap:
"""Buffered wrapper around cv2.VideoCapture that handles frame loading, seeking, and caching correctly"""
@@ -596,6 +597,11 @@ class VideoEditor:
self.display_needs_update = True
self.last_display_state = None
# Motion tracking
self.motion_tracker = MotionTracker()
self.tracking_point_radius = 10 # Radius of tracking point circles
self.tracking_point_distance = 50 # Max distance to consider for removing points
# Cached transformations for performance
self.cached_transformed_frame = None
self.cached_frame_number = None
@@ -628,6 +634,9 @@ class VideoEditor:
return False
try:
# Get tracking data
tracking_data = self.motion_tracker.to_dict()
state = {
'timestamp': time.time(),
'current_frame': getattr(self, 'current_frame', 0),
@@ -643,7 +652,8 @@ class VideoEditor:
'display_offset': self.display_offset,
'playback_speed': getattr(self, 'playback_speed', 1.0),
'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
'is_playing': getattr(self, 'is_playing', False)
'is_playing': getattr(self, 'is_playing', False),
'motion_tracking': tracking_data # Add tracking data
}
with open(state_file, 'w') as f:
@@ -719,6 +729,11 @@ class VideoEditor:
if 'is_playing' in state:
self.is_playing = state['is_playing']
print(f"Loaded is_playing: {self.is_playing}")
# Load motion tracking data if available
if 'motion_tracking' in state:
self.motion_tracker.from_dict(state['motion_tracking'])
print(f"Loaded motion tracking data: {len(self.motion_tracker.tracking_points)} keyframes")
# Validate cut markers against current video length
if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1064,6 +1079,11 @@ class VideoEditor:
if frame is None:
return None
# Get tracking offset for crop following if motion tracking is enabled
tracking_offset = (0, 0)
if self.motion_tracker.tracking_enabled:
tracking_offset = self.motion_tracker.get_tracking_offset(self.current_frame)
# Create a hash of the transformation parameters for caching
transform_hash = hash((
self.crop_rect,
@@ -1071,7 +1091,9 @@ class VideoEditor:
self.rotation_angle,
self.brightness,
self.contrast,
tuple(self.display_offset)
tuple(self.display_offset),
tracking_offset, # Include tracking offset in hash
self.motion_tracker.tracking_enabled # Include tracking state in hash
))
# Check if we can use cached transformation during auto-repeat seeking
@@ -1090,6 +1112,13 @@ class VideoEditor:
# Apply crop
if self.crop_rect:
x, y, w, h = self.crop_rect
# Apply tracking offset to crop position if motion tracking is enabled
if self.motion_tracker.tracking_enabled:
tracking_offset = self.motion_tracker.get_tracking_offset(self.current_frame)
x += int(tracking_offset[0])
y += int(tracking_offset[1])
x, y, w, h = int(x), int(y), int(w), int(h)
# Ensure crop is within frame bounds
x = max(0, min(x, processed_frame.shape[1] - 1))
@@ -1134,6 +1163,109 @@ class VideoEditor:
self.cached_transformed_frame = None
self.cached_frame_number = None
self.cached_transform_hash = None
def transform_point(self, point: Tuple[float, float]) -> Tuple[float, float]:
    """Map a point from original-frame coordinates to display coordinates.

    Applies, in order, the same crop, rotation and zoom transformations
    that frames undergo before display. Returns None when the point is
    None or the dimensions needed for rotation are unavailable.
    """
    if point is None:
        return None
    px, py = point

    # Crop: shift into the cropped region's coordinate space.
    if self.crop_rect:
        px -= self.crop_rect[0]
        py -= self.crop_rect[1]

    # Rotation: needs the pre-rotation (post-crop) dimensions.
    angle = self.rotation_angle
    if angle != 0:
        if self.crop_rect:
            width, height = self.crop_rect[2], self.crop_rect[3]
        elif self.current_display_frame is not None:
            height, width = self.current_display_frame.shape[:2]
        else:
            return None
        if angle == 90:
            px, py = py, width - px
        elif angle == 180:
            px, py = width - px, height - py
        elif angle == 270:
            px, py = height - py, px

    # Zoom: uniform scale on both axes.
    if self.zoom_factor != 1.0:
        px *= self.zoom_factor
        py *= self.zoom_factor
    return (px, py)
def untransform_point(self, point: Tuple[float, float]) -> Tuple[float, float]:
    """Map a point from display coordinates back to original-frame coordinates.

    Undoes the display transformations in reverse order: zoom first, then
    rotation, then crop. Returns None when the point is None or no display
    frame is available.
    """
    if point is None or self.current_display_frame is None:
        return None
    px, py = point

    # Undo zoom.
    if self.zoom_factor != 1.0:
        px /= self.zoom_factor
        py /= self.zoom_factor

    # Undo rotation using the post-crop, pre-rotation dimensions.
    angle = self.rotation_angle
    if angle != 0:
        if self.crop_rect:
            width, height = self.crop_rect[2], self.crop_rect[3]
        else:
            height, width = self.current_display_frame.shape[:2]
        if angle == 90:
            px, py = width - py, px
        elif angle == 180:
            px, py = width - px, height - py
        elif angle == 270:
            px, py = py, height - px

    # Undo crop by restoring the crop origin offset.
    if self.crop_rect:
        px += self.crop_rect[0]
        py += self.crop_rect[1]
    return (px, py)
def apply_rotation(self, frame):
@@ -1294,6 +1426,64 @@ class VideoEditor:
# Keep project view open but switch focus to video editor
# Don't destroy the project view window - just let the user switch between them
def draw_tracking_points(self, canvas, offset_x, offset_y, scale):
    """Overlay motion-tracking markers on the rendered canvas.

    Draws the current frame's keyframe points as green dots with white
    borders, the interpolated tracking position as a blue cross (while
    tracking is enabled), and a tracking status line near the bottom-right
    corner of the window.

    Args:
        canvas: Image to draw on.
        offset_x: Horizontal offset of the frame within the canvas.
        offset_y: Vertical offset of the frame within the canvas.
        scale: Scale factor applied to the frame on the canvas.
    """
    if self.current_frame is None:
        return

    def to_canvas(display_pt):
        # Map display-space coordinates onto canvas pixel coordinates.
        return (int(offset_x + display_pt[0] * scale),
                int(offset_y + display_pt[1] * scale))

    # Keyframe points for this frame: green fill with a white outline.
    for pt in self.motion_tracker.get_tracking_points_for_frame(self.current_frame):
        display_pt = self.transform_point(pt)
        if display_pt:
            cx, cy = to_canvas(display_pt)
            cv2.circle(canvas, (cx, cy), self.tracking_point_radius + 2, (255, 255, 255), 2)
            cv2.circle(canvas, (cx, cy), self.tracking_point_radius, (0, 255, 0), -1)

    # Interpolated tracking position: blue cross, only while tracking is on.
    if self.motion_tracker.tracking_enabled:
        tracked = self.motion_tracker.get_interpolated_position(self.current_frame)
        if tracked:
            display_pt = self.transform_point(tracked)
            if display_pt:
                cx, cy = to_canvas(display_pt)
                arm = 10
                cv2.line(canvas, (cx - arm, cy), (cx + arm, cy), (255, 0, 0), 2)
                cv2.line(canvas, (cx, cy - arm), (cx, cy + arm), (255, 0, 0), 2)

    # Status overlay whenever tracking is enabled or any keyframes exist.
    if self.motion_tracker.tracking_enabled or self.motion_tracker.has_tracking_points():
        point_count = sum(len(points) for points in self.motion_tracker.tracking_points.values())
        status_text = f"Motion: {'ON' if self.motion_tracker.tracking_enabled else 'OFF'} ({point_count} pts)"
        text_x = self.window_width - 250
        text_y = self.window_height - (self.TIMELINE_HEIGHT if not self.is_image_mode else 30)
        # Shadow first, then the foreground text on top of it.
        cv2.putText(canvas, status_text, (text_x + 2, text_y + 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
        cv2.putText(canvas, status_text, (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 1)
def draw_feedback_message(self, frame):
"""Draw feedback message on frame if visible"""
if not self.feedback_message or not self.feedback_message_time:
@@ -1757,6 +1947,9 @@ class VideoEditor:
# Draw timeline
self.draw_timeline(canvas)
# Draw tracking points and tracking position
self.draw_tracking_points(canvas, start_x, start_y, scale)
# Draw progress bar (if visible)
self.draw_progress_bar(canvas)
@@ -1808,6 +2001,32 @@ class VideoEditor:
self.crop_start_point = None
self.crop_preview_rect = None
# Handle tracking points (Right-click)
if event == cv2.EVENT_RBUTTONDOWN:
# Convert display coordinates to original frame coordinates
original_point = self.untransform_point((x, y))
if original_point:
# Check if clicking on an existing tracking point to remove it
removed = self.motion_tracker.remove_tracking_point(
self.current_frame,
original_point[0],
original_point[1],
self.tracking_point_distance
)
if not removed:
# If no point was removed, add a new tracking point
self.motion_tracker.add_tracking_point(
self.current_frame,
original_point[0],
original_point[1]
)
# Save state when tracking points change
self.save_state()
self.display_needs_update = True
# Handle zoom center (Ctrl + click)
if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
self.zoom_center = (x, y)
@@ -2772,6 +2991,33 @@ class VideoEditor:
else:
print(f"DEBUG: File '{self.video_path.stem}' does not contain '_edited_'")
print("Enter key only overwrites files with '_edited_' in the name. Use 'n' to create new files.")
elif key == ord("v") or key == ord("V"):
# Motion tracking controls
if key == ord("v"):
# Toggle motion tracking
if self.motion_tracker.tracking_enabled:
self.motion_tracker.stop_tracking()
print("Motion tracking disabled")
else:
# Start tracking with current crop and zoom center
base_zoom_center = self.zoom_center
if not base_zoom_center and self.current_display_frame is not None:
# Use frame center if no zoom center is set
h, w = self.current_display_frame.shape[:2]
base_zoom_center = (w // 2, h // 2)
self.motion_tracker.start_tracking(
self.crop_rect,
base_zoom_center
)
print("Motion tracking enabled")
self.save_state()
else: # V - Clear all tracking points
self.motion_tracker.clear_tracking_points()
print("All tracking points cleared")
self.save_state()
self.display_needs_update = True
elif key == ord("t"):
# Marker looping only for videos
if not self.is_image_mode:

157
croppa/tracking.py Normal file
View File

@@ -0,0 +1,157 @@
from typing import List, Dict, Tuple, Optional
class MotionTracker:
"""Handles motion tracking for crop and pan operations"""
def __init__(self):
self.tracking_points = {} # {frame_number: [(x, y), ...]}
self.tracking_enabled = False
self.base_crop_rect = None # Original crop rect when tracking started
self.base_zoom_center = None # Original zoom center when tracking started
def add_tracking_point(self, frame_number: int, x: int, y: int):
"""Add a tracking point at the specified frame and coordinates"""
if frame_number not in self.tracking_points:
self.tracking_points[frame_number] = []
self.tracking_points[frame_number].append((x, y))
def remove_tracking_point(self, frame_number: int, x: int, y: int, radius: int = 50):
"""Remove a tracking point by frame and proximity to x,y"""
if frame_number not in self.tracking_points:
return False
points = self.tracking_points[frame_number]
for i, (px, py) in enumerate(points):
# Calculate distance between points
distance = ((px - x) ** 2 + (py - y) ** 2) ** 0.5
if distance <= radius:
del points[i]
if not points:
del self.tracking_points[frame_number]
return True
return False
def clear_tracking_points(self):
"""Clear all tracking points"""
self.tracking_points.clear()
def get_tracking_points_for_frame(self, frame_number: int) -> List[Tuple[int, int]]:
"""Get all tracking points for a specific frame"""
return self.tracking_points.get(frame_number, [])
def has_tracking_points(self) -> bool:
"""Check if any tracking points exist"""
return bool(self.tracking_points)
def get_interpolated_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
"""Get interpolated position for a frame based on tracking points"""
if not self.tracking_points:
return None
# Get all frames with tracking points
frames = sorted(self.tracking_points.keys())
if not frames:
return None
# If we have a point at this exact frame, return it
if frame_number in self.tracking_points:
points = self.tracking_points[frame_number]
if points:
# Return average of all points at this frame
avg_x = sum(p[0] for p in points) / len(points)
avg_y = sum(p[1] for p in points) / len(points)
return (avg_x, avg_y)
# If frame is before first tracking point
if frame_number < frames[0]:
points = self.tracking_points[frames[0]]
if points:
avg_x = sum(p[0] for p in points) / len(points)
avg_y = sum(p[1] for p in points) / len(points)
return (avg_x, avg_y)
# If frame is after last tracking point
if frame_number > frames[-1]:
points = self.tracking_points[frames[-1]]
if points:
avg_x = sum(p[0] for p in points) / len(points)
avg_y = sum(p[1] for p in points) / len(points)
return (avg_x, avg_y)
# Find the two frames to interpolate between
for i in range(len(frames) - 1):
if frames[i] <= frame_number <= frames[i + 1]:
frame1, frame2 = frames[i], frames[i + 1]
points1 = self.tracking_points[frame1]
points2 = self.tracking_points[frame2]
if not points1 or not points2:
continue
# Get average positions for each frame
avg_x1 = sum(p[0] for p in points1) / len(points1)
avg_y1 = sum(p[1] for p in points1) / len(points1)
avg_x2 = sum(p[0] for p in points2) / len(points2)
avg_y2 = sum(p[1] for p in points2) / len(points2)
# Linear interpolation
t = (frame_number - frame1) / (frame2 - frame1)
interp_x = avg_x1 + t * (avg_x2 - avg_x1)
interp_y = avg_y1 + t * (avg_y2 - avg_y1)
return (interp_x, interp_y)
return None
def get_tracking_offset(self, frame_number: int) -> Tuple[float, float]:
"""Get the offset to center the crop on the tracked point"""
if not self.tracking_enabled or not self.base_zoom_center:
return (0.0, 0.0)
current_pos = self.get_interpolated_position(frame_number)
if not current_pos:
return (0.0, 0.0)
# Calculate offset to center the crop on the tracked point
# The offset should move the display so the tracked point stays centered
offset_x = current_pos[0] - self.base_zoom_center[0]
offset_y = current_pos[1] - self.base_zoom_center[1]
return (offset_x, offset_y)
def start_tracking(self, base_crop_rect: Tuple[int, int, int, int], base_zoom_center: Tuple[int, int]):
"""Start motion tracking with base positions"""
self.tracking_enabled = True
self.base_crop_rect = base_crop_rect
self.base_zoom_center = base_zoom_center
def stop_tracking(self):
"""Stop motion tracking"""
self.tracking_enabled = False
self.base_crop_rect = None
self.base_zoom_center = None
def to_dict(self) -> Dict:
"""Convert to dictionary for serialization"""
return {
'tracking_points': self.tracking_points,
'tracking_enabled': self.tracking_enabled,
'base_crop_rect': self.base_crop_rect,
'base_zoom_center': self.base_zoom_center
}
def from_dict(self, data: Dict):
"""Load from dictionary for deserialization"""
# Convert string keys back to integers for tracking_points
tracking_points_data = data.get('tracking_points', {})
self.tracking_points = {}
for frame_str, points in tracking_points_data.items():
frame_num = int(frame_str) # Convert string key to integer
self.tracking_points[frame_num] = points
self.tracking_enabled = data.get('tracking_enabled', False)
self.base_crop_rect = data.get('base_crop_rect', None)
self.base_zoom_center = data.get('base_zoom_center', None)