Add MotionTracker class for enhanced motion tracking in VideoEditor

This commit introduces the MotionTracker class, which manages motion tracking points for crop and pan operations. It allows users to add, remove, and clear tracking points, as well as interpolate positions based on these points. The VideoEditor class has been updated to integrate motion tracking functionality, including user interactions for adding tracking points and toggling tracking on and off. Additionally, the display now reflects motion tracking status and visualizes tracking points on the canvas, improving the editing experience.
This commit is contained in:
2025-09-16 14:26:44 +02:00
parent 5baa2572ea
commit f111571601

View File

@@ -4,7 +4,7 @@ import cv2
import argparse
import numpy as np
from pathlib import Path
from typing import List
from typing import List, Dict, Tuple, Optional
import time
import re
import json
@@ -96,6 +96,146 @@ class Cv2BufferedCap:
"""Check if capture is opened"""
return self.cap and self.cap.isOpened()
class MotionTracker:
    """Handles motion tracking for crop and pan operations.

    Tracking points are stored per frame number; positions between keyframes
    are linearly interpolated so the crop/pan view can follow a subject.
    """

    def __init__(self):
        # {frame_number: [(x, y), ...]} — one or more points per keyframe
        self.tracking_points: Dict[int, List[Tuple[int, int]]] = {}
        self.tracking_enabled = False
        # Crop rect / zoom center captured when tracking started; tracking
        # offsets are computed relative to base_zoom_center.
        self.base_crop_rect: Optional[Tuple[int, int, int, int]] = None
        self.base_zoom_center: Optional[Tuple[int, int]] = None

    @staticmethod
    def _mean_point(points: List[Tuple[int, int]]) -> Tuple[float, float]:
        """Return the average (x, y) of a non-empty list of points."""
        return (
            sum(p[0] for p in points) / len(points),
            sum(p[1] for p in points) / len(points),
        )

    def add_tracking_point(self, frame_number: int, x: int, y: int):
        """Add a tracking point at the specified frame and coordinates."""
        self.tracking_points.setdefault(frame_number, []).append((x, y))

    def remove_tracking_point(self, frame_number: int, point_index: int):
        """Remove a tracking point by frame and index; drop empty frames."""
        points = self.tracking_points.get(frame_number)
        if points is not None and 0 <= point_index < len(points):
            del points[point_index]
            if not points:
                # Keep the keyframe dict free of empty entries so
                # has_tracking_points()/interpolation stay meaningful.
                del self.tracking_points[frame_number]

    def clear_tracking_points(self):
        """Clear all tracking points."""
        self.tracking_points.clear()

    def get_tracking_points_for_frame(self, frame_number: int) -> List[Tuple[int, int]]:
        """Get all tracking points for a specific frame."""
        return self.tracking_points.get(frame_number, [])

    def has_tracking_points(self) -> bool:
        """Check if any tracking points exist."""
        return bool(self.tracking_points)

    def get_interpolated_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
        """Get the interpolated (x, y) for a frame based on tracking points.

        Exact keyframes return the average of their points; frames outside
        the keyframe range clamp to the nearest keyframe; frames between
        keyframes are linearly interpolated. Returns None when no usable
        keyframes exist.
        """
        if not self.tracking_points:
            return None
        frames = sorted(self.tracking_points)

        # Exact keyframe: average all points recorded at this frame.
        if frame_number in self.tracking_points:
            points = self.tracking_points[frame_number]
            if points:
                return self._mean_point(points)

        # Before the first / after the last keyframe: clamp.
        if frame_number < frames[0]:
            points = self.tracking_points[frames[0]]
            return self._mean_point(points) if points else None
        if frame_number > frames[-1]:
            points = self.tracking_points[frames[-1]]
            return self._mean_point(points) if points else None

        # Between two keyframes: linear interpolation of the frame averages.
        for frame1, frame2 in zip(frames, frames[1:]):
            if frame1 <= frame_number <= frame2:
                points1 = self.tracking_points[frame1]
                points2 = self.tracking_points[frame2]
                if not points1 or not points2:
                    continue
                x1, y1 = self._mean_point(points1)
                x2, y2 = self._mean_point(points2)
                t = (frame_number - frame1) / (frame2 - frame1)
                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
        return None

    def get_tracking_offset(self, frame_number: int) -> Tuple[float, float]:
        """Offset of the tracked position from the base zoom center.

        Returns (0.0, 0.0) when tracking is disabled, no base was captured,
        or no position can be interpolated for this frame.
        """
        if not self.tracking_enabled or self.base_zoom_center is None:
            return (0.0, 0.0)
        current_pos = self.get_interpolated_position(frame_number)
        if current_pos is None:
            return (0.0, 0.0)
        return (
            current_pos[0] - self.base_zoom_center[0],
            current_pos[1] - self.base_zoom_center[1],
        )

    def start_tracking(self, base_crop_rect: Tuple[int, int, int, int], base_zoom_center: Tuple[int, int]):
        """Start motion tracking, capturing the base crop rect and zoom center."""
        self.tracking_enabled = True
        self.base_crop_rect = base_crop_rect
        self.base_zoom_center = base_zoom_center

    def stop_tracking(self):
        """Stop motion tracking and forget the captured base positions."""
        self.tracking_enabled = False
        self.base_crop_rect = None
        self.base_zoom_center = None

    def to_dict(self) -> Dict:
        """Convert to a dictionary for serialization (saved as JSON)."""
        return {
            'tracking_points': self.tracking_points,
            'tracking_enabled': self.tracking_enabled,
            'base_crop_rect': self.base_crop_rect,
            'base_zoom_center': self.base_zoom_center
        }

    def from_dict(self, data: Dict):
        """Load state from a dict produced by to_dict().

        The editor persists this dict as JSON, which turns int frame keys
        into strings and tuples into lists; normalize back to int keys and
        (x, y) tuples, otherwise frame lookups silently miss after a reload.
        """
        raw_points = data.get('tracking_points', {}) or {}
        self.tracking_points = {
            int(frame): [tuple(point) for point in points]
            for frame, points in raw_points.items()
        }
        self.tracking_enabled = data.get('tracking_enabled', False)
        rect = data.get('base_crop_rect', None)
        self.base_crop_rect = tuple(rect) if rect is not None else None
        center = data.get('base_zoom_center', None)
        self.base_zoom_center = tuple(center) if center is not None else None
def get_active_window_title():
"""Get the title of the currently active window"""
try:
@@ -586,6 +726,9 @@ class VideoEditor:
# Crop adjustment settings
self.crop_size_step = self.CROP_SIZE_STEP
# Motion tracking
self.motion_tracker = MotionTracker()
# Render thread management
self.render_thread = None
self.render_cancelled = False
@@ -643,7 +786,8 @@ class VideoEditor:
'display_offset': self.display_offset,
'playback_speed': getattr(self, 'playback_speed', 1.0),
'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
'is_playing': getattr(self, 'is_playing', False)
'is_playing': getattr(self, 'is_playing', False),
'motion_tracker': self.motion_tracker.to_dict()
}
with open(state_file, 'w') as f:
@@ -719,6 +863,9 @@ class VideoEditor:
if 'is_playing' in state:
self.is_playing = state['is_playing']
print(f"Loaded is_playing: {self.is_playing}")
if 'motion_tracker' in state:
self.motion_tracker.from_dict(state['motion_tracker'])
print(f"Loaded motion_tracker data")
# Validate cut markers against current video length
if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1112,11 +1259,14 @@ class VideoEditor:
processed_frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR
)
# Handle zoom center and display offset
# Handle zoom center and display offset with motion tracking
if new_width > self.window_width or new_height > self.window_height:
# Apply motion tracking offset to display offset
tracking_offset_x, tracking_offset_y = self.motion_tracker.get_tracking_offset(self.current_frame)
# Calculate crop from zoomed image to fit window
start_x = max(0, self.display_offset[0])
start_y = max(0, self.display_offset[1])
start_x = max(0, self.display_offset[0] + tracking_offset_x)
start_y = max(0, self.display_offset[1] + tracking_offset_y)
end_x = min(new_width, start_x + self.window_width)
end_y = min(new_height, start_y + self.window_height)
processed_frame = processed_frame[start_y:end_y, start_x:end_x]
@@ -1652,6 +1802,9 @@ class VideoEditor:
canvas, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2
)
# Draw motion tracking points
self.draw_tracking_points(canvas, start_x, start_y, scale)
# Add info overlay
rotation_text = (
f" | Rotation: {self.rotation_angle}°" if self.rotation_angle != 0 else ""
@@ -1665,10 +1818,16 @@ class VideoEditor:
seek_multiplier_text = (
f" | Seek: {self.seek_multiplier:.1f}x" if self.seek_multiplier != 1.0 else ""
)
motion_tracking_text = ""
if not self.is_image_mode and self.motion_tracker.has_tracking_points():
tracking_status = "ON" if self.motion_tracker.tracking_enabled else "OFF"
point_count = sum(len(points) for points in self.motion_tracker.tracking_points.values())
motion_tracking_text = f" | Motion: {tracking_status} ({point_count} pts)"
if self.is_image_mode:
info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}"
else:
info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text} | {'Playing' if self.is_playing else 'Paused'}"
info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_tracking_text} | {'Playing' if self.is_playing else 'Paused'}"
cv2.putText(
canvas,
info_text,
@@ -1812,6 +1971,15 @@ class VideoEditor:
if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
self.zoom_center = (x, y)
# Handle motion tracking point addition (Right click)
if event == cv2.EVENT_RBUTTONDOWN:
if not self.is_image_mode: # Only for videos
# Convert screen coordinates to video coordinates
video_x, video_y = self.screen_to_video_coords(x, y)
self.motion_tracker.add_tracking_point(self.current_frame, video_x, video_y)
self.set_feedback_message(f"Tracking point added at frame {self.current_frame}")
self.save_state() # Save state when tracking point is added
# Handle scroll wheel for zoom (Ctrl + scroll)
if flags & cv2.EVENT_FLAG_CTRLKEY:
if event == cv2.EVENT_MOUSEWHEEL:
@@ -1953,6 +2121,189 @@ class VideoEditor:
target_frame = int(position_ratio * (self.total_frames - 1))
self.seek_to_frame(target_frame)
def screen_to_video_coords(self, screen_x: int, screen_y: int) -> Tuple[int, int]:
    """Map a window (screen) point back to original video-frame coordinates.

    Undoes the display pipeline in reverse order: letterbox centering and
    scaling, then zoom, then rotation, then the crop offset. Returns (0, 0)
    when no frame is loaded or the transformed frame is unavailable.

    NOTE(review): the 90°/270° inverse maps assume a particular rotation
    direction in apply_crop_zoom_and_rotation — confirm against it.
    """
    frame = self.current_display_frame
    if frame is None:
        return (0, 0)

    src_h, src_w = frame.shape[:2]
    usable_h = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)

    # Re-run the display transform to learn the shown frame's dimensions.
    shown = self.apply_crop_zoom_and_rotation(frame.copy())
    if shown is None:
        return (0, 0)
    shown_h, shown_w = shown.shape[:2]

    # Letterbox fit: shrink to the window, never enlarge.
    fit = min(self.window_width / shown_w, usable_h / shown_h)
    if fit < 1.0:
        boxed_w = int(shown_w * fit)
        boxed_h = int(shown_h * fit)
    else:
        boxed_w, boxed_h = shown_w, shown_h
        fit = 1.0
    margin_x = (self.window_width - boxed_w) // 2
    margin_y = (usable_h - boxed_h) // 2

    # Undo centering + scaling, clamping to the shown frame's bounds.
    px = (screen_x - margin_x) / fit
    py = (screen_y - margin_y) / fit
    px = max(0, min(px, shown_w))
    py = max(0, min(py, shown_h))

    # Undo zoom.
    if self.zoom_factor != 1.0:
        px /= self.zoom_factor
        py /= self.zoom_factor

    # Undo rotation; coordinates here are relative to the cropped frame.
    if self.rotation_angle != 0:
        if self.crop_rect:
            crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
        else:
            crop_w, crop_h = src_w, src_h
        if self.rotation_angle == 90:
            px, py = py, crop_h - px
        elif self.rotation_angle == 180:
            px, py = crop_w - px, crop_h - py
        elif self.rotation_angle == 270:
            px, py = crop_w - py, px

    # Undo crop: shift back into full-frame coordinates.
    if self.crop_rect:
        px += self.crop_rect[0]
        py += self.crop_rect[1]

    # Clamp to the original frame and truncate to pixel indices.
    px = max(0, min(px, src_w))
    py = max(0, min(py, src_h))
    return (int(px), int(py))
def set_feedback_message(self, message: str):
    """Show *message* in the UI overlay and timestamp it for expiry."""
    self.feedback_message_time = time.time()
    self.feedback_message = message
def draw_tracking_points(self, canvas, start_x, start_y, scale):
    """Overlay motion-tracking markers on the composed canvas.

    Keyframe points on the current frame are drawn as filled green dots
    with a white ring; when tracking is enabled, the interpolated position
    is drawn as a red cross. Points that cannot be mapped to screen
    coordinates are skipped.
    """
    tracker = self.motion_tracker
    if not tracker.has_tracking_points():
        return

    # Filled dot + ring for every keyframe point on this frame.
    for vid_x, vid_y in tracker.get_tracking_points_for_frame(self.current_frame):
        sx, sy = self.video_to_screen_coords(vid_x, vid_y, start_x, start_y, scale)
        if sx is None or sy is None:
            continue
        center = (int(sx), int(sy))
        cv2.circle(canvas, center, 8, (0, 255, 0), -1)
        cv2.circle(canvas, center, 10, (255, 255, 255), 2)

    # Red cross at the interpolated position while tracking is active.
    if not tracker.tracking_enabled:
        return
    interp = tracker.get_interpolated_position(self.current_frame)
    if not interp:
        return
    sx, sy = self.video_to_screen_coords(interp[0], interp[1], start_x, start_y, scale)
    if sx is None or sy is None:
        return
    cx, cy, arm = int(sx), int(sy), 12
    cv2.line(canvas, (cx - arm, cy), (cx + arm, cy), (255, 0, 0), 3)
    cv2.line(canvas, (cx, cy - arm), (cx, cy + arm), (255, 0, 0), 3)
def video_to_screen_coords(self, video_x, video_y, start_x, start_y, scale):
    """Convert original video-frame coordinates to on-screen coordinates.

    Applies the display pipeline in order: crop offset, rotation, zoom,
    then the letterbox offset/scale (start_x, start_y, scale) of the
    composed view. Returns (None, None) when no frame is loaded.
    """
    if self.current_display_frame is None:
        return None, None
    original_height, original_width = self.current_display_frame.shape[:2]

    # Step 1: start from full-frame coordinates.
    display_x = video_x
    display_y = video_y

    # Step 2: shift into cropped-frame coordinates.
    if self.crop_rect:
        crop_x, crop_y, crop_w, crop_h = self.crop_rect
        display_x -= crop_x
        display_y -= crop_y

    # Step 3: apply rotation. These forward maps are the exact inverses of
    # the ones screen_to_video_coords uses to undo rotation
    # (90°: (x, y) -> (crop_h - y, x); 270°: (x, y) -> (y, crop_w - x)).
    # The previous version had the 90° and 270° cases swapped, so overlay
    # points drifted on rotated video.
    # NOTE(review): assumes angle 90 renders as a clockwise rotation
    # (cv2.ROTATE_90_CLOCKWISE) — confirm against apply_crop_zoom_and_rotation.
    if self.rotation_angle != 0:
        if self.crop_rect:
            crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
        else:
            crop_w, crop_h = original_width, original_height
        if self.rotation_angle == 90:
            display_x, display_y = crop_h - display_y, display_x
        elif self.rotation_angle == 180:
            display_x, display_y = crop_w - display_x, crop_h - display_y
        elif self.rotation_angle == 270:
            display_x, display_y = display_y, crop_w - display_x

    # Step 4: apply zoom.
    if self.zoom_factor != 1.0:
        display_x *= self.zoom_factor
        display_y *= self.zoom_factor

    # Step 5: letterbox scale and offset to reach screen coordinates.
    screen_x = start_x + display_x * scale
    screen_y = start_y + display_y * scale
    return screen_x, screen_y
def undo_crop(self):
"""Undo the last crop operation"""
if self.crop_history:
@@ -2539,6 +2890,11 @@ class VideoEditor:
print(" 1: Set cut start point")
print(" 2: Set cut end point")
print(" T: Toggle loop between markers")
print()
print("Motion Tracking:")
print(" Right-click: Add tracking point")
print(" t: Toggle motion tracking on/off")
print(" Shift+T: Clear all tracking points")
if len(self.video_files) > 1:
print(" N: Next video")
print(" n: Previous video")
@@ -2710,6 +3066,27 @@ class VideoEditor:
self.zoom_factor = 1.0
self.clear_transformation_cache()
self.save_state() # Save state when crop is cleared
elif key == ord("t"): # T - Toggle motion tracking
if not self.is_image_mode:
if self.motion_tracker.tracking_enabled:
self.motion_tracker.stop_tracking()
self.set_feedback_message("Motion tracking disabled")
else:
if self.motion_tracker.has_tracking_points():
# Start tracking with current crop and zoom center
base_crop = self.crop_rect if self.crop_rect else (0, 0, self.current_display_frame.shape[1], self.current_display_frame.shape[0])
base_zoom = self.zoom_center if self.zoom_center else (self.current_display_frame.shape[1]//2, self.current_display_frame.shape[0]//2)
self.motion_tracker.start_tracking(base_crop, base_zoom)
self.set_feedback_message("Motion tracking enabled")
else:
self.set_feedback_message("Add tracking points first (right-click)")
self.save_state()
elif key == ord("T"): # Shift+T - Clear all tracking points
if not self.is_image_mode:
self.motion_tracker.clear_tracking_points()
self.motion_tracker.stop_tracking()
self.set_feedback_message("All tracking points cleared")
self.save_state()
elif key == ord("1"):
# Cut markers only for videos
if not self.is_image_mode: