Add motion tracking functionality to VideoEditor

This commit introduces motion tracking capabilities, allowing users to add and remove tracking points on video frames. The tracking state is managed with new attributes, and the crop functionality is enhanced to follow the tracked motion. Additionally, the user interface is updated to reflect the tracking status, and keyboard shortcuts are added for toggling tracking and clearing points. This feature improves the editing experience by enabling dynamic cropping based on motion analysis.
This commit is contained in:
2025-09-17 01:14:26 +02:00
parent 66b23834fd
commit fdf7d98850

View File

@@ -601,6 +601,10 @@ class VideoEditor:
self.cached_frame_number = None
self.cached_transform_hash = None
# Motion tracking state
self.tracking_points = {} # {frame_number: [(x, y), ...]} in original frame coords
self.tracking_enabled = False
# Project view mode
self.project_view_mode = False
self.project_view = None
@@ -643,7 +647,9 @@ class VideoEditor:
'display_offset': self.display_offset,
'playback_speed': getattr(self, 'playback_speed', 1.0),
'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
'is_playing': getattr(self, 'is_playing', False)
'is_playing': getattr(self, 'is_playing', False),
'tracking_enabled': self.tracking_enabled,
'tracking_points': {str(k): v for k, v in self.tracking_points.items()}
}
with open(state_file, 'w') as f:
@@ -719,6 +725,12 @@ class VideoEditor:
if 'is_playing' in state:
self.is_playing = state['is_playing']
print(f"Loaded is_playing: {self.is_playing}")
if 'tracking_enabled' in state:
self.tracking_enabled = state['tracking_enabled']
print(f"Loaded tracking_enabled: {self.tracking_enabled}")
if 'tracking_points' in state and isinstance(state['tracking_points'], dict):
self.tracking_points = {int(k): v for k, v in state['tracking_points'].items()}
print(f"Loaded tracking_points: {sum(len(v) for v in self.tracking_points.values())} points")
# Validate cut markers against current video length
if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1087,9 +1099,15 @@ class VideoEditor:
# Apply brightness/contrast first (to original frame for best quality)
processed_frame = self.apply_brightness_contrast(processed_frame)
# Apply crop
# Apply crop (with motion tracking follow if enabled)
if self.crop_rect:
x, y, w, h = self.crop_rect
if self.tracking_enabled:
interp = self._get_interpolated_tracking_position(getattr(self, 'current_frame', 0))
if interp:
cx, cy = interp
x = int(round(cx - w / 2))
y = int(round(cy - h / 2))
x, y, w, h = int(x), int(y), int(w), int(h)
# Ensure crop is within frame bounds
x = max(0, min(x, processed_frame.shape[1] - 1))
@@ -1129,6 +1147,135 @@ class VideoEditor:
return processed_frame
# --- Motion tracking helpers ---
def _get_effective_crop_rect_for_frame(self, frame_number):
"""Compute crop rect applied to a given frame, considering tracking follow."""
if not self.crop_rect:
return (0, 0, self.frame_width, self.frame_height)
x, y, w, h = map(int, self.crop_rect)
if self.tracking_enabled:
pos = self._get_interpolated_tracking_position(frame_number)
if pos:
cx, cy = pos
x = int(round(cx - w / 2))
y = int(round(cy - h / 2))
# Clamp to frame bounds
x = max(0, min(x, self.frame_width - 1))
y = max(0, min(y, self.frame_height - 1))
w = min(w, self.frame_width - x)
h = min(h, self.frame_height - y)
return (x, y, w, h)
def _get_interpolated_tracking_position(self, frame_number):
"""Linear interpolation between keyed tracking points.
Returns (x, y) in original frame coords or None.
"""
if not self.tracking_points:
return None
frames = sorted(self.tracking_points.keys())
if not frames:
return None
if frame_number in self.tracking_points and self.tracking_points[frame_number]:
pts = self.tracking_points[frame_number]
return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts))
if frame_number < frames[0]:
pts = self.tracking_points[frames[0]]
return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts)) if pts else None
if frame_number > frames[-1]:
pts = self.tracking_points[frames[-1]]
return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts)) if pts else None
for i in range(len(frames) - 1):
f1, f2 = frames[i], frames[i + 1]
if f1 <= frame_number <= f2:
pts1 = self.tracking_points.get(f1) or []
pts2 = self.tracking_points.get(f2) or []
if not pts1 or not pts2:
continue
x1 = sum(p[0] for p in pts1) / len(pts1)
y1 = sum(p[1] for p in pts1) / len(pts1)
x2 = sum(p[0] for p in pts2) / len(pts2)
y2 = sum(p[1] for p in pts2) / len(pts2)
t = (frame_number - f1) / (f2 - f1) if f2 != f1 else 0.0
return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
return None
    def _map_original_to_screen(self, ox, oy):
        """Map a point in original frame coords to canvas screen coords.

        Applies, in order: crop offset, rotation, zoom, then the
        downscale-only letterbox fit and centering used when drawing the
        frame onto the window canvas. Inverse of _map_screen_to_original.
        """
        # Effective crop for the current frame (includes tracking follow).
        cx, cy, cw, ch = self._get_effective_crop_rect_for_frame(getattr(self, 'current_frame', 0))
        # Translate into crop-local coordinates.
        px = ox - cx
        py = oy - cy
        angle = self.rotation_angle
        # 90/270 rotation swaps the displayed width and height.
        if angle in (90, 270):
            rotated_w, rotated_h = ch, cw
        else:
            rotated_w, rotated_h = cw, ch
        # NOTE(review): these rotation formulas must mirror the convention
        # used by apply_crop_zoom_and_rotation (not visible here) — verify
        # the 90/270 sign/swap choices against that implementation. They are
        # at least the exact inverses of _map_screen_to_original.
        if angle == 90:
            rx, ry = py, rotated_w - px
        elif angle == 180:
            rx, ry = rotated_w - px, rotated_h - py
        elif angle == 270:
            rx, ry = rotated_h - py, px
        else:
            rx, ry = px, py
        # Apply zoom.
        zx = rx * self.zoom_factor
        zy = ry * self.zoom_factor
        base_w, base_h = rotated_w, rotated_h
        disp_w = int(base_w * self.zoom_factor)
        disp_h = int(base_h * self.zoom_factor)
        # Canvas area available for the frame (timeline strip excluded for videos).
        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
        # Downscale-only fit: the frame is shrunk to fit the window but never enlarged.
        scale = min(self.window_width / max(1, disp_w), available_height / max(1, disp_h))
        if scale < 1.0:
            final_w = int(disp_w * scale)
            final_h = int(disp_h * scale)
        else:
            final_w = disp_w
            final_h = disp_h
            scale = 1.0
        # Centered placement (letterbox offsets).
        start_x = (self.window_width - final_w) // 2
        start_y = (available_height - final_h) // 2
        sx = int(round(start_x + zx * scale))
        sy = int(round(start_y + zy * scale))
        return sx, sy
    def _map_screen_to_original(self, sx, sy):
        """Map a point on canvas screen coords back to original frame coords.

        Inverse of _map_original_to_screen: undoes letterbox placement,
        fit scale, zoom, and rotation, then adds the crop offset and clamps
        the result inside the original frame bounds.
        """
        # Effective crop for the current frame (includes tracking follow).
        cx, cy, cw, ch = self._get_effective_crop_rect_for_frame(getattr(self, 'current_frame', 0))
        angle = self.rotation_angle
        # 90/270 rotation swaps the displayed width and height.
        if angle in (90, 270):
            rotated_w, rotated_h = ch, cw
        else:
            rotated_w, rotated_h = cw, ch
        disp_w = int(rotated_w * self.zoom_factor)
        disp_h = int(rotated_h * self.zoom_factor)
        # Canvas area available for the frame (timeline strip excluded for videos).
        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
        # Same downscale-only fit as the forward mapping.
        scale = min(self.window_width / max(1, disp_w), available_height / max(1, disp_h))
        if scale < 1.0:
            final_w = int(disp_w * scale)
            final_h = int(disp_h * scale)
        else:
            final_w = disp_w
            final_h = disp_h
            scale = 1.0
        start_x = (self.window_width - final_w) // 2
        start_y = (available_height - final_h) // 2
        # Undo letterbox offset and fit scale (1e-6 guards divide-by-zero).
        zx = (sx - start_x) / max(1e-6, scale)
        zy = (sy - start_y) / max(1e-6, scale)
        # Undo zoom.
        rx = zx / max(1e-6, self.zoom_factor)
        ry = zy / max(1e-6, self.zoom_factor)
        # Inverse of the rotation cases in _map_original_to_screen
        # (each branch below algebraically inverts the matching forward branch).
        if angle == 90:
            px, py = rotated_w - ry, rx
        elif angle == 180:
            px, py = rotated_w - rx, rotated_h - ry
        elif angle == 270:
            px, py = ry, rotated_h - rx
        else:
            px, py = rx, ry
        # Back to original-frame coordinates via the crop offset; clamp inside frame.
        ox = px + cx
        oy = py + cy
        ox = max(0, min(int(round(ox)), self.frame_width - 1))
        oy = max(0, min(int(round(oy)), self.frame_height - 1))
        return ox, oy
def clear_transformation_cache(self):
"""Clear the cached transformation to force recalculation"""
self.cached_transformed_frame = None
@@ -1665,10 +1812,13 @@ class VideoEditor:
seek_multiplier_text = (
f" | Seek: {self.seek_multiplier:.1f}x" if self.seek_multiplier != 1.0 else ""
)
motion_text = (
f" | Motion: {self.tracking_enabled}" if self.tracking_enabled else ""
)
if self.is_image_mode:
info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}"
info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}"
else:
info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text} | {'Playing' if self.is_playing else 'Paused'}"
info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text} | {'Playing' if self.is_playing else 'Paused'}"
cv2.putText(
canvas,
info_text,
@@ -1754,6 +1904,19 @@ class VideoEditor:
1,
)
# Draw tracking overlays (points and interpolated cross)
pts = self.tracking_points.get(self.current_frame, []) if not self.is_image_mode else []
for (ox, oy) in pts:
sx, sy = self._map_original_to_screen(ox, oy)
cv2.circle(canvas, (sx, sy), 6, (0, 255, 0), -1)
cv2.circle(canvas, (sx, sy), 6, (255, 255, 255), 1)
if self.tracking_enabled and not self.is_image_mode:
interp = self._get_interpolated_tracking_position(self.current_frame)
if interp:
sx, sy = self._map_original_to_screen(interp[0], interp[1])
cv2.line(canvas, (sx - 10, sy), (sx + 10, sy), (255, 0, 0), 2)
cv2.line(canvas, (sx, sy - 10), (sx, sy + 10), (255, 0, 0), 2)
# Draw timeline
self.draw_timeline(canvas)
@@ -1812,6 +1975,31 @@ class VideoEditor:
if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
self.zoom_center = (x, y)
# Handle right-click for tracking points (no modifiers)
if event == cv2.EVENT_RBUTTONDOWN and not (flags & (cv2.EVENT_FLAG_CTRLKEY | cv2.EVENT_FLAG_SHIFTKEY)):
if not self.is_image_mode:
ox, oy = self._map_screen_to_original(x, y)
threshold = 50
removed = False
if self.current_frame in self.tracking_points:
pts_screen = []
for idx, (px, py) in enumerate(self.tracking_points[self.current_frame]):
sxp, syp = self._map_original_to_screen(px, py)
pts_screen.append((idx, sxp, syp))
for idx, sxp, syp in pts_screen:
if (sxp - x) ** 2 + (syp - y) ** 2 <= threshold ** 2:
del self.tracking_points[self.current_frame][idx]
if not self.tracking_points[self.current_frame]:
del self.tracking_points[self.current_frame]
self.show_feedback_message("Tracking point removed")
removed = True
break
if not removed:
self.tracking_points.setdefault(self.current_frame, []).append((int(ox), int(oy)))
self.show_feedback_message("Tracking point added")
self.clear_transformation_cache()
self.save_state()
# Handle scroll wheel for zoom (Ctrl + scroll)
if flags & cv2.EVENT_FLAG_CTRLKEY:
if event == cv2.EVENT_MOUSEWHEEL:
@@ -1832,119 +2020,34 @@ class VideoEditor:
if self.current_display_frame is None:
return
# Get the original frame dimensions
original_height, original_width = self.current_display_frame.shape[:2]
available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
# Calculate how the original frame is displayed (after crop/zoom/rotation)
display_frame = self.apply_crop_zoom_and_rotation(
self.current_display_frame.copy()
)
if display_frame is None:
return
display_height, display_width = display_frame.shape[:2]
# Calculate scale for the display frame
scale = min(
self.window_width / display_width, available_height / display_height
)
if scale < 1.0:
final_display_width = int(display_width * scale)
final_display_height = int(display_height * scale)
else:
final_display_width = display_width
final_display_height = display_height
scale = 1.0
start_x = (self.window_width - final_display_width) // 2
start_y = (available_height - final_display_height) // 2
# Convert screen coordinates to display frame coordinates
display_x = (x - start_x) / scale
display_y = (y - start_y) / scale
display_w = w / scale
display_h = h / scale
# Clamp to display frame bounds
display_x = max(0, min(display_x, display_width))
display_y = max(0, min(display_y, display_height))
display_w = min(display_w, display_width - display_x)
display_h = min(display_h, display_height - display_y)
# Now we need to convert from the display frame coordinates back to original frame coordinates
# The display frame is the result of: original -> crop -> rotation -> zoom
# Step 1: Reverse zoom
if self.zoom_factor != 1.0:
display_x = display_x / self.zoom_factor
display_y = display_y / self.zoom_factor
display_w = display_w / self.zoom_factor
display_h = display_h / self.zoom_factor
# Step 2: Reverse rotation
if self.rotation_angle != 0:
# Get the dimensions of the frame after crop but before rotation
if self.crop_rect:
crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
else:
crop_w, crop_h = original_width, original_height
# Apply inverse rotation to coordinates
# The key insight: we need to use the dimensions of the ROTATED frame for the coordinate transformation
# because the coordinates we have are in the rotated coordinate system
if self.rotation_angle == 90:
# 90° clockwise rotation: (x,y) -> (y, rotated_width-x-w)
# The rotated frame has dimensions: height x width (swapped)
rotated_w, rotated_h = crop_h, crop_w
new_x = display_y
new_y = rotated_w - display_x - display_w
new_w = display_h
new_h = display_w
elif self.rotation_angle == 180:
# 180° rotation: (x,y) -> (width-x-w, height-y-h)
new_x = crop_w - display_x - display_w
new_y = crop_h - display_y - display_h
new_w = display_w
new_h = display_h
elif self.rotation_angle == 270:
# 270° clockwise rotation: (x,y) -> (rotated_height-y-h, x)
# The rotated frame has dimensions: height x width (swapped)
rotated_w, rotated_h = crop_h, crop_w
new_x = rotated_h - display_y - display_h
new_y = display_x
new_w = display_h
new_h = display_w
else:
new_x, new_y, new_w, new_h = display_x, display_y, display_w, display_h
display_x, display_y, display_w, display_h = new_x, new_y, new_w, new_h
# Step 3: Convert from cropped frame coordinates to original frame coordinates
original_x = display_x
original_y = display_y
original_w = display_w
original_h = display_h
# Add the crop offset to get back to original frame coordinates
if self.crop_rect:
crop_x, crop_y, crop_w, crop_h = self.crop_rect
original_x += crop_x
original_y += crop_y
# Map both corners from screen to original to form an axis-aligned crop
# All coordinates are in reference to the ORIGINAL frame
# User input arrives in processed display space → map back to original
x2 = x + w
y2 = y + h
ox1, oy1 = self._map_screen_to_original(x, y)
ox2, oy2 = self._map_screen_to_original(x2, y2)
left = min(ox1, ox2)
top = min(oy1, oy2)
right = max(ox1, ox2)
bottom = max(oy1, oy2)
original_x = left
original_y = top
original_w = max(10, right - left)
original_h = max(10, bottom - top)
# Clamp to original frame bounds
original_x = max(0, min(original_x, original_width))
original_y = max(0, min(original_y, original_height))
original_w = min(original_w, original_width - original_x)
original_h = min(original_h, original_height - original_y)
original_x = max(0, min(original_x, self.frame_width - 1))
original_y = max(0, min(original_y, self.frame_height - 1))
original_w = min(original_w, self.frame_width - original_x)
original_h = min(original_h, self.frame_height - original_y)
if original_w > 10 and original_h > 10: # Minimum size check
# Save current crop for undo
if original_w > 10 and original_h > 10:
if self.crop_rect:
self.crop_history.append(self.crop_rect)
self.crop_rect = (original_x, original_y, original_w, original_h)
self.clear_transformation_cache()
self.save_state() # Save state when crop is set
self.save_state()
def seek_to_timeline_position(self, mouse_x, bar_x_start, bar_width):
"""Seek to position based on mouse click on timeline"""
@@ -2291,12 +2394,15 @@ class VideoEditor:
return False
def _process_frame_for_render(self, frame, output_width: int, output_height: int):
def _process_frame_for_render(self, frame, output_width: int, output_height: int, frame_number: int = None):
"""Process a single frame for rendering (optimized for speed)"""
try:
# Apply crop (vectorized operation)
if self.crop_rect:
x, y, w, h = map(int, self.crop_rect)
if frame_number is None:
x, y, w, h = map(int, self.crop_rect)
else:
x, y, w, h = map(int, self._get_effective_crop_rect_for_frame(frame_number))
# Clamp coordinates to frame bounds
h_frame, w_frame = frame.shape[:2]
@@ -2409,7 +2515,7 @@ class VideoEditor:
if not ret:
break
processed_frame = self._process_frame_for_render(frame, output_width, output_height)
processed_frame = self._process_frame_for_render(frame, output_width, output_height, start_frame + i)
if processed_frame is not None:
if i == 0:
print(f"Processed frame dimensions: {processed_frame.shape[1]}x{processed_frame.shape[0]}")
@@ -2500,6 +2606,11 @@ class VideoEditor:
print(" U: Undo crop")
print(" C: Clear crop")
print()
print("Motion Tracking:")
print(" Right-click: Add/remove tracking point (at current frame)")
print(" v: Toggle motion tracking on/off")
print(" V: Clear all tracking points")
print()
print("Other Controls:")
print(" Ctrl+Scroll: Zoom in/out")
print(" Shift+S: Save screenshot")
@@ -2772,6 +2883,16 @@ class VideoEditor:
else:
print(f"DEBUG: File '{self.video_path.stem}' does not contain '_edited_'")
print("Enter key only overwrites files with '_edited_' in the name. Use 'n' to create new files.")
elif key == ord("v"):
# Toggle motion tracking on/off
self.tracking_enabled = not self.tracking_enabled
self.show_feedback_message(f"Motion tracking {'ON' if self.tracking_enabled else 'OFF'}")
self.save_state()
elif key == ord("V"):
# Clear all tracking points
self.tracking_points = {}
self.show_feedback_message("Tracking points cleared")
self.save_state()
elif key == ord("t"):
# Marker looping only for videos
if not self.is_image_mode: