Add motion tracking functionality to VideoEditor
This commit introduces a new MotionTracker class for handling motion tracking during video editing. The VideoEditor class has been updated to integrate motion tracking features, including adding and removing tracking points, interpolating positions, and applying tracking offsets during cropping. The user can toggle motion tracking and clear tracking points via keyboard shortcuts. Additionally, the state management has been enhanced to save and load motion tracking data, improving the overall editing experience.
This commit is contained in:
252
croppa/main.py
252
croppa/main.py
@@ -4,7 +4,7 @@ import cv2
|
||||
import argparse
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import List, Tuple
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
@@ -12,6 +12,7 @@ import threading
|
||||
import queue
|
||||
import subprocess
|
||||
import ctypes
|
||||
from tracking import MotionTracker
|
||||
|
||||
class Cv2BufferedCap:
|
||||
"""Buffered wrapper around cv2.VideoCapture that handles frame loading, seeking, and caching correctly"""
|
||||
@@ -596,6 +597,11 @@ class VideoEditor:
|
||||
self.display_needs_update = True
|
||||
self.last_display_state = None
|
||||
|
||||
# Motion tracking
|
||||
self.motion_tracker = MotionTracker()
|
||||
self.tracking_point_radius = 10 # Radius of tracking point circles
|
||||
self.tracking_point_distance = 50 # Max distance to consider for removing points
|
||||
|
||||
# Cached transformations for performance
|
||||
self.cached_transformed_frame = None
|
||||
self.cached_frame_number = None
|
||||
@@ -628,6 +634,9 @@ class VideoEditor:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Get tracking data
|
||||
tracking_data = self.motion_tracker.to_dict()
|
||||
|
||||
state = {
|
||||
'timestamp': time.time(),
|
||||
'current_frame': getattr(self, 'current_frame', 0),
|
||||
@@ -643,7 +652,8 @@ class VideoEditor:
|
||||
'display_offset': self.display_offset,
|
||||
'playback_speed': getattr(self, 'playback_speed', 1.0),
|
||||
'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
|
||||
'is_playing': getattr(self, 'is_playing', False)
|
||||
'is_playing': getattr(self, 'is_playing', False),
|
||||
'motion_tracking': tracking_data # Add tracking data
|
||||
}
|
||||
|
||||
with open(state_file, 'w') as f:
|
||||
@@ -720,6 +730,11 @@ class VideoEditor:
|
||||
self.is_playing = state['is_playing']
|
||||
print(f"Loaded is_playing: {self.is_playing}")
|
||||
|
||||
# Load motion tracking data if available
|
||||
if 'motion_tracking' in state:
|
||||
self.motion_tracker.from_dict(state['motion_tracking'])
|
||||
print(f"Loaded motion tracking data: {len(self.motion_tracker.tracking_points)} keyframes")
|
||||
|
||||
# Validate cut markers against current video length
|
||||
if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
|
||||
print(f"DEBUG: cut_start_frame {self.cut_start_frame} is beyond video length {self.total_frames}, clearing")
|
||||
@@ -1064,6 +1079,11 @@ class VideoEditor:
|
||||
if frame is None:
|
||||
return None
|
||||
|
||||
# Get tracking offset for crop following if motion tracking is enabled
|
||||
tracking_offset = (0, 0)
|
||||
if self.motion_tracker.tracking_enabled:
|
||||
tracking_offset = self.motion_tracker.get_tracking_offset(self.current_frame)
|
||||
|
||||
# Create a hash of the transformation parameters for caching
|
||||
transform_hash = hash((
|
||||
self.crop_rect,
|
||||
@@ -1071,7 +1091,9 @@ class VideoEditor:
|
||||
self.rotation_angle,
|
||||
self.brightness,
|
||||
self.contrast,
|
||||
tuple(self.display_offset)
|
||||
tuple(self.display_offset),
|
||||
tracking_offset, # Include tracking offset in hash
|
||||
self.motion_tracker.tracking_enabled # Include tracking state in hash
|
||||
))
|
||||
|
||||
# Check if we can use cached transformation during auto-repeat seeking
|
||||
@@ -1090,6 +1112,13 @@ class VideoEditor:
|
||||
# Apply crop
|
||||
if self.crop_rect:
|
||||
x, y, w, h = self.crop_rect
|
||||
|
||||
# Apply tracking offset to crop position if motion tracking is enabled
|
||||
if self.motion_tracker.tracking_enabled:
|
||||
tracking_offset = self.motion_tracker.get_tracking_offset(self.current_frame)
|
||||
x += int(tracking_offset[0])
|
||||
y += int(tracking_offset[1])
|
||||
|
||||
x, y, w, h = int(x), int(y), int(w), int(h)
|
||||
# Ensure crop is within frame bounds
|
||||
x = max(0, min(x, processed_frame.shape[1] - 1))
|
||||
@@ -1135,6 +1164,109 @@ class VideoEditor:
|
||||
self.cached_frame_number = None
|
||||
self.cached_transform_hash = None
|
||||
|
||||
def transform_point(self, point: Tuple[float, float]) -> Tuple[float, float]:
    """Map a point from original-frame coordinates into display coordinates.

    The same pipeline applied to frames is applied here, in order:
    1. crop, 2. rotation, 3. zoom.

    Returns None when the point is None or when the rotation step needs
    frame dimensions that are not available.
    """
    if point is None:
        return None

    px, py = point

    # Step 1: crop — express the point relative to the crop origin.
    if self.crop_rect:
        origin_x, origin_y, _, _ = self.crop_rect
        px -= origin_x
        py -= origin_y

    # Step 2: rotation — requires the post-crop frame dimensions.
    if self.rotation_angle != 0:
        if self.crop_rect:
            frame_w, frame_h = self.crop_rect[2], self.crop_rect[3]
        elif self.current_display_frame is not None:
            frame_h, frame_w = self.current_display_frame.shape[:2]
        else:
            # No way to know the frame size; cannot rotate the point.
            return None

        if self.rotation_angle == 90:
            # 90° clockwise: (x,y) -> (y, width-x)
            px, py = py, frame_w - px
        elif self.rotation_angle == 180:
            # 180° rotation: (x,y) -> (width-x, height-y)
            px, py = frame_w - px, frame_h - py
        elif self.rotation_angle == 270:
            # 270° clockwise: (x,y) -> (height-y, x)
            px, py = frame_h - py, px

    # Step 3: zoom — scale both coordinates.
    if self.zoom_factor != 1.0:
        px *= self.zoom_factor
        py *= self.zoom_factor

    return (px, py)
|
||||
|
||||
def untransform_point(self, point: Tuple[float, float]) -> Tuple[float, float]:
    """Map a point from display coordinates back to original-frame coordinates.

    Exact inverse of transform_point, applied in reverse order:
    1. undo zoom, 2. undo rotation, 3. undo crop.

    Returns None when the point is None or no display frame is loaded.
    """
    if point is None or self.current_display_frame is None:
        return None

    px, py = point

    # Step 1: undo zoom.
    if self.zoom_factor != 1.0:
        px /= self.zoom_factor
        py /= self.zoom_factor

    # Step 2: undo rotation — needs the post-crop, pre-rotation dimensions.
    if self.rotation_angle != 0:
        if self.crop_rect:
            frame_w, frame_h = self.crop_rect[2], self.crop_rect[3]
        else:
            frame_h, frame_w = self.current_display_frame.shape[:2]

        if self.rotation_angle == 90:
            # Reverse 90° clockwise: (x,y) -> (width-y, x)
            px, py = frame_w - py, px
        elif self.rotation_angle == 180:
            # Reverse 180° rotation: (x,y) -> (width-x, height-y)
            px, py = frame_w - px, frame_h - py
        elif self.rotation_angle == 270:
            # Reverse 270° clockwise: (x,y) -> (y, height-x)
            px, py = py, frame_h - px

    # Step 3: undo crop — shift back by the crop origin.
    if self.crop_rect:
        origin_x, origin_y, _, _ = self.crop_rect
        px += origin_x
        py += origin_y

    return (px, py)
|
||||
|
||||
|
||||
def apply_rotation(self, frame):
|
||||
"""Apply rotation to frame"""
|
||||
@@ -1294,6 +1426,64 @@ class VideoEditor:
|
||||
# Keep project view open but switch focus to video editor
|
||||
# Don't destroy the project view window - just let the user switch between them
|
||||
|
||||
def draw_tracking_points(self, canvas, offset_x, offset_y, scale):
    """Render motion-tracking markers onto the canvas.

    Keyframe points for the current frame are drawn as green dots with a
    white outline, the interpolated track position as a blue cross (colors
    are BGR), and a status line is overlaid in the bottom-right corner
    whenever tracking is enabled or any points exist.

    Args:
        canvas: Image to draw on.
        offset_x: Horizontal offset of the frame within the canvas.
        offset_y: Vertical offset of the frame within the canvas.
        scale: Scale factor applied to the frame on the canvas.
    """
    if self.current_frame is None:
        return

    def to_canvas(pt):
        # Map an original-frame point to integer canvas coordinates,
        # or None if it cannot be transformed.
        mapped = self.transform_point(pt)
        if not mapped:
            return None
        return (int(offset_x + mapped[0] * scale),
                int(offset_y + mapped[1] * scale))

    # Keyframe points for this frame: white ring, then green fill.
    for pt in self.motion_tracker.get_tracking_points_for_frame(self.current_frame):
        pos = to_canvas(pt)
        if pos is None:
            continue
        cv2.circle(canvas, pos, self.tracking_point_radius + 2, (255, 255, 255), 2)
        cv2.circle(canvas, pos, self.tracking_point_radius, (0, 255, 0), -1)

    # Interpolated track position: blue cross, shown only while tracking.
    if self.motion_tracker.tracking_enabled:
        interp = self.motion_tracker.get_interpolated_position(self.current_frame)
        pos = to_canvas(interp) if interp else None
        if pos is not None:
            cx, cy = pos
            cross_size = 10
            cv2.line(canvas, (cx - cross_size, cy), (cx + cross_size, cy), (255, 0, 0), 2)
            cv2.line(canvas, (cx, cy - cross_size), (cx, cy + cross_size), (255, 0, 0), 2)

    # Status overlay: tracking state plus total point count.
    if self.motion_tracker.tracking_enabled or self.motion_tracker.has_tracking_points():
        point_count = sum(len(pts) for pts in self.motion_tracker.tracking_points.values())
        status_text = f"Motion: {'ON' if self.motion_tracker.tracking_enabled else 'OFF'} ({point_count} pts)"

        # Anchor the text near the bottom-right corner, above the timeline.
        text_x = self.window_width - 250
        text_y = self.window_height - (self.TIMELINE_HEIGHT if not self.is_image_mode else 30)

        # Shadow pass first, then the white text on top.
        cv2.putText(canvas, status_text, (text_x + 2, text_y + 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
        cv2.putText(canvas, status_text, (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 1)
|
||||
|
||||
def draw_feedback_message(self, frame):
|
||||
"""Draw feedback message on frame if visible"""
|
||||
if not self.feedback_message or not self.feedback_message_time:
|
||||
@@ -1757,6 +1947,9 @@ class VideoEditor:
|
||||
# Draw timeline
|
||||
self.draw_timeline(canvas)
|
||||
|
||||
# Draw tracking points and tracking position
|
||||
self.draw_tracking_points(canvas, start_x, start_y, scale)
|
||||
|
||||
# Draw progress bar (if visible)
|
||||
self.draw_progress_bar(canvas)
|
||||
|
||||
@@ -1808,6 +2001,32 @@ class VideoEditor:
|
||||
self.crop_start_point = None
|
||||
self.crop_preview_rect = None
|
||||
|
||||
# Handle tracking points (Right-click)
|
||||
if event == cv2.EVENT_RBUTTONDOWN:
|
||||
# Convert display coordinates to original frame coordinates
|
||||
original_point = self.untransform_point((x, y))
|
||||
|
||||
if original_point:
|
||||
# Check if clicking on an existing tracking point to remove it
|
||||
removed = self.motion_tracker.remove_tracking_point(
|
||||
self.current_frame,
|
||||
original_point[0],
|
||||
original_point[1],
|
||||
self.tracking_point_distance
|
||||
)
|
||||
|
||||
if not removed:
|
||||
# If no point was removed, add a new tracking point
|
||||
self.motion_tracker.add_tracking_point(
|
||||
self.current_frame,
|
||||
original_point[0],
|
||||
original_point[1]
|
||||
)
|
||||
|
||||
# Save state when tracking points change
|
||||
self.save_state()
|
||||
self.display_needs_update = True
|
||||
|
||||
# Handle zoom center (Ctrl + click)
|
||||
if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
|
||||
self.zoom_center = (x, y)
|
||||
@@ -2772,6 +2991,33 @@ class VideoEditor:
|
||||
else:
|
||||
print(f"DEBUG: File '{self.video_path.stem}' does not contain '_edited_'")
|
||||
print("Enter key only overwrites files with '_edited_' in the name. Use 'n' to create new files.")
|
||||
elif key == ord("v") or key == ord("V"):
|
||||
# Motion tracking controls
|
||||
if key == ord("v"):
|
||||
# Toggle motion tracking
|
||||
if self.motion_tracker.tracking_enabled:
|
||||
self.motion_tracker.stop_tracking()
|
||||
print("Motion tracking disabled")
|
||||
else:
|
||||
# Start tracking with current crop and zoom center
|
||||
base_zoom_center = self.zoom_center
|
||||
if not base_zoom_center and self.current_display_frame is not None:
|
||||
# Use frame center if no zoom center is set
|
||||
h, w = self.current_display_frame.shape[:2]
|
||||
base_zoom_center = (w // 2, h // 2)
|
||||
|
||||
self.motion_tracker.start_tracking(
|
||||
self.crop_rect,
|
||||
base_zoom_center
|
||||
)
|
||||
print("Motion tracking enabled")
|
||||
self.save_state()
|
||||
else: # V - Clear all tracking points
|
||||
self.motion_tracker.clear_tracking_points()
|
||||
print("All tracking points cleared")
|
||||
self.save_state()
|
||||
self.display_needs_update = True
|
||||
|
||||
elif key == ord("t"):
|
||||
# Marker looping only for videos
|
||||
if not self.is_image_mode:
|
||||
|
157
croppa/tracking.py
Normal file
157
croppa/tracking.py
Normal file
@@ -0,0 +1,157 @@
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
|
||||
|
||||
class MotionTracker:
    """Handles motion tracking for crop and pan operations.

    Tracking points are keyed by frame number; each frame may hold several
    (x, y) points whose average defines the tracked position at that frame.
    Positions between keyframes are linearly interpolated, and positions
    outside the tracked range are clamped to the nearest keyframe.
    """

    def __init__(self):
        # {frame_number: [(x, y), ...]} — user-placed keyframe points
        self.tracking_points: Dict[int, List[Tuple[float, float]]] = {}
        # Whether tracking offsets are currently applied
        self.tracking_enabled = False
        # Crop rect captured when tracking started
        self.base_crop_rect: Optional[Tuple[int, int, int, int]] = None
        # Zoom center captured when tracking started; offsets are relative to it
        self.base_zoom_center: Optional[Tuple[int, int]] = None

    @staticmethod
    def _mean_point(points: List[Tuple[float, float]]) -> Optional[Tuple[float, float]]:
        """Return the average (x, y) of a list of points, or None if empty."""
        if not points:
            return None
        n = len(points)
        return (sum(p[0] for p in points) / n, sum(p[1] for p in points) / n)

    def add_tracking_point(self, frame_number: int, x: int, y: int):
        """Add a tracking point at the specified frame and coordinates."""
        self.tracking_points.setdefault(frame_number, []).append((x, y))

    def remove_tracking_point(self, frame_number: int, x: int, y: int, radius: int = 50) -> bool:
        """Remove the first tracking point on *frame_number* within *radius* of (x, y).

        Returns True if a point was removed. A frame left with no points is
        dropped entirely so interpolation never sees empty keyframes.
        """
        if frame_number not in self.tracking_points:
            return False

        points = self.tracking_points[frame_number]
        for i, (px, py) in enumerate(points):
            # Euclidean distance to the candidate point
            if ((px - x) ** 2 + (py - y) ** 2) ** 0.5 <= radius:
                del points[i]
                if not points:
                    del self.tracking_points[frame_number]
                return True

        return False

    def clear_tracking_points(self):
        """Clear all tracking points."""
        self.tracking_points.clear()

    def get_tracking_points_for_frame(self, frame_number: int) -> List[Tuple[int, int]]:
        """Return a copy of the tracking points for a specific frame.

        A copy is returned so callers cannot mutate internal state.
        """
        return list(self.tracking_points.get(frame_number, []))

    def has_tracking_points(self) -> bool:
        """Check if any tracking points exist."""
        return bool(self.tracking_points)

    def get_interpolated_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
        """Return the tracked position at *frame_number*, or None if untracked.

        Exact keyframes return the average of their points; frames between
        keyframes are linearly interpolated; frames outside the tracked
        range are clamped to the first/last keyframe.
        """
        if not self.tracking_points:
            return None

        frames = sorted(self.tracking_points)

        # Exact keyframe: average of all points placed on it.
        if frame_number in self.tracking_points:
            pos = self._mean_point(self.tracking_points[frame_number])
            if pos is not None:
                return pos

        # Clamp outside the tracked range.
        if frame_number < frames[0]:
            return self._mean_point(self.tracking_points[frames[0]])
        if frame_number > frames[-1]:
            return self._mean_point(self.tracking_points[frames[-1]])

        # Linear interpolation between the two surrounding keyframes.
        for frame1, frame2 in zip(frames, frames[1:]):
            if frame1 <= frame_number <= frame2:
                pos1 = self._mean_point(self.tracking_points[frame1])
                pos2 = self._mean_point(self.tracking_points[frame2])
                if pos1 is None or pos2 is None:
                    continue
                t = (frame_number - frame1) / (frame2 - frame1)
                return (pos1[0] + t * (pos2[0] - pos1[0]),
                        pos1[1] + t * (pos2[1] - pos1[1]))

        return None

    def get_tracking_offset(self, frame_number: int) -> Tuple[float, float]:
        """Return the (dx, dy) offset that keeps the tracked point centered.

        The offset is the tracked position relative to the zoom center that
        was captured when tracking started; (0, 0) when tracking is off or
        no position can be computed.
        """
        if not self.tracking_enabled or not self.base_zoom_center:
            return (0.0, 0.0)

        current_pos = self.get_interpolated_position(frame_number)
        if not current_pos:
            return (0.0, 0.0)

        return (current_pos[0] - self.base_zoom_center[0],
                current_pos[1] - self.base_zoom_center[1])

    def start_tracking(self, base_crop_rect: Tuple[int, int, int, int], base_zoom_center: Tuple[int, int]):
        """Start motion tracking, capturing the current crop and zoom center."""
        self.tracking_enabled = True
        self.base_crop_rect = base_crop_rect
        self.base_zoom_center = base_zoom_center

    def stop_tracking(self):
        """Stop motion tracking and forget the captured base positions."""
        self.tracking_enabled = False
        self.base_crop_rect = None
        self.base_zoom_center = None

    def to_dict(self) -> Dict:
        """Convert to a dictionary for serialization."""
        return {
            'tracking_points': self.tracking_points,
            'tracking_enabled': self.tracking_enabled,
            'base_crop_rect': self.base_crop_rect,
            'base_zoom_center': self.base_zoom_center
        }

    def from_dict(self, data: Dict):
        """Load state from a dict produced by to_dict, possibly JSON round-tripped.

        JSON serializes int keys as strings and tuples as lists, so frame
        keys are converted back to ints and points/base values back to
        tuples, keeping the in-memory representation consistent with
        add_tracking_point / start_tracking.
        """
        self.tracking_points = {
            int(frame): [tuple(p) for p in points]
            for frame, points in data.get('tracking_points', {}).items()
        }
        self.tracking_enabled = data.get('tracking_enabled', False)

        base_rect = data.get('base_crop_rect', None)
        self.base_crop_rect = tuple(base_rect) if base_rect is not None else None

        base_center = data.get('base_zoom_center', None)
        self.base_zoom_center = tuple(base_center) if base_center is not None else None
|
Reference in New Issue
Block a user