Add MotionTracker class for enhanced motion tracking in VideoEditor

This commit introduces the MotionTracker class, which manages motion tracking points for crop and pan operations. It allows users to add, remove, and clear tracking points, as well as interpolate positions based on these points. The VideoEditor class has been updated to integrate motion tracking functionality, including user interactions for adding tracking points and toggling tracking on and off. Additionally, the display now reflects motion tracking status and visualizes tracking points on the canvas, improving the editing experience.
This commit is contained in:
2025-09-16 14:26:44 +02:00
parent 5baa2572ea
commit f111571601

View File

@@ -4,7 +4,7 @@ import cv2
import argparse
import numpy as np
from pathlib import Path
from typing import List
from typing import List, Dict, Tuple, Optional
import time
import re
import json
@@ -96,6 +96,146 @@ class Cv2BufferedCap:
"""Check if capture is opened"""
return self.cap and self.cap.isOpened()
class MotionTracker:
    """Handles motion tracking for crop and pan operations.

    Tracking points are stored per frame number; positions between keyframes
    are linearly interpolated so the crop/pan view can follow a subject.
    """

    def __init__(self):
        # {frame_number: [(x, y), ...]} — one or more points per keyframe
        self.tracking_points: Dict[int, List[Tuple[int, int]]] = {}
        self.tracking_enabled = False
        # Crop rect / zoom center captured when tracking started; tracking
        # offsets are computed relative to base_zoom_center.
        self.base_crop_rect: Optional[Tuple[int, int, int, int]] = None
        self.base_zoom_center: Optional[Tuple[int, int]] = None

    @staticmethod
    def _mean_point(points: List[Tuple[int, int]]) -> Tuple[float, float]:
        """Return the average (x, y) of a non-empty list of points."""
        return (
            sum(p[0] for p in points) / len(points),
            sum(p[1] for p in points) / len(points),
        )

    def add_tracking_point(self, frame_number: int, x: int, y: int):
        """Add a tracking point at the specified frame and coordinates."""
        self.tracking_points.setdefault(frame_number, []).append((x, y))

    def remove_tracking_point(self, frame_number: int, point_index: int):
        """Remove a tracking point by frame and index; drop empty frames."""
        points = self.tracking_points.get(frame_number)
        if points is not None and 0 <= point_index < len(points):
            del points[point_index]
            if not points:
                # Keep the keyframe dict free of empty entries so
                # has_tracking_points()/interpolation stay meaningful.
                del self.tracking_points[frame_number]

    def clear_tracking_points(self):
        """Clear all tracking points."""
        self.tracking_points.clear()

    def get_tracking_points_for_frame(self, frame_number: int) -> List[Tuple[int, int]]:
        """Get all tracking points for a specific frame."""
        return self.tracking_points.get(frame_number, [])

    def has_tracking_points(self) -> bool:
        """Check if any tracking points exist."""
        return bool(self.tracking_points)

    def get_interpolated_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
        """Get the interpolated (x, y) for a frame based on tracking points.

        Exact keyframes return the average of their points; frames outside
        the keyframe range clamp to the nearest keyframe; frames between
        keyframes are linearly interpolated. Returns None when no usable
        keyframes exist.
        """
        if not self.tracking_points:
            return None
        frames = sorted(self.tracking_points)

        # Exact keyframe: average all points recorded at this frame.
        if frame_number in self.tracking_points:
            points = self.tracking_points[frame_number]
            if points:
                return self._mean_point(points)

        # Before the first / after the last keyframe: clamp.
        if frame_number < frames[0]:
            points = self.tracking_points[frames[0]]
            return self._mean_point(points) if points else None
        if frame_number > frames[-1]:
            points = self.tracking_points[frames[-1]]
            return self._mean_point(points) if points else None

        # Between two keyframes: linear interpolation of the frame averages.
        for frame1, frame2 in zip(frames, frames[1:]):
            if frame1 <= frame_number <= frame2:
                points1 = self.tracking_points[frame1]
                points2 = self.tracking_points[frame2]
                if not points1 or not points2:
                    continue
                x1, y1 = self._mean_point(points1)
                x2, y2 = self._mean_point(points2)
                t = (frame_number - frame1) / (frame2 - frame1)
                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
        return None

    def get_tracking_offset(self, frame_number: int) -> Tuple[float, float]:
        """Offset of the tracked position from the base zoom center.

        Returns (0.0, 0.0) when tracking is disabled, no base was captured,
        or no position can be interpolated for this frame.
        """
        if not self.tracking_enabled or self.base_zoom_center is None:
            return (0.0, 0.0)
        current_pos = self.get_interpolated_position(frame_number)
        if current_pos is None:
            return (0.0, 0.0)
        return (
            current_pos[0] - self.base_zoom_center[0],
            current_pos[1] - self.base_zoom_center[1],
        )

    def start_tracking(self, base_crop_rect: Tuple[int, int, int, int], base_zoom_center: Tuple[int, int]):
        """Start motion tracking, capturing the base crop rect and zoom center."""
        self.tracking_enabled = True
        self.base_crop_rect = base_crop_rect
        self.base_zoom_center = base_zoom_center

    def stop_tracking(self):
        """Stop motion tracking and forget the captured base positions."""
        self.tracking_enabled = False
        self.base_crop_rect = None
        self.base_zoom_center = None

    def to_dict(self) -> Dict:
        """Convert to a dictionary for serialization (saved as JSON)."""
        return {
            'tracking_points': self.tracking_points,
            'tracking_enabled': self.tracking_enabled,
            'base_crop_rect': self.base_crop_rect,
            'base_zoom_center': self.base_zoom_center
        }

    def from_dict(self, data: Dict):
        """Load state from a dict produced by to_dict().

        The editor persists this dict as JSON, which turns int frame keys
        into strings and tuples into lists; normalize back to int keys and
        (x, y) tuples, otherwise frame lookups silently miss after a reload.
        """
        raw_points = data.get('tracking_points', {}) or {}
        self.tracking_points = {
            int(frame): [tuple(point) for point in points]
            for frame, points in raw_points.items()
        }
        self.tracking_enabled = data.get('tracking_enabled', False)
        rect = data.get('base_crop_rect', None)
        self.base_crop_rect = tuple(rect) if rect is not None else None
        center = data.get('base_zoom_center', None)
        self.base_zoom_center = tuple(center) if center is not None else None
def get_active_window_title():
"""Get the title of the currently active window"""
try:
@@ -586,6 +726,9 @@ class VideoEditor:
# Crop adjustment settings
self.crop_size_step = self.CROP_SIZE_STEP
# Motion tracking
self.motion_tracker = MotionTracker()
# Render thread management
self.render_thread = None
self.render_cancelled = False
@@ -643,7 +786,8 @@ class VideoEditor:
'display_offset': self.display_offset,
'playback_speed': getattr(self, 'playback_speed', 1.0),
'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
'is_playing': getattr(self, 'is_playing', False)
'is_playing': getattr(self, 'is_playing', False),
'motion_tracker': self.motion_tracker.to_dict()
}
with open(state_file, 'w') as f:
@@ -719,6 +863,9 @@ class VideoEditor:
if 'is_playing' in state:
self.is_playing = state['is_playing']
print(f"Loaded is_playing: {self.is_playing}")
if 'motion_tracker' in state:
self.motion_tracker.from_dict(state['motion_tracker'])
print(f"Loaded motion_tracker data")
# Validate cut markers against current video length
if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1112,11 +1259,14 @@ class VideoEditor:
processed_frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR
)
# Handle zoom center and display offset
# Handle zoom center and display offset with motion tracking
if new_width > self.window_width or new_height > self.window_height:
# Apply motion tracking offset to display offset
tracking_offset_x, tracking_offset_y = self.motion_tracker.get_tracking_offset(self.current_frame)
# Calculate crop from zoomed image to fit window
start_x = max(0, self.display_offset[0])
start_y = max(0, self.display_offset[1])
start_x = max(0, self.display_offset[0] + tracking_offset_x)
start_y = max(0, self.display_offset[1] + tracking_offset_y)
end_x = min(new_width, start_x + self.window_width)
end_y = min(new_height, start_y + self.window_height)
processed_frame = processed_frame[start_y:end_y, start_x:end_x]
@@ -1652,6 +1802,9 @@ class VideoEditor:
canvas, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2
)
# Draw motion tracking points
self.draw_tracking_points(canvas, start_x, start_y, scale)
# Add info overlay
rotation_text = (
f" | Rotation: {self.rotation_angle}°" if self.rotation_angle != 0 else ""
@@ -1665,10 +1818,16 @@ class VideoEditor:
seek_multiplier_text = (
f" | Seek: {self.seek_multiplier:.1f}x" if self.seek_multiplier != 1.0 else ""
)
motion_tracking_text = ""
if not self.is_image_mode and self.motion_tracker.has_tracking_points():
tracking_status = "ON" if self.motion_tracker.tracking_enabled else "OFF"
point_count = sum(len(points) for points in self.motion_tracker.tracking_points.values())
motion_tracking_text = f" | Motion: {tracking_status} ({point_count} pts)"
if self.is_image_mode:
info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}"
else:
info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text} | {'Playing' if self.is_playing else 'Paused'}"
info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_tracking_text} | {'Playing' if self.is_playing else 'Paused'}"
cv2.putText(
canvas,
info_text,
@@ -1812,6 +1971,15 @@ class VideoEditor:
if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
self.zoom_center = (x, y)
# Handle motion tracking point addition (Right click)
if event == cv2.EVENT_RBUTTONDOWN:
if not self.is_image_mode: # Only for videos
# Convert screen coordinates to video coordinates
video_x, video_y = self.screen_to_video_coords(x, y)
self.motion_tracker.add_tracking_point(self.current_frame, video_x, video_y)
self.set_feedback_message(f"Tracking point added at frame {self.current_frame}")
self.save_state() # Save state when tracking point is added
# Handle scroll wheel for zoom (Ctrl + scroll)
if flags & cv2.EVENT_FLAG_CTRLKEY:
if event == cv2.EVENT_MOUSEWHEEL:
@@ -1953,6 +2121,189 @@ class VideoEditor:
target_frame = int(position_ratio * (self.total_frames - 1))
self.seek_to_frame(target_frame)
def screen_to_video_coords(self, screen_x: int, screen_y: int) -> Tuple[int, int]:
    """Map a window (screen) point back to original video-frame coordinates.

    Undoes the display pipeline in reverse order: letterbox centering and
    scaling, then zoom, then rotation, then the crop offset. Returns (0, 0)
    when no frame is loaded or the transformed frame is unavailable.

    NOTE(review): the 90°/270° inverse maps assume a particular rotation
    direction in apply_crop_zoom_and_rotation — confirm against it.
    """
    frame = self.current_display_frame
    if frame is None:
        return (0, 0)

    src_h, src_w = frame.shape[:2]
    usable_h = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)

    # Re-run the display transform to learn the shown frame's dimensions.
    shown = self.apply_crop_zoom_and_rotation(frame.copy())
    if shown is None:
        return (0, 0)
    shown_h, shown_w = shown.shape[:2]

    # Letterbox fit: shrink to the window, never enlarge.
    fit = min(self.window_width / shown_w, usable_h / shown_h)
    if fit < 1.0:
        boxed_w = int(shown_w * fit)
        boxed_h = int(shown_h * fit)
    else:
        boxed_w, boxed_h = shown_w, shown_h
        fit = 1.0
    margin_x = (self.window_width - boxed_w) // 2
    margin_y = (usable_h - boxed_h) // 2

    # Undo centering + scaling, clamping to the shown frame's bounds.
    px = (screen_x - margin_x) / fit
    py = (screen_y - margin_y) / fit
    px = max(0, min(px, shown_w))
    py = max(0, min(py, shown_h))

    # Undo zoom.
    if self.zoom_factor != 1.0:
        px /= self.zoom_factor
        py /= self.zoom_factor

    # Undo rotation; coordinates here are relative to the cropped frame.
    if self.rotation_angle != 0:
        if self.crop_rect:
            crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
        else:
            crop_w, crop_h = src_w, src_h
        if self.rotation_angle == 90:
            px, py = py, crop_h - px
        elif self.rotation_angle == 180:
            px, py = crop_w - px, crop_h - py
        elif self.rotation_angle == 270:
            px, py = crop_w - py, px

    # Undo crop: shift back into full-frame coordinates.
    if self.crop_rect:
        px += self.crop_rect[0]
        py += self.crop_rect[1]

    # Clamp to the original frame and truncate to pixel indices.
    px = max(0, min(px, src_w))
    py = max(0, min(py, src_h))
    return (int(px), int(py))
def set_feedback_message(self, message: str):
    """Show *message* in the UI overlay and timestamp it for expiry."""
    self.feedback_message_time = time.time()
    self.feedback_message = message
def draw_tracking_points(self, canvas, start_x, start_y, scale):
    """Overlay motion-tracking markers on the composed canvas.

    Keyframe points on the current frame are drawn as filled green dots
    with a white ring; when tracking is enabled, the interpolated position
    is drawn as a red cross. Points that cannot be mapped to screen
    coordinates are skipped.
    """
    tracker = self.motion_tracker
    if not tracker.has_tracking_points():
        return

    # Filled dot + ring for every keyframe point on this frame.
    for vid_x, vid_y in tracker.get_tracking_points_for_frame(self.current_frame):
        sx, sy = self.video_to_screen_coords(vid_x, vid_y, start_x, start_y, scale)
        if sx is None or sy is None:
            continue
        center = (int(sx), int(sy))
        cv2.circle(canvas, center, 8, (0, 255, 0), -1)
        cv2.circle(canvas, center, 10, (255, 255, 255), 2)

    # Red cross at the interpolated position while tracking is active.
    if not tracker.tracking_enabled:
        return
    interp = tracker.get_interpolated_position(self.current_frame)
    if not interp:
        return
    sx, sy = self.video_to_screen_coords(interp[0], interp[1], start_x, start_y, scale)
    if sx is None or sy is None:
        return
    cx, cy, arm = int(sx), int(sy), 12
    cv2.line(canvas, (cx - arm, cy), (cx + arm, cy), (255, 0, 0), 3)
    cv2.line(canvas, (cx, cy - arm), (cx, cy + arm), (255, 0, 0), 3)
def video_to_screen_coords(self, video_x, video_y, start_x, start_y, scale):
    """Convert original video-frame coordinates to on-screen coordinates.

    Applies the display pipeline in order: crop offset, rotation, zoom,
    then the letterbox offset/scale (start_x, start_y, scale) of the
    composed view. Returns (None, None) when no frame is loaded.
    """
    if self.current_display_frame is None:
        return None, None
    original_height, original_width = self.current_display_frame.shape[:2]

    # Step 1: start from full-frame coordinates.
    display_x = video_x
    display_y = video_y

    # Step 2: shift into cropped-frame coordinates.
    if self.crop_rect:
        crop_x, crop_y, crop_w, crop_h = self.crop_rect
        display_x -= crop_x
        display_y -= crop_y

    # Step 3: apply rotation. These forward maps are the exact inverses of
    # the ones screen_to_video_coords uses to undo rotation
    # (90°: (x, y) -> (crop_h - y, x); 270°: (x, y) -> (y, crop_w - x)).
    # The previous version had the 90° and 270° cases swapped, so overlay
    # points drifted on rotated video.
    # NOTE(review): assumes angle 90 renders as a clockwise rotation
    # (cv2.ROTATE_90_CLOCKWISE) — confirm against apply_crop_zoom_and_rotation.
    if self.rotation_angle != 0:
        if self.crop_rect:
            crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
        else:
            crop_w, crop_h = original_width, original_height
        if self.rotation_angle == 90:
            display_x, display_y = crop_h - display_y, display_x
        elif self.rotation_angle == 180:
            display_x, display_y = crop_w - display_x, crop_h - display_y
        elif self.rotation_angle == 270:
            display_x, display_y = display_y, crop_w - display_x

    # Step 4: apply zoom.
    if self.zoom_factor != 1.0:
        display_x *= self.zoom_factor
        display_y *= self.zoom_factor

    # Step 5: letterbox scale and offset to reach screen coordinates.
    screen_x = start_x + display_x * scale
    screen_y = start_y + display_y * scale
    return screen_x, screen_y
def undo_crop(self):
"""Undo the last crop operation"""
if self.crop_history:
@@ -2539,6 +2890,11 @@ class VideoEditor:
print(" 1: Set cut start point")
print(" 2: Set cut end point")
print(" T: Toggle loop between markers")
print()
print("Motion Tracking:")
print(" Right-click: Add tracking point")
print(" t: Toggle motion tracking on/off")
print(" Shift+T: Clear all tracking points")
if len(self.video_files) > 1:
print(" N: Next video")
print(" n: Previous video")
@@ -2710,6 +3066,27 @@ class VideoEditor:
self.zoom_factor = 1.0
self.clear_transformation_cache()
self.save_state() # Save state when crop is cleared
elif key == ord("t"): # T - Toggle motion tracking
if not self.is_image_mode:
if self.motion_tracker.tracking_enabled:
self.motion_tracker.stop_tracking()
self.set_feedback_message("Motion tracking disabled")
else:
if self.motion_tracker.has_tracking_points():
# Start tracking with current crop and zoom center
base_crop = self.crop_rect if self.crop_rect else (0, 0, self.current_display_frame.shape[1], self.current_display_frame.shape[0])
base_zoom = self.zoom_center if self.zoom_center else (self.current_display_frame.shape[1]//2, self.current_display_frame.shape[0]//2)
self.motion_tracker.start_tracking(base_crop, base_zoom)
self.set_feedback_message("Motion tracking enabled")
else:
self.set_feedback_message("Add tracking points first (right-click)")
self.save_state()
elif key == ord("T"): # Shift+T - Clear all tracking points
if not self.is_image_mode:
self.motion_tracker.clear_tracking_points()
self.motion_tracker.stop_tracking()
self.set_feedback_message("All tracking points cleared")
self.save_state()
elif key == ord("1"):
# Cut markers only for videos
if not self.is_image_mode: