class MotionTracker:
    """Handles motion tracking for crop and pan operations.

    Tracking points are keyed by frame number; each frame may hold several
    points whose centroid defines the tracked position.  Positions between
    keyframes are linearly interpolated; outside the keyframe range the
    nearest keyframe's centroid is used (clamping).
    """

    def __init__(self):
        # {frame_number: [(x, y), ...]} in original-video coordinates.
        self.tracking_points: Dict[int, List[Tuple[int, int]]] = {}
        self.tracking_enabled = False
        # Crop rect captured when tracking started (x, y, w, h), or None.
        self.base_crop_rect: Optional[Tuple[int, int, int, int]] = None
        # Zoom center captured when tracking started (x, y), or None.
        self.base_zoom_center: Optional[Tuple[int, int]] = None

    @staticmethod
    def _centroid(points: List[Tuple[int, int]]) -> Tuple[float, float]:
        """Return the average (x, y) of a non-empty point list."""
        count = len(points)
        return (
            sum(p[0] for p in points) / count,
            sum(p[1] for p in points) / count,
        )

    def add_tracking_point(self, frame_number: int, x: int, y: int):
        """Add a tracking point at the specified frame and coordinates."""
        self.tracking_points.setdefault(frame_number, []).append((x, y))

    def remove_tracking_point(self, frame_number: int, point_index: int):
        """Remove a tracking point by frame and index; drop empty frame entries."""
        points = self.tracking_points.get(frame_number)
        if points is not None and 0 <= point_index < len(points):
            del points[point_index]
            if not points:
                del self.tracking_points[frame_number]

    def clear_tracking_points(self):
        """Clear all tracking points."""
        self.tracking_points.clear()

    def get_tracking_points_for_frame(self, frame_number: int) -> List[Tuple[int, int]]:
        """Get all tracking points for a specific frame (empty list if none)."""
        return self.tracking_points.get(frame_number, [])

    def has_tracking_points(self) -> bool:
        """Check if any tracking points exist."""
        return bool(self.tracking_points)

    def get_interpolated_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
        """Get the interpolated tracked position for a frame, or None.

        Exact keyframes return their centroid; frames before the first /
        after the last keyframe clamp to the nearest one; frames in between
        are linearly interpolated between the bracketing keyframe centroids.
        """
        # Only consider frames that actually have points (guards against
        # empty lists, e.g. loaded from a hand-edited state file).
        frames = sorted(f for f, pts in self.tracking_points.items() if pts)
        if not frames:
            return None

        if self.tracking_points.get(frame_number):
            return self._centroid(self.tracking_points[frame_number])
        if frame_number < frames[0]:
            return self._centroid(self.tracking_points[frames[0]])
        if frame_number > frames[-1]:
            return self._centroid(self.tracking_points[frames[-1]])

        # Bracketing keyframes: linear interpolation between centroids.
        for frame1, frame2 in zip(frames, frames[1:]):
            if frame1 <= frame_number <= frame2:
                x1, y1 = self._centroid(self.tracking_points[frame1])
                x2, y2 = self._centroid(self.tracking_points[frame2])
                t = (frame_number - frame1) / (frame2 - frame1)
                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
        return None

    def get_tracking_offset(self, frame_number: int) -> Tuple[float, float]:
        """Get the offset of the tracked position from the base zoom center.

        Returns (0.0, 0.0) when tracking is disabled or no position is known.
        Uses an explicit `is None` check so a legitimate (0, 0) base center
        is not mistaken for "no base set".
        """
        if not self.tracking_enabled or self.base_zoom_center is None:
            return (0.0, 0.0)

        current_pos = self.get_interpolated_position(frame_number)
        if current_pos is None:
            return (0.0, 0.0)

        return (
            current_pos[0] - self.base_zoom_center[0],
            current_pos[1] - self.base_zoom_center[1],
        )

    def start_tracking(self, base_crop_rect: Tuple[int, int, int, int], base_zoom_center: Tuple[int, int]):
        """Start motion tracking, capturing the base crop rect and zoom center."""
        self.tracking_enabled = True
        self.base_crop_rect = base_crop_rect
        self.base_zoom_center = base_zoom_center

    def stop_tracking(self):
        """Stop motion tracking and forget the captured base positions."""
        self.tracking_enabled = False
        self.base_crop_rect = None
        self.base_zoom_center = None

    def to_dict(self) -> Dict:
        """Convert to dictionary for serialization (JSON-compatible)."""
        return {
            'tracking_points': self.tracking_points,
            'tracking_enabled': self.tracking_enabled,
            'base_crop_rect': self.base_crop_rect,
            'base_zoom_center': self.base_zoom_center
        }

    def from_dict(self, data: Dict):
        """Load from dictionary for deserialization.

        JSON round-trips stringify the int frame-number keys and turn
        (x, y) tuples into lists, so normalize keys back to int and points
        back to tuples; otherwise lookups by frame number silently miss.
        """
        raw_points = data.get('tracking_points', {}) or {}
        self.tracking_points = {
            int(frame): [tuple(point) for point in points]
            for frame, points in raw_points.items()
        }
        self.tracking_enabled = data.get('tracking_enabled', False)
        base_crop = data.get('base_crop_rect', None)
        self.base_crop_rect = tuple(base_crop) if base_crop is not None else None
        base_zoom = data.get('base_zoom_center', None)
        self.base_zoom_center = tuple(base_zoom) if base_zoom is not None else None
self.motion_tracker.to_dict() } with open(state_file, 'w') as f: @@ -719,6 +863,9 @@ class VideoEditor: if 'is_playing' in state: self.is_playing = state['is_playing'] print(f"Loaded is_playing: {self.is_playing}") + if 'motion_tracker' in state: + self.motion_tracker.from_dict(state['motion_tracker']) + print(f"Loaded motion_tracker data") # Validate cut markers against current video length if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames: @@ -1112,11 +1259,14 @@ class VideoEditor: processed_frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR ) - # Handle zoom center and display offset + # Handle zoom center and display offset with motion tracking if new_width > self.window_width or new_height > self.window_height: + # Apply motion tracking offset to display offset + tracking_offset_x, tracking_offset_y = self.motion_tracker.get_tracking_offset(self.current_frame) + # Calculate crop from zoomed image to fit window - start_x = max(0, self.display_offset[0]) - start_y = max(0, self.display_offset[1]) + start_x = max(0, self.display_offset[0] + tracking_offset_x) + start_y = max(0, self.display_offset[1] + tracking_offset_y) end_x = min(new_width, start_x + self.window_width) end_y = min(new_height, start_y + self.window_height) processed_frame = processed_frame[start_y:end_y, start_x:end_x] @@ -1652,6 +1802,9 @@ class VideoEditor: canvas, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2 ) + # Draw motion tracking points + self.draw_tracking_points(canvas, start_x, start_y, scale) + # Add info overlay rotation_text = ( f" | Rotation: {self.rotation_angle}°" if self.rotation_angle != 0 else "" @@ -1665,10 +1818,16 @@ class VideoEditor: seek_multiplier_text = ( f" | Seek: {self.seek_multiplier:.1f}x" if self.seek_multiplier != 1.0 else "" ) + motion_tracking_text = "" + if not self.is_image_mode and self.motion_tracker.has_tracking_points(): + tracking_status = "ON" if self.motion_tracker.tracking_enabled else "OFF" 
def screen_to_video_coords(self, screen_x: int, screen_y: int) -> Tuple[int, int]:
    """Convert window (screen) coordinates to original video frame coordinates.

    Inverts the display pipeline used by video_to_screen_coords:
    letterbox offset/scale -> zoom -> rotation -> crop offset.
    Returns (0, 0) when no frame is loaded or the transform is unavailable.
    """
    if self.current_display_frame is None:
        return (0, 0)

    original_height, original_width = self.current_display_frame.shape[:2]
    # Timeline strip steals vertical space in video mode only.
    available_height = self.window_height - (
        0 if self.is_image_mode else self.TIMELINE_HEIGHT
    )

    # Re-derive how the frame is currently displayed (after crop/zoom/rotation).
    display_frame = self.apply_crop_zoom_and_rotation(
        self.current_display_frame.copy()
    )
    if display_frame is None:
        return (0, 0)
    display_height, display_width = display_frame.shape[:2]

    # Letterbox: frames larger than the window are scaled down to fit;
    # smaller frames are centered unscaled.
    scale = min(
        self.window_width / display_width, available_height / display_height
    )
    if scale < 1.0:
        final_display_width = int(display_width * scale)
        final_display_height = int(display_height * scale)
    else:
        final_display_width = display_width
        final_display_height = display_height
        scale = 1.0

    start_x = (self.window_width - final_display_width) // 2
    start_y = (available_height - final_display_height) // 2

    # Screen -> display-frame coordinates, clamped to the frame bounds.
    display_x = max(0.0, min((screen_x - start_x) / scale, display_width))
    display_y = max(0.0, min((screen_y - start_y) / scale, display_height))

    # Reverse zoom.
    if self.zoom_factor != 1.0:
        display_x /= self.zoom_factor
        display_y /= self.zoom_factor

    # Reverse rotation.  Forward maps (see video_to_screen_coords):
    #   90°:  (x, y) -> (y, crop_w - x)          inverse: (crop_w - Y, X)
    #   180°: (x, y) -> (crop_w - x, crop_h - y) self-inverse
    #   270°: (x, y) -> (crop_h - y, x)          inverse: (Y, crop_h - X)
    # NOTE(review): the original code had the 90° and 270° inverses swapped,
    # so round-tripping through video_to_screen_coords was not the identity.
    if self.rotation_angle != 0:
        # Dimensions of the frame after crop but before rotation.
        if self.crop_rect:
            crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
        else:
            crop_w, crop_h = original_width, original_height

        if self.rotation_angle == 90:
            display_x, display_y = crop_w - display_y, display_x
        elif self.rotation_angle == 180:
            display_x, display_y = crop_w - display_x, crop_h - display_y
        elif self.rotation_angle == 270:
            display_x, display_y = display_y, crop_h - display_x

    # Back to original-frame coordinates: add the crop offset.
    original_x, original_y = display_x, display_y
    if self.crop_rect:
        original_x += self.crop_rect[0]
        original_y += self.crop_rect[1]

    # Clamp to original frame bounds.
    original_x = max(0, min(original_x, original_width))
    original_y = max(0, min(original_y, original_height))

    return (int(original_x), int(original_y))


def set_feedback_message(self, message: str):
    """Set a transient user feedback message, timestamped for later expiry."""
    self.feedback_message = message
    self.feedback_message_time = time.time()


def draw_tracking_points(self, canvas, start_x, start_y, scale):
    """Draw motion tracking markers on the canvas.

    Points placed on the current frame are filled green circles with a white
    ring; when tracking is enabled, the interpolated position is drawn as a
    red cross.
    """
    if not self.motion_tracker.has_tracking_points():
        return

    # Points explicitly placed on the current frame.
    for video_x, video_y in self.motion_tracker.get_tracking_points_for_frame(
        self.current_frame
    ):
        screen_x, screen_y = self.video_to_screen_coords(
            video_x, video_y, start_x, start_y, scale
        )
        if screen_x is not None and screen_y is not None:
            cv2.circle(canvas, (int(screen_x), int(screen_y)), 8, (0, 255, 0), -1)
            cv2.circle(canvas, (int(screen_x), int(screen_y)), 10, (255, 255, 255), 2)

    # Interpolated position for the current frame while tracking is active.
    if self.motion_tracker.tracking_enabled:
        interp_pos = self.motion_tracker.get_interpolated_position(self.current_frame)
        if interp_pos:
            screen_x, screen_y = self.video_to_screen_coords(
                interp_pos[0], interp_pos[1], start_x, start_y, scale
            )
            if screen_x is not None and screen_y is not None:
                size = 12
                cv2.line(
                    canvas,
                    (int(screen_x - size), int(screen_y)),
                    (int(screen_x + size), int(screen_y)),
                    (255, 0, 0),
                    3,
                )
                cv2.line(
                    canvas,
                    (int(screen_x), int(screen_y - size)),
                    (int(screen_x), int(screen_y + size)),
                    (255, 0, 0),
                    3,
                )


def video_to_screen_coords(self, video_x, video_y, start_x, start_y, scale):
    """Convert original video frame coordinates to window (screen) coordinates.

    Applies the display pipeline: crop offset -> rotation -> zoom ->
    letterbox scale/offset.  Returns (None, None) when no frame is loaded.
    """
    if self.current_display_frame is None:
        return None, None

    original_height, original_width = self.current_display_frame.shape[:2]
    display_x, display_y = video_x, video_y

    # Remove the crop offset.
    if self.crop_rect:
        display_x -= self.crop_rect[0]
        display_y -= self.crop_rect[1]

    # Apply rotation (forward maps; the inverses live in screen_to_video_coords).
    if self.rotation_angle != 0:
        if self.crop_rect:
            crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
        else:
            crop_w, crop_h = original_width, original_height

        if self.rotation_angle == 90:
            # 90° rotation: (x, y) -> (y, crop_w - x)
            display_x, display_y = display_y, crop_w - display_x
        elif self.rotation_angle == 180:
            # 180° rotation: (x, y) -> (crop_w - x, crop_h - y)
            display_x, display_y = crop_w - display_x, crop_h - display_y
        elif self.rotation_angle == 270:
            # 270° rotation: (x, y) -> (crop_h - y, x)
            display_x, display_y = crop_h - display_y, display_x

    # Apply zoom.
    if self.zoom_factor != 1.0:
        display_x *= self.zoom_factor
        display_y *= self.zoom_factor

    # Letterbox scale and offset -> screen coordinates.
    return start_x + display_x * scale, start_y + display_y * scale
operation""" if self.crop_history: @@ -2539,6 +2890,11 @@ class VideoEditor: print(" 1: Set cut start point") print(" 2: Set cut end point") print(" T: Toggle loop between markers") + print() + print("Motion Tracking:") + print(" Right-click: Add tracking point") + print(" t: Toggle motion tracking on/off") + print(" Shift+T: Clear all tracking points") if len(self.video_files) > 1: print(" N: Next video") print(" n: Previous video") @@ -2710,6 +3066,27 @@ class VideoEditor: self.zoom_factor = 1.0 self.clear_transformation_cache() self.save_state() # Save state when crop is cleared + elif key == ord("t"): # T - Toggle motion tracking + if not self.is_image_mode: + if self.motion_tracker.tracking_enabled: + self.motion_tracker.stop_tracking() + self.set_feedback_message("Motion tracking disabled") + else: + if self.motion_tracker.has_tracking_points(): + # Start tracking with current crop and zoom center + base_crop = self.crop_rect if self.crop_rect else (0, 0, self.current_display_frame.shape[1], self.current_display_frame.shape[0]) + base_zoom = self.zoom_center if self.zoom_center else (self.current_display_frame.shape[1]//2, self.current_display_frame.shape[0]//2) + self.motion_tracker.start_tracking(base_crop, base_zoom) + self.set_feedback_message("Motion tracking enabled") + else: + self.set_feedback_message("Add tracking points first (right-click)") + self.save_state() + elif key == ord("T"): # Shift+T - Clear all tracking points + if not self.is_image_mode: + self.motion_tracker.clear_tracking_points() + self.motion_tracker.stop_tracking() + self.set_feedback_message("All tracking points cleared") + self.save_state() elif key == ord("1"): # Cut markers only for videos if not self.is_image_mode: