diff --git a/croppa/main.py b/croppa/main.py
index 7b28338..42cb87b 100644
--- a/croppa/main.py
+++ b/croppa/main.py
@@ -601,6 +601,10 @@ class VideoEditor:
         self.cached_frame_number = None
         self.cached_transform_hash = None
 
+        # Motion tracking state
+        self.tracking_points = {}  # {frame_number: [(x, y), ...]} in original frame coords
+        self.tracking_enabled = False
+
         # Project view mode
         self.project_view_mode = False
         self.project_view = None
@@ -643,7 +647,9 @@ class VideoEditor:
             'display_offset': self.display_offset,
             'playback_speed': getattr(self, 'playback_speed', 1.0),
             'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
-            'is_playing': getattr(self, 'is_playing', False)
+            'is_playing': getattr(self, 'is_playing', False),
+            'tracking_enabled': self.tracking_enabled,
+            'tracking_points': {str(k): v for k, v in self.tracking_points.items()}
         }
 
         with open(state_file, 'w') as f:
@@ -719,6 +725,12 @@ class VideoEditor:
             if 'is_playing' in state:
                 self.is_playing = state['is_playing']
                 print(f"Loaded is_playing: {self.is_playing}")
+            if 'tracking_enabled' in state:
+                self.tracking_enabled = state['tracking_enabled']
+                print(f"Loaded tracking_enabled: {self.tracking_enabled}")
+            if 'tracking_points' in state and isinstance(state['tracking_points'], dict):
+                self.tracking_points = {int(k): v for k, v in state['tracking_points'].items()}
+                print(f"Loaded tracking_points: {sum(len(v) for v in self.tracking_points.values())} points")
 
             # Validate cut markers against current video length
             if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1087,9 +1099,15 @@ class VideoEditor:
         # Apply brightness/contrast first (to original frame for best quality)
         processed_frame = self.apply_brightness_contrast(processed_frame)
 
-        # Apply crop
+        # Apply crop (with motion tracking follow if enabled)
         if self.crop_rect:
             x, y, w, h = self.crop_rect
+            if self.tracking_enabled:
+                interp = self._get_interpolated_tracking_position(getattr(self, 'current_frame', 0))
+                if interp:
+                    cx, cy = interp
+                    x = int(round(cx - w / 2))
+                    y = int(round(cy - h / 2))
             x, y, w, h = int(x), int(y), int(w), int(h)
             # Ensure crop is within frame bounds
             x = max(0, min(x, processed_frame.shape[1] - 1))
@@ -1129,6 +1147,135 @@ class VideoEditor:
 
         return processed_frame
 
+    # --- Motion tracking helpers ---
+    def _get_effective_crop_rect_for_frame(self, frame_number):
+        """Compute crop rect applied to a given frame, considering tracking follow."""
+        if not self.crop_rect:
+            return (0, 0, self.frame_width, self.frame_height)
+        x, y, w, h = map(int, self.crop_rect)
+        if self.tracking_enabled:
+            pos = self._get_interpolated_tracking_position(frame_number)
+            if pos:
+                cx, cy = pos
+                x = int(round(cx - w / 2))
+                y = int(round(cy - h / 2))
+        # Clamp to frame bounds
+        x = max(0, min(x, self.frame_width - 1))
+        y = max(0, min(y, self.frame_height - 1))
+        w = min(w, self.frame_width - x)
+        h = min(h, self.frame_height - y)
+        return (x, y, w, h)
+
+    def _get_interpolated_tracking_position(self, frame_number):
+        """Linear interpolation between keyed tracking points.
+        Returns (x, y) in original frame coords or None.
+        """
+        if not self.tracking_points:
+            return None
+        frames = sorted(self.tracking_points.keys())
+        if not frames:
+            return None
+        if frame_number in self.tracking_points and self.tracking_points[frame_number]:
+            pts = self.tracking_points[frame_number]
+            return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts))
+        if frame_number < frames[0]:
+            pts = self.tracking_points[frames[0]]
+            return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts)) if pts else None
+        if frame_number > frames[-1]:
+            pts = self.tracking_points[frames[-1]]
+            return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts)) if pts else None
+        for i in range(len(frames) - 1):
+            f1, f2 = frames[i], frames[i + 1]
+            if f1 <= frame_number <= f2:
+                pts1 = self.tracking_points.get(f1) or []
+                pts2 = self.tracking_points.get(f2) or []
+                if not pts1 or not pts2:
+                    continue
+                x1 = sum(p[0] for p in pts1) / len(pts1)
+                y1 = sum(p[1] for p in pts1) / len(pts1)
+                x2 = sum(p[0] for p in pts2) / len(pts2)
+                y2 = sum(p[1] for p in pts2) / len(pts2)
+                t = (frame_number - f1) / (f2 - f1) if f2 != f1 else 0.0
+                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
+        return None
+
+    def _map_original_to_screen(self, ox, oy):
+        """Map a point in original frame coords to canvas screen coords."""
+        cx, cy, cw, ch = self._get_effective_crop_rect_for_frame(getattr(self, 'current_frame', 0))
+        px = ox - cx
+        py = oy - cy
+        angle = self.rotation_angle
+        if angle in (90, 270):
+            rotated_w, rotated_h = ch, cw
+        else:
+            rotated_w, rotated_h = cw, ch
+        if angle == 90:
+            rx, ry = py, rotated_h - px  # FIX: was rotated_w; ry spans rotated_h (= cw) for non-square crops
+        elif angle == 180:
+            rx, ry = rotated_w - px, rotated_h - py
+        elif angle == 270:
+            rx, ry = rotated_w - py, px  # FIX: was rotated_h; rx spans rotated_w (= ch) for non-square crops
+        else:
+            rx, ry = px, py
+        zx = rx * self.zoom_factor
+        zy = ry * self.zoom_factor
+        base_w, base_h = rotated_w, rotated_h
+        disp_w = int(base_w * self.zoom_factor)
+        disp_h = int(base_h * self.zoom_factor)
+        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
+        scale = min(self.window_width / max(1, disp_w), available_height / max(1, disp_h))
+        if scale < 1.0:
+            final_w = int(disp_w * scale)
+            final_h = int(disp_h * scale)
+        else:
+            final_w = disp_w
+            final_h = disp_h
+            scale = 1.0
+        start_x = (self.window_width - final_w) // 2
+        start_y = (available_height - final_h) // 2
+        sx = int(round(start_x + zx * scale))
+        sy = int(round(start_y + zy * scale))
+        return sx, sy
+
+    def _map_screen_to_original(self, sx, sy):
+        """Map a point on canvas screen coords back to original frame coords."""
+        cx, cy, cw, ch = self._get_effective_crop_rect_for_frame(getattr(self, 'current_frame', 0))
+        angle = self.rotation_angle
+        if angle in (90, 270):
+            rotated_w, rotated_h = ch, cw
+        else:
+            rotated_w, rotated_h = cw, ch
+        disp_w = int(rotated_w * self.zoom_factor)
+        disp_h = int(rotated_h * self.zoom_factor)
+        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
+        scale = min(self.window_width / max(1, disp_w), available_height / max(1, disp_h))
+        if scale < 1.0:
+            final_w = int(disp_w * scale)
+            final_h = int(disp_h * scale)
+        else:
+            final_w = disp_w
+            final_h = disp_h
+            scale = 1.0
+        start_x = (self.window_width - final_w) // 2
+        start_y = (available_height - final_h) // 2
+        zx = (sx - start_x) / max(1e-6, scale)
+        zy = (sy - start_y) / max(1e-6, scale)
+        rx = zx / max(1e-6, self.zoom_factor)
+        ry = zy / max(1e-6, self.zoom_factor)
+        if angle == 90:
+            px, py = rotated_h - ry, rx  # FIX: exact inverse of the corrected 90° mapping (was rotated_w)
+        elif angle == 180:
+            px, py = rotated_w - rx, rotated_h - ry
+        elif angle == 270:
+            px, py = ry, rotated_w - rx  # FIX: exact inverse of the corrected 270° mapping (was rotated_h)
+        else:
+            px, py = rx, ry
+        ox = px + cx
+        oy = py + cy
+        ox = max(0, min(int(round(ox)), self.frame_width - 1))
+        oy = max(0, min(int(round(oy)), self.frame_height - 1))
+        return ox, oy
+
     def clear_transformation_cache(self):
         """Clear the cached transformation to force recalculation"""
         self.cached_transformed_frame = None
@@ -1665,10 +1812,13 @@ class VideoEditor:
         seek_multiplier_text = (
            f" | Seek: {self.seek_multiplier:.1f}x" if self.seek_multiplier != 1.0 else ""
         )
+        motion_text = (
+            f" | Motion: {self.tracking_enabled}" if self.tracking_enabled else ""
+        )
         if self.is_image_mode:
-            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}"
+            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}"
         else:
-            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text} | {'Playing' if self.is_playing else 'Paused'}"
+            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text} | {'Playing' if self.is_playing else 'Paused'}"
         cv2.putText(
             canvas,
             info_text,
@@ -1754,6 +1904,19 @@ class VideoEditor:
             1,
         )
 
+        # Draw tracking overlays (points and interpolated cross)
+        pts = self.tracking_points.get(self.current_frame, []) if not self.is_image_mode else []
+        for (ox, oy) in pts:
+            sx, sy = self._map_original_to_screen(ox, oy)
+            cv2.circle(canvas, (sx, sy), 6, (0, 255, 0), -1)
+            cv2.circle(canvas, (sx, sy), 6, (255, 255, 255), 1)
+
+        if self.tracking_enabled and not self.is_image_mode:
+            interp = self._get_interpolated_tracking_position(self.current_frame)
+            if interp:
+                sx, sy = self._map_original_to_screen(interp[0], interp[1])
+                cv2.line(canvas, (sx - 10, sy), (sx + 10, sy), (255, 0, 0), 2)
+                cv2.line(canvas, (sx, sy - 10), (sx, sy + 10), (255, 0, 0), 2)
 
         # Draw timeline
         self.draw_timeline(canvas)
@@ -1812,6 +1975,31 @@ class VideoEditor:
         if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
             self.zoom_center = (x, y)
 
+        # Handle right-click for tracking points (no modifiers)
+        if event == cv2.EVENT_RBUTTONDOWN and not (flags & (cv2.EVENT_FLAG_CTRLKEY | cv2.EVENT_FLAG_SHIFTKEY)):
+            if not self.is_image_mode:
+                ox, oy = self._map_screen_to_original(x, y)
+                threshold = 50
+                removed = False
+                if self.current_frame in self.tracking_points:
+                    pts_screen = []
+                    for idx, (px, py) in enumerate(self.tracking_points[self.current_frame]):
+                        sxp, syp = self._map_original_to_screen(px, py)
+                        pts_screen.append((idx, sxp, syp))
+                    for idx, sxp, syp in pts_screen:
+                        if (sxp - x) ** 2 + (syp - y) ** 2 <= threshold ** 2:
+                            del self.tracking_points[self.current_frame][idx]
+                            if not self.tracking_points[self.current_frame]:
+                                del self.tracking_points[self.current_frame]
+                            self.show_feedback_message("Tracking point removed")
+                            removed = True
+                            break
+                if not removed:
+                    self.tracking_points.setdefault(self.current_frame, []).append((int(ox), int(oy)))
+                    self.show_feedback_message("Tracking point added")
+                self.clear_transformation_cache()
+                self.save_state()
+
         # Handle scroll wheel for zoom (Ctrl + scroll)
         if flags & cv2.EVENT_FLAG_CTRLKEY:
             if event == cv2.EVENT_MOUSEWHEEL:
@@ -1832,119 +2020,34 @@ class VideoEditor:
         if self.current_display_frame is None:
             return
 
-        # Get the original frame dimensions
-        original_height, original_width = self.current_display_frame.shape[:2]
-        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
-
-        # Calculate how the original frame is displayed (after crop/zoom/rotation)
-        display_frame = self.apply_crop_zoom_and_rotation(
-            self.current_display_frame.copy()
-        )
-        if display_frame is None:
-            return
-
-        display_height, display_width = display_frame.shape[:2]
-
-        # Calculate scale for the display frame
-        scale = min(
-            self.window_width / display_width, available_height / display_height
-        )
-        if scale < 1.0:
-            final_display_width = int(display_width * scale)
-            final_display_height = int(display_height * scale)
-        else:
-            final_display_width = display_width
-            final_display_height = display_height
-            scale = 1.0
-
-        start_x = (self.window_width - final_display_width) // 2
-        start_y = (available_height - final_display_height) // 2
-
-        # Convert screen coordinates to display frame coordinates
-        display_x = (x - start_x) / scale
-        display_y = (y - start_y) / scale
-        display_w = w / scale
-        display_h = h / scale
-
-        # Clamp to display frame bounds
-        display_x = max(0, min(display_x, display_width))
-        display_y = max(0, min(display_y, display_height))
-        display_w = min(display_w, display_width - display_x)
-        display_h = min(display_h, display_height - display_y)
-
-        # Now we need to convert from the display frame coordinates back to original frame coordinates
-        # The display frame is the result of: original -> crop -> rotation -> zoom
-
-        # Step 1: Reverse zoom
-        if self.zoom_factor != 1.0:
-            display_x = display_x / self.zoom_factor
-            display_y = display_y / self.zoom_factor
-            display_w = display_w / self.zoom_factor
-            display_h = display_h / self.zoom_factor
-
-        # Step 2: Reverse rotation
-        if self.rotation_angle != 0:
-            # Get the dimensions of the frame after crop but before rotation
-            if self.crop_rect:
-                crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
-            else:
-                crop_w, crop_h = original_width, original_height
-
-            # Apply inverse rotation to coordinates
-            # The key insight: we need to use the dimensions of the ROTATED frame for the coordinate transformation
-            # because the coordinates we have are in the rotated coordinate system
-            if self.rotation_angle == 90:
-                # 90° clockwise rotation: (x,y) -> (y, rotated_width-x-w)
-                # The rotated frame has dimensions: height x width (swapped)
-                rotated_w, rotated_h = crop_h, crop_w
-                new_x = display_y
-                new_y = rotated_w - display_x - display_w
-                new_w = display_h
-                new_h = display_w
-            elif self.rotation_angle == 180:
-                # 180° rotation: (x,y) -> (width-x-w, height-y-h)
-                new_x = crop_w - display_x - display_w
-                new_y = crop_h - display_y - display_h
-                new_w = display_w
-                new_h = display_h
-            elif self.rotation_angle == 270:
-                # 270° clockwise rotation: (x,y) -> (rotated_height-y-h, x)
-                # The rotated frame has dimensions: height x width (swapped)
-                rotated_w, rotated_h = crop_h, crop_w
-                new_x = rotated_h - display_y - display_h
-                new_y = display_x
-                new_w = display_h
-                new_h = display_w
-            else:
-                new_x, new_y, new_w, new_h = display_x, display_y, display_w, display_h
-
-            display_x, display_y, display_w, display_h = new_x, new_y, new_w, new_h
-
-        # Step 3: Convert from cropped frame coordinates to original frame coordinates
-        original_x = display_x
-        original_y = display_y
-        original_w = display_w
-        original_h = display_h
-
-        # Add the crop offset to get back to original frame coordinates
-        if self.crop_rect:
-            crop_x, crop_y, crop_w, crop_h = self.crop_rect
-            original_x += crop_x
-            original_y += crop_y
+        # Map both corners from screen to original to form an axis-aligned crop
+        # All coordinates are in reference to the ORIGINAL frame
+        # User input arrives in processed display space → map back to original
+        x2 = x + w
+        y2 = y + h
+        ox1, oy1 = self._map_screen_to_original(x, y)
+        ox2, oy2 = self._map_screen_to_original(x2, y2)
+        left = min(ox1, ox2)
+        top = min(oy1, oy2)
+        right = max(ox1, ox2)
+        bottom = max(oy1, oy2)
+        original_x = left
+        original_y = top
+        original_w = right - left  # FIX: no max(10, ...) floor — it conflicted with the > 10 rejection below
+        original_h = bottom - top  # FIX: tiny drags are rejected by the size check, not clamped then rejected
 
         # Clamp to original frame bounds
-        original_x = max(0, min(original_x, original_width))
-        original_y = max(0, min(original_y, original_height))
-        original_w = min(original_w, original_width - original_x)
-        original_h = min(original_h, original_height - original_y)
+        original_x = max(0, min(original_x, self.frame_width - 1))
+        original_y = max(0, min(original_y, self.frame_height - 1))
+        original_w = min(original_w, self.frame_width - original_x)
+        original_h = min(original_h, self.frame_height - original_y)
 
-        if original_w > 10 and original_h > 10:  # Minimum size check
-            # Save current crop for undo
+        if original_w > 10 and original_h > 10:
             if self.crop_rect:
                 self.crop_history.append(self.crop_rect)
             self.crop_rect = (original_x, original_y, original_w, original_h)
             self.clear_transformation_cache()
-            self.save_state()  # Save state when crop is set
+            self.save_state()
 
     def seek_to_timeline_position(self, mouse_x, bar_x_start, bar_width):
         """Seek to position based on mouse click on timeline"""
@@ -2291,12 +2394,15 @@ class VideoEditor:
 
         return False
 
-    def _process_frame_for_render(self, frame, output_width: int, output_height: int):
+    def _process_frame_for_render(self, frame, output_width: int, output_height: int, frame_number: int = None):
         """Process a single frame for rendering (optimized for speed)"""
         try:
             # Apply crop (vectorized operation)
             if self.crop_rect:
-                x, y, w, h = map(int, self.crop_rect)
+                if frame_number is None:
+                    x, y, w, h = map(int, self.crop_rect)
+                else:
+                    x, y, w, h = map(int, self._get_effective_crop_rect_for_frame(frame_number))
 
                 # Clamp coordinates to frame bounds
                 h_frame, w_frame = frame.shape[:2]
@@ -2409,7 +2515,7 @@ class VideoEditor:
                 if not ret:
                     break
 
-                processed_frame = self._process_frame_for_render(frame, output_width, output_height)
+                processed_frame = self._process_frame_for_render(frame, output_width, output_height, start_frame + i)
                 if processed_frame is not None:
                     if i == 0:
                         print(f"Processed frame dimensions: {processed_frame.shape[1]}x{processed_frame.shape[0]}")
@@ -2500,6 +2606,11 @@ class VideoEditor:
         print("  U: Undo crop")
         print("  C: Clear crop")
         print()
+        print("Motion Tracking:")
+        print("  Right-click: Add/remove tracking point (at current frame)")
+        print("  v: Toggle motion tracking on/off")
+        print("  V: Clear all tracking points")
+        print()
         print("Other Controls:")
         print("  Ctrl+Scroll: Zoom in/out")
         print("  Shift+S: Save screenshot")
@@ -2772,6 +2883,16 @@ class VideoEditor:
                 else:
                     print(f"DEBUG: File '{self.video_path.stem}' does not contain '_edited_'")
                     print("Enter key only overwrites files with '_edited_' in the name. Use 'n' to create new files.")
+        elif key == ord("v"):
+            # Toggle motion tracking on/off
+            self.tracking_enabled = not self.tracking_enabled
+            self.show_feedback_message(f"Motion tracking {'ON' if self.tracking_enabled else 'OFF'}")
+            self.save_state()
+        elif key == ord("V"):
+            # Clear all tracking points
+            self.tracking_points = {}
+            self.show_feedback_message("Tracking points cleared")
+            self.save_state()
        elif key == ord("t"):
             # Marker looping only for videos
             if not self.is_image_mode: