Enhance feature extraction with coordinate mapping in VideoEditor

This commit adds a coordinate-mapper hook to feature extraction so that features detected in a transformed (cropped/rotated) frame can be mapped back to original-frame coordinates. It introduces new methods for converting coordinates between the original, cropped, and rotated frame spaces, ensuring features are stored and retrieved in a consistent frame space. This improves tracking accuracy in the VideoEditor.
This commit is contained in:
2025-09-26 13:24:48 +02:00
parent c50234f5c1
commit d1b9e7c470

View File

@@ -69,7 +69,7 @@ class FeatureTracker:
else:
print(f"Invalid detector type: {detector_type}")
def extract_features(self, frame: np.ndarray, frame_number: int) -> bool:
def extract_features(self, frame: np.ndarray, frame_number: int, coord_mapper=None) -> bool:
"""Extract features from a frame and store them"""
try:
# Convert to grayscale if needed
@@ -84,11 +84,20 @@ class FeatureTracker:
if keypoints is None or descriptors is None:
return False
# Map coordinates back to original frame space if mapper provided
if coord_mapper:
mapped_positions = []
for kp in keypoints:
orig_x, orig_y = coord_mapper(kp.pt[0], kp.pt[1])
mapped_positions.append((int(orig_x), int(orig_y)))
else:
mapped_positions = [(int(kp.pt[0]), int(kp.pt[1])) for kp in keypoints]
# Store features
self.features[frame_number] = {
'keypoints': keypoints,
'descriptors': descriptors,
'positions': [(int(kp.pt[0]), int(kp.pt[1])) for kp in keypoints]
'positions': mapped_positions
}
print(f"Extracted {len(keypoints)} features from frame {frame_number}")
@@ -1238,7 +1247,8 @@ class VideoEditor:
# Extract features from the transformed frame (what user sees after crop/zoom/rotation)
display_frame = self.apply_crop_zoom_and_rotation(self.current_display_frame)
if display_frame is not None:
self.feature_tracker.extract_features(display_frame, self.current_frame)
# Pass coordinate mapper to map features back to original frame space
self.feature_tracker.extract_features(display_frame, self.current_frame, self._map_transformed_to_original_coords)
def jump_to_previous_marker(self):
"""Jump to the previous tracking marker (frame with tracking points)."""
@@ -1455,11 +1465,48 @@ class VideoEditor:
def _map_transformed_to_original_coords(self, x, y):
"""Map coordinates from transformed frame back to original frame coordinates."""
# This is a simplified mapping - in practice, we'd need to reverse all transformations
# For now, just return the coordinates as-is since the tracking system expects
# coordinates in the original frame space, but we're extracting from transformed frame
# This is a limitation that needs proper coordinate transformation
return (x, y)
# The transformed frame is the result of apply_crop_zoom_and_rotation
# We need to reverse the transformations to get back to original frame coordinates
# First, reverse the crop transformation
if self.crop_rect:
crop_x, crop_y, crop_w, crop_h = self.crop_rect
# Add crop offset back
orig_x = x + crop_x
orig_y = y + crop_y
else:
orig_x, orig_y = x, y
# Then reverse the rotation
if self.rotation_angle == 90:
# 90° clockwise -> 270° counterclockwise
orig_x, orig_y = orig_y, self.frame_width - orig_x
elif self.rotation_angle == 180:
# 180° -> flip both axes
orig_x = self.frame_width - orig_x
orig_y = self.frame_height - orig_y
elif self.rotation_angle == 270:
# 270° clockwise -> 90° counterclockwise
orig_x, orig_y = self.frame_height - orig_y, orig_x
return (int(orig_x), int(orig_y))
def _map_original_to_rotated_coords(self, x, y):
"""Map coordinates from original frame to rotated frame coordinates."""
# First apply rotation
if self.rotation_angle == 90:
# 90° clockwise
rot_x, rot_y = self.frame_height - y, x
elif self.rotation_angle == 180:
# 180° -> flip both axes
rot_x, rot_y = self.frame_width - x, self.frame_height - y
elif self.rotation_angle == 270:
# 270° clockwise
rot_x, rot_y = y, self.frame_width - x
else:
rot_x, rot_y = x, y
return (int(rot_x), int(rot_y))
def _get_interpolated_tracking_position(self, frame_number):
"""Linear interpolation in ROTATED frame coords. Returns (rx, ry) or None."""
@@ -1467,9 +1514,9 @@ class VideoEditor:
if self.feature_tracker.tracking_enabled:
feature_pos = self.feature_tracker.get_tracking_position(frame_number)
if feature_pos:
# Features are extracted from transformed frame, need to map back to original
orig_x, orig_y = self._map_transformed_to_original_coords(feature_pos[0], feature_pos[1])
return (orig_x, orig_y)
# Features are stored in original frame coordinates, transform to rotated frame coordinates
rot_x, rot_y = self._map_original_to_rotated_coords(feature_pos[0], feature_pos[1])
return (rot_x, rot_y)
# Fall back to manual tracking points
if not self.tracking_points:
@@ -3507,7 +3554,8 @@ class VideoEditor:
# Extract features from the transformed frame (what user sees after crop/zoom/rotation)
display_frame = self.apply_crop_zoom_and_rotation(self.current_display_frame)
if display_frame is not None:
success = self.feature_tracker.extract_features(display_frame, self.current_frame)
# Pass coordinate mapper to map features back to original frame space
success = self.feature_tracker.extract_features(display_frame, self.current_frame, self._map_transformed_to_original_coords)
if success:
count = self.feature_tracker.get_feature_count(self.current_frame)
self.show_feedback_message(f"Extracted {count} features from transformed frame")