Enhance feature extraction with coordinate mapping in VideoEditor

This commit modifies the feature extraction process to include a coordinate mapper, allowing for accurate mapping of features from transformed frames back to their original coordinates. It introduces new methods for handling coordinate transformations during cropping and rotation, ensuring that features are correctly stored and retrieved in the appropriate frame space. This improvement enhances the tracking accuracy and overall functionality of the VideoEditor.
2025-09-26 13:24:48 +02:00
parent c50234f5c1
commit d1b9e7c470
1 changed file with 60 additions and 12 deletions
--- a/croppa/main.py
+++ b/croppa/main.py
@@ -69,7 +69,7 @@ class FeatureTracker:
        else:
            print(f"Invalid detector type: {detector_type}")
-    def extract_features(self, frame: np.ndarray, frame_number: int) -> bool:
+    def extract_features(self, frame: np.ndarray, frame_number: int, coord_mapper=None) -> bool:
        """Extract features from a frame and store them"""
        try:
            # Convert to grayscale if needed
@@ -84,11 +84,20 @@ class FeatureTracker:
            if keypoints is None or descriptors is None:
                return False
            # Map coordinates back to original frame space if mapper provided
            if coord_mapper:
                mapped_positions = []
                for kp in keypoints:
                    orig_x, orig_y = coord_mapper(kp.pt[0], kp.pt[1])
                    mapped_positions.append((int(orig_x), int(orig_y)))
            else:
                mapped_positions = [(int(kp.pt[0]), int(kp.pt[1])) for kp in keypoints]
            # Store features
            self.features[frame_number] = {
                'keypoints': keypoints,
                'descriptors': descriptors,
-                'positions': [(int(kp.pt[0]), int(kp.pt[1])) for kp in keypoints]
+                'positions': mapped_positions
            }
            print(f"Extracted {len(keypoints)} features from frame {frame_number}")
@@ -1238,7 +1247,8 @@ class VideoEditor:
                # Extract features from the transformed frame (what user sees after crop/zoom/rotation)
                display_frame = self.apply_crop_zoom_and_rotation(self.current_display_frame)
                if display_frame is not None:
-                    self.feature_tracker.extract_features(display_frame, self.current_frame)
+                    # Pass coordinate mapper to map features back to original frame space
                    self.feature_tracker.extract_features(display_frame, self.current_frame, self._map_transformed_to_original_coords)
    def jump_to_previous_marker(self):
        """Jump to the previous tracking marker (frame with tracking points)."""
@@ -1455,11 +1465,48 @@ class VideoEditor:
    def _map_transformed_to_original_coords(self, x, y):
        """Map coordinates from transformed frame back to original frame coordinates."""
-        # This is a simplified mapping - in practice, we'd need to reverse all transformations
+        # The transformed frame is the result of apply_crop_zoom_and_rotation
-        # For now, just return the coordinates as-is since the tracking system expects
+        # We need to reverse the transformations to get back to original frame coordinates
-        # coordinates in the original frame space, but we're extracting from transformed frame
+        
-        # This is a limitation that needs proper coordinate transformation
+        # First, reverse the crop transformation
-        return (x, y)
+        if self.crop_rect:
            crop_x, crop_y, crop_w, crop_h = self.crop_rect
            # Add crop offset back
            orig_x = x + crop_x
            orig_y = y + crop_y
        else:
            orig_x, orig_y = x, y
        # Then reverse the rotation
        if self.rotation_angle == 90:
            # 90° clockwise -> 270° counterclockwise
            orig_x, orig_y = orig_y, self.frame_width - orig_x
        elif self.rotation_angle == 180:
            # 180° -> flip both axes
            orig_x = self.frame_width - orig_x
            orig_y = self.frame_height - orig_y
        elif self.rotation_angle == 270:
            # 270° clockwise -> 90° counterclockwise
            orig_x, orig_y = self.frame_height - orig_y, orig_x
        return (int(orig_x), int(orig_y))
    def _map_original_to_rotated_coords(self, x, y):
        """Map coordinates from original frame to rotated frame coordinates."""
        # First apply rotation
        if self.rotation_angle == 90:
            # 90° clockwise
            rot_x, rot_y = self.frame_height - y, x
        elif self.rotation_angle == 180:
            # 180° -> flip both axes
            rot_x, rot_y = self.frame_width - x, self.frame_height - y
        elif self.rotation_angle == 270:
            # 270° clockwise
            rot_x, rot_y = y, self.frame_width - x
        else:
            rot_x, rot_y = x, y
        return (int(rot_x), int(rot_y))
    def _get_interpolated_tracking_position(self, frame_number):
        """Linear interpolation in ROTATED frame coords. Returns (rx, ry) or None."""
@@ -1467,9 +1514,9 @@ class VideoEditor:
        if self.feature_tracker.tracking_enabled:
            feature_pos = self.feature_tracker.get_tracking_position(frame_number)
            if feature_pos:
-                # Features are extracted from transformed frame, need to map back to original
+                # Features are stored in original frame coordinates, transform to rotated frame coordinates
-                orig_x, orig_y = self._map_transformed_to_original_coords(feature_pos[0], feature_pos[1])
+                rot_x, rot_y = self._map_original_to_rotated_coords(feature_pos[0], feature_pos[1])
-                return (orig_x, orig_y)
+                return (rot_x, rot_y)
        # Fall back to manual tracking points
        if not self.tracking_points:
@@ -3507,7 +3554,8 @@ class VideoEditor:
                    # Extract features from the transformed frame (what user sees after crop/zoom/rotation)
                    display_frame = self.apply_crop_zoom_and_rotation(self.current_display_frame)
                    if display_frame is not None:
-                        success = self.feature_tracker.extract_features(display_frame, self.current_frame)
+                        # Pass coordinate mapper to map features back to original frame space
                        success = self.feature_tracker.extract_features(display_frame, self.current_frame, self._map_transformed_to_original_coords)
                        if success:
                            count = self.feature_tracker.get_feature_count(self.current_frame)
                            self.show_feedback_message(f"Extracted {count} features from transformed frame")