Disable multi-scale template matching by default in VideoEditor and update state management

This commit changes the default setting for multi-scale template matching to false, streamlining the initial configuration. It also updates the state management to include the multi-scale setting, ensuring that the user's preference is preserved across sessions. Additionally, the display information has been enhanced to reflect the current template matching status, improving user feedback during video editing.
Enable multi-scale template matching in VideoEditor with toggle functionality
2025-09-26 14:59:30 +02:00 · 2025-09-26 14:56:06 +02:00 · 2025-09-26 14:53:16 +02:00
1 changed files with 105 additions and 23 deletions
--- a/croppa/main.py
+++ b/croppa/main.py
@@ -880,7 +880,9 @@ class VideoEditor:
        # Template matching tracking
        self.template_matching_enabled = False
        self.tracking_template = None
-        self.template_region = None  # (x, y, w, h) in rotated frame coordinates
+        self.template_region = None
        self.template_match_history = []  # Store recent match confidences for adaptive thresholding
        self.multi_scale_template_matching = False  # Disable multi-scale by default  # (x, y, w, h) in rotated frame coordinates
        self.template_selection_start = None
        self.template_selection_rect = None
@@ -931,7 +933,8 @@ class VideoEditor:
                'tracking_points': {str(k): v for k, v in self.tracking_points.items()},
                'feature_tracker': self.feature_tracker.get_state_dict(),
                'template_matching_enabled': self.template_matching_enabled,
-                'template_region': self.template_region
+                'template_region': self.template_region,
                'multi_scale_template_matching': self.multi_scale_template_matching
            }
            with open(state_file, 'w') as f:
@@ -1025,7 +1028,9 @@ class VideoEditor:
            if 'template_region' in state and state['template_region'] is not None:
                self.template_region = state['template_region']
                # Recreate template from region when needed
-                self.tracking_template = None  # Will be recreated on first use
+                self.tracking_template = None
            if 'multi_scale_template_matching' in state:
                self.multi_scale_template_matching = state['multi_scale_template_matching']  # Will be recreated on first use
            # Validate cut markers against current video length
            if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -2225,28 +2230,67 @@ class VideoEditor:
                return None
        try:
-            # Convert to grayscale
+            # Apply image preprocessing for better template matching
-            if len(frame.shape) == 3:
+            gray_frame, gray_template = self._improve_template_matching(frame, self.tracking_template)
-                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            
-            else:
+            # Multi-scale template matching for better tracking (if enabled)
-                gray_frame = frame
+            if self.multi_scale_template_matching:
                scales = [0.8, 0.9, 1.0, 1.1, 1.2]  # Different scales to try
                best_match = None
                best_confidence = 0.0
-            if len(self.tracking_template.shape) == 3:
+                for scale in scales:
-                gray_template = cv2.cvtColor(self.tracking_template, cv2.COLOR_BGR2GRAY)
+                    # Resize template
                    template_h, template_w = gray_template.shape
                    new_w = int(template_w * scale)
                    new_h = int(template_h * scale)
                    if new_w <= 0 or new_h <= 0 or new_w > gray_frame.shape[1] or new_h > gray_frame.shape[0]:
                        continue
                    scaled_template = cv2.resize(gray_template, (new_w, new_h))
                    # Perform template matching
                    result = cv2.matchTemplate(gray_frame, scaled_template, cv2.TM_CCOEFF_NORMED)
                    _, max_val, _, max_loc = cv2.minMaxLoc(result)
                    # Check if this is the best match so far
                    if max_val > best_confidence:
                        best_confidence = max_val
                        # Get center of template
                        center_x = max_loc[0] + new_w // 2
                        center_y = max_loc[1] + new_h // 2
                        best_match = (center_x, center_y, max_val)
            else:
-                gray_template = self.tracking_template
+                # Single-scale template matching (faster)
                result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
                _, max_val, _, max_loc = cv2.minMaxLoc(result)
                if max_val > 0.6:  # Higher threshold for single-scale
                    template_h, template_w = gray_template.shape
                    center_x = max_loc[0] + template_w // 2
                    center_y = max_loc[1] + template_h // 2
                    best_match = (center_x, center_y, max_val)
                    best_confidence = max_val
                else:
                    best_match = None
                    best_confidence = 0.0
-            # Template matching
+            # Adaptive thresholding based on recent match history
-            result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
+            if len(self.template_match_history) > 0:
-            _, max_val, _, max_loc = cv2.minMaxLoc(result)
+                # Use average of recent matches as baseline
                avg_confidence = sum(self.template_match_history[-10:]) / len(self.template_match_history[-10:])
                threshold = max(0.3, avg_confidence * 0.8)  # 80% of recent average, minimum 0.3
            else:
                threshold = 0.5  # Default threshold
-            # Only accept matches above threshold
+            # Only accept matches above adaptive threshold
-            if max_val > 0.6:  # Adjust threshold as needed
+            if best_confidence > threshold:
-                # Get template center
+                # Store confidence for adaptive thresholding
-                template_h, template_w = gray_template.shape
+                self.template_match_history.append(best_confidence)
-                center_x = max_loc[0] + template_w // 2
+                if len(self.template_match_history) > 20:  # Keep only last 20 matches
-                center_y = max_loc[1] + template_h // 2
+                    self.template_match_history.pop(0)
-                return (center_x, center_y, max_val)
+                return best_match
            else:
                return None
@@ -2284,6 +2328,33 @@ class VideoEditor:
        except Exception as e:
            print(f"Error recreating template: {e}")
            return False
    def _improve_template_matching(self, frame, template):
        """Apply image preprocessing to improve template matching"""
        try:
            # Convert to grayscale if needed
            if len(frame.shape) == 3:
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            else:
                gray_frame = frame
            if len(template.shape) == 3:
                gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
            else:
                gray_template = template
            # Apply histogram equalization for better contrast
            gray_frame = cv2.equalizeHist(gray_frame)
            gray_template = cv2.equalizeHist(gray_template)
            # Apply Gaussian blur to reduce noise
            gray_frame = cv2.GaussianBlur(gray_frame, (3, 3), 0)
            gray_template = cv2.GaussianBlur(gray_template, (3, 3), 0)
            return gray_frame, gray_template
        except Exception as e:
            print(f"Error improving template matching: {e}")
            return frame, template
    def _set_template_from_region(self, screen_rect):
        """Set template from selected region"""
@@ -2878,13 +2949,18 @@ class VideoEditor:
            feature_text = f" | Features: {feature_count} pts"
            if self.optical_flow_enabled:
                feature_text += " (OPTICAL FLOW)"
        template_text = (
            f" | Template: {self.template_matching_enabled}" if self.template_matching_enabled else ""
        )
        if self.template_matching_enabled and self.multi_scale_template_matching:
            template_text += " (MULTI-SCALE)"
        autorepeat_text = (
            f" | Loop: ON" if self.looping_between_markers else ""
        )
        if self.is_image_mode:
-            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}"
+            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}{template_text}"
        else:
-            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}{autorepeat_text} | {'Playing' if self.is_playing else 'Paused'}"
+            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}{template_text}{autorepeat_text} | {'Playing' if self.is_playing else 'Paused'}"
        cv2.putText(
            canvas,
            info_text,
@@ -4075,6 +4151,7 @@ class VideoEditor:
            print("  H: Switch detector (SIFT/ORB)")
            print("  o: Toggle optical flow tracking")
            print("  m: Toggle template matching tracking")
            print("  M: Toggle multi-scale template matching")
            print("  Shift+Right-click+drag: Extract features from selected region")
            print("  Ctrl+Right-click+drag: Delete features from selected region")
            print("  Ctrl+Left-click+drag: Set template region for tracking")
@@ -4419,6 +4496,11 @@ class VideoEditor:
                print(f"DEBUG: Template matching toggled to {self.template_matching_enabled}")
                self.show_feedback_message(f"Template matching {'ON' if self.template_matching_enabled else 'OFF'}")
                self.save_state()
            elif key == ord("M"):  # Shift+M - Toggle multi-scale template matching
                self.multi_scale_template_matching = not self.multi_scale_template_matching
                print(f"DEBUG: Multi-scale template matching toggled to {self.multi_scale_template_matching}")
                self.show_feedback_message(f"Multi-scale template matching {'ON' if self.multi_scale_template_matching else 'OFF'}")
                self.save_state()
            elif key == ord("t"):
                # Marker looping only for videos
                if not self.is_image_mode: