Disable multi-scale template matching by default in VideoEditor and update state management

This commit changes the default for multi-scale template matching to disabled (`multi_scale_template_matching = False`), so the faster single-scale path is used unless the user turns multi-scale on with Shift+M. The setting is now included in the saved state and restored on load, so the user's preference is preserved across sessions. The on-screen info line also reports the template matching status, appending "(MULTI-SCALE)" when multi-scale matching is active, to improve user feedback during video editing.
Enable multi-scale template matching in VideoEditor with toggle functionality
2025-09-26 14:59:30 +02:00 · 2025-09-26 14:56:06 +02:00 · 2025-09-26 14:53:16 +02:00
1 changed file with 105 additions and 23 deletions
--- a/croppa/main.py
+++ b/croppa/main.py
@@ -880,7 +880,9 @@ class VideoEditor:
        # Template matching tracking
        self.template_matching_enabled = False
        self.tracking_template = None
-        self.template_region = None  # (x, y, w, h) in rotated frame coordinates
+        self.template_region = None
+        self.template_match_history = []  # Store recent match confidences for adaptive thresholding
+        self.multi_scale_template_matching = False  # Disable multi-scale by default  # (x, y, w, h) in rotated frame coordinates
        self.template_selection_start = None
        self.template_selection_rect = None

@@ -931,7 +933,8 @@ class VideoEditor:
                'tracking_points': {str(k): v for k, v in self.tracking_points.items()},
                'feature_tracker': self.feature_tracker.get_state_dict(),
                'template_matching_enabled': self.template_matching_enabled,
-                'template_region': self.template_region
+                'template_region': self.template_region,
+                'multi_scale_template_matching': self.multi_scale_template_matching
            }

            with open(state_file, 'w') as f:
@@ -1025,7 +1028,9 @@ class VideoEditor:
            if 'template_region' in state and state['template_region'] is not None:
                self.template_region = state['template_region']
                # Recreate template from region when needed
-                self.tracking_template = None  # Will be recreated on first use
+                self.tracking_template = None
+            if 'multi_scale_template_matching' in state:
+                self.multi_scale_template_matching = state['multi_scale_template_matching']  # Will be recreated on first use

            # Validate cut markers against current video length
            if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -2225,28 +2230,67 @@ class VideoEditor:
                return None
        
        try:
-            # Convert to grayscale
-            if len(frame.shape) == 3:
-                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            else:
-                gray_frame = frame
+            # Apply image preprocessing for better template matching
+            gray_frame, gray_template = self._improve_template_matching(frame, self.tracking_template)
+            
+            # Multi-scale template matching for better tracking (if enabled)
+            if self.multi_scale_template_matching:
+                scales = [0.8, 0.9, 1.0, 1.1, 1.2]  # Different scales to try
+                best_match = None
+                best_confidence = 0.0
                
-            if len(self.tracking_template.shape) == 3:
-                gray_template = cv2.cvtColor(self.tracking_template, cv2.COLOR_BGR2GRAY)
+                for scale in scales:
+                    # Resize template
+                    template_h, template_w = gray_template.shape
+                    new_w = int(template_w * scale)
+                    new_h = int(template_h * scale)
+                    
+                    if new_w <= 0 or new_h <= 0 or new_w > gray_frame.shape[1] or new_h > gray_frame.shape[0]:
+                        continue
+                        
+                    scaled_template = cv2.resize(gray_template, (new_w, new_h))
+                    
+                    # Perform template matching
+                    result = cv2.matchTemplate(gray_frame, scaled_template, cv2.TM_CCOEFF_NORMED)
+                    _, max_val, _, max_loc = cv2.minMaxLoc(result)
+                    
+                    # Check if this is the best match so far
+                    if max_val > best_confidence:
+                        best_confidence = max_val
+                        # Get center of template
+                        center_x = max_loc[0] + new_w // 2
+                        center_y = max_loc[1] + new_h // 2
+                        best_match = (center_x, center_y, max_val)
            else:
-                gray_template = self.tracking_template
+                # Single-scale template matching (faster)
+                result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
+                _, max_val, _, max_loc = cv2.minMaxLoc(result)
+                
+                if max_val > 0.6:  # Higher threshold for single-scale
+                    template_h, template_w = gray_template.shape
+                    center_x = max_loc[0] + template_w // 2
+                    center_y = max_loc[1] + template_h // 2
+                    best_match = (center_x, center_y, max_val)
+                    best_confidence = max_val
+                else:
+                    best_match = None
+                    best_confidence = 0.0
            
-            # Template matching
-            result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
-            _, max_val, _, max_loc = cv2.minMaxLoc(result)
+            # Adaptive thresholding based on recent match history
+            if len(self.template_match_history) > 0:
+                # Use average of recent matches as baseline
+                avg_confidence = sum(self.template_match_history[-10:]) / len(self.template_match_history[-10:])
+                threshold = max(0.3, avg_confidence * 0.8)  # 80% of recent average, minimum 0.3
+            else:
+                threshold = 0.5  # Default threshold
            
-            # Only accept matches above threshold
-            if max_val > 0.6:  # Adjust threshold as needed
-                # Get template center
-                template_h, template_w = gray_template.shape
-                center_x = max_loc[0] + template_w // 2
-                center_y = max_loc[1] + template_h // 2
-                return (center_x, center_y, max_val)
+            # Only accept matches above adaptive threshold
+            if best_confidence > threshold:
+                # Store confidence for adaptive thresholding
+                self.template_match_history.append(best_confidence)
+                if len(self.template_match_history) > 20:  # Keep only last 20 matches
+                    self.template_match_history.pop(0)
+                return best_match
            else:
                return None
                
@@ -2284,6 +2328,33 @@ class VideoEditor:
        except Exception as e:
            print(f"Error recreating template: {e}")
            return False
+
+    def _improve_template_matching(self, frame, template):
+        """Apply image preprocessing to improve template matching"""
+        try:
+            # Convert to grayscale if needed
+            if len(frame.shape) == 3:
+                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            else:
+                gray_frame = frame
+                
+            if len(template.shape) == 3:
+                gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
+            else:
+                gray_template = template
+            
+            # Apply histogram equalization for better contrast
+            gray_frame = cv2.equalizeHist(gray_frame)
+            gray_template = cv2.equalizeHist(gray_template)
+            
+            # Apply Gaussian blur to reduce noise
+            gray_frame = cv2.GaussianBlur(gray_frame, (3, 3), 0)
+            gray_template = cv2.GaussianBlur(gray_template, (3, 3), 0)
+            
+            return gray_frame, gray_template
+        except Exception as e:
+            print(f"Error improving template matching: {e}")
+            return frame, template
    
    def _set_template_from_region(self, screen_rect):
        """Set template from selected region"""
@@ -2878,13 +2949,18 @@ class VideoEditor:
            feature_text = f" | Features: {feature_count} pts"
            if self.optical_flow_enabled:
                feature_text += " (OPTICAL FLOW)"
+        template_text = (
+            f" | Template: {self.template_matching_enabled}" if self.template_matching_enabled else ""
+        )
+        if self.template_matching_enabled and self.multi_scale_template_matching:
+            template_text += " (MULTI-SCALE)"
        autorepeat_text = (
            f" | Loop: ON" if self.looping_between_markers else ""
        )
        if self.is_image_mode:
-            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}"
+            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}{template_text}"
        else:
-            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}{autorepeat_text} | {'Playing' if self.is_playing else 'Paused'}"
+            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text}{feature_text}{template_text}{autorepeat_text} | {'Playing' if self.is_playing else 'Paused'}"
        cv2.putText(
            canvas,
            info_text,
@@ -4075,6 +4151,7 @@ class VideoEditor:
            print("  H: Switch detector (SIFT/ORB)")
            print("  o: Toggle optical flow tracking")
            print("  m: Toggle template matching tracking")
+            print("  M: Toggle multi-scale template matching")
            print("  Shift+Right-click+drag: Extract features from selected region")
            print("  Ctrl+Right-click+drag: Delete features from selected region")
            print("  Ctrl+Left-click+drag: Set template region for tracking")
@@ -4419,6 +4496,11 @@ class VideoEditor:
                print(f"DEBUG: Template matching toggled to {self.template_matching_enabled}")
                self.show_feedback_message(f"Template matching {'ON' if self.template_matching_enabled else 'OFF'}")
                self.save_state()
+            elif key == ord("M"):  # Shift+M - Toggle multi-scale template matching
+                self.multi_scale_template_matching = not self.multi_scale_template_matching
+                print(f"DEBUG: Multi-scale template matching toggled to {self.multi_scale_template_matching}")
+                self.show_feedback_message(f"Multi-scale template matching {'ON' if self.multi_scale_template_matching else 'OFF'}")
+                self.save_state()
            elif key == ord("t"):
                # Marker looping only for videos
                if not self.is_image_mode: