Enhance template matching in VideoEditor with multi-scale approach and adaptive thresholding
This commit introduces a multi-scale template matching technique to improve tracking accuracy by evaluating templates at various sizes. Additionally, it implements adaptive thresholding based on recent match confidences, allowing for more dynamic and responsive matching criteria. A new method for image preprocessing is also added to enhance contrast and reduce noise, further optimizing the template matching process.
This commit is contained in:
@@ -880,7 +880,8 @@ class VideoEditor:
|
|||||||
# Template matching tracking
|
# Template matching tracking
|
||||||
self.template_matching_enabled = False
|
self.template_matching_enabled = False
|
||||||
self.tracking_template = None
|
self.tracking_template = None
|
||||||
self.template_region = None # (x, y, w, h) in rotated frame coordinates
|
self.template_region = None
|
||||||
|
self.template_match_history = [] # Store recent match confidences for adaptive thresholding # (x, y, w, h) in rotated frame coordinates
|
||||||
self.template_selection_start = None
|
self.template_selection_start = None
|
||||||
self.template_selection_rect = None
|
self.template_selection_rect = None
|
||||||
|
|
||||||
@@ -2225,28 +2226,52 @@ class VideoEditor:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Convert to grayscale
|
# Apply image preprocessing for better template matching
|
||||||
if len(frame.shape) == 3:
|
gray_frame, gray_template = self._improve_template_matching(frame, self.tracking_template)
|
||||||
gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
|
||||||
else:
|
|
||||||
gray_frame = frame
|
|
||||||
|
|
||||||
if len(self.tracking_template.shape) == 3:
|
# Multi-scale template matching for better tracking
|
||||||
gray_template = cv2.cvtColor(self.tracking_template, cv2.COLOR_BGR2GRAY)
|
scales = [0.8, 0.9, 1.0, 1.1, 1.2] # Different scales to try
|
||||||
else:
|
best_match = None
|
||||||
gray_template = self.tracking_template
|
best_confidence = 0.0
|
||||||
|
|
||||||
# Template matching
|
for scale in scales:
|
||||||
result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
|
# Resize template
|
||||||
_, max_val, _, max_loc = cv2.minMaxLoc(result)
|
|
||||||
|
|
||||||
# Only accept matches above threshold
|
|
||||||
if max_val > 0.6: # Adjust threshold as needed
|
|
||||||
# Get template center
|
|
||||||
template_h, template_w = gray_template.shape
|
template_h, template_w = gray_template.shape
|
||||||
center_x = max_loc[0] + template_w // 2
|
new_w = int(template_w * scale)
|
||||||
center_y = max_loc[1] + template_h // 2
|
new_h = int(template_h * scale)
|
||||||
return (center_x, center_y, max_val)
|
|
||||||
|
if new_w <= 0 or new_h <= 0 or new_w > gray_frame.shape[1] or new_h > gray_frame.shape[0]:
|
||||||
|
continue
|
||||||
|
|
||||||
|
scaled_template = cv2.resize(gray_template, (new_w, new_h))
|
||||||
|
|
||||||
|
# Perform template matching
|
||||||
|
result = cv2.matchTemplate(gray_frame, scaled_template, cv2.TM_CCOEFF_NORMED)
|
||||||
|
_, max_val, _, max_loc = cv2.minMaxLoc(result)
|
||||||
|
|
||||||
|
# Check if this is the best match so far
|
||||||
|
if max_val > best_confidence:
|
||||||
|
best_confidence = max_val
|
||||||
|
# Get center of template
|
||||||
|
center_x = max_loc[0] + new_w // 2
|
||||||
|
center_y = max_loc[1] + new_h // 2
|
||||||
|
best_match = (center_x, center_y, max_val)
|
||||||
|
|
||||||
|
# Adaptive thresholding based on recent match history
|
||||||
|
if len(self.template_match_history) > 0:
|
||||||
|
# Use average of recent matches as baseline
|
||||||
|
avg_confidence = sum(self.template_match_history[-10:]) / len(self.template_match_history[-10:])
|
||||||
|
threshold = max(0.3, avg_confidence * 0.8) # 80% of recent average, minimum 0.3
|
||||||
|
else:
|
||||||
|
threshold = 0.5 # Default threshold
|
||||||
|
|
||||||
|
# Only accept matches above adaptive threshold
|
||||||
|
if best_confidence > threshold:
|
||||||
|
# Store confidence for adaptive thresholding
|
||||||
|
self.template_match_history.append(best_confidence)
|
||||||
|
if len(self.template_match_history) > 20: # Keep only last 20 matches
|
||||||
|
self.template_match_history.pop(0)
|
||||||
|
return best_match
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -2285,6 +2310,33 @@ class VideoEditor:
|
|||||||
print(f"Error recreating template: {e}")
|
print(f"Error recreating template: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _improve_template_matching(self, frame, template):
    """Preprocess a frame/template pair to make template matching more robust.

    Both images are converted to grayscale (when given 3-channel BGR input),
    histogram-equalized to improve contrast, and lightly Gaussian-blurred to
    suppress noise.  If any preprocessing step fails, the original inputs are
    returned unchanged so matching can still be attempted on the raw images.

    Returns:
        (gray_frame, gray_template) — the preprocessed pair, or the untouched
        (frame, template) inputs on error.
    """
    try:
        processed = []
        for image in (frame, template):
            # A 3-dimensional array is assumed to be a BGR color image.
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image
            # Contrast enhancement, then mild (3x3) blur for noise reduction.
            gray = cv2.equalizeHist(gray)
            gray = cv2.GaussianBlur(gray, (3, 3), 0)
            processed.append(gray)
        return processed[0], processed[1]
    except Exception as e:
        # Best-effort: report and fall back to the unprocessed inputs.
        print(f"Error improving template matching: {e}")
        return frame, template
|
||||||
|
|
||||||
def _set_template_from_region(self, screen_rect):
|
def _set_template_from_region(self, screen_rect):
|
||||||
"""Set template from selected region"""
|
"""Set template from selected region"""
|
||||||
x, y, w, h = screen_rect
|
x, y, w, h = screen_rect
|
||||||
|
Reference in New Issue
Block a user