Refactor and streamline template matching logic in VideoEditor
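This commit removes unused methods (`FeatureTracker.get_tracking_position`, `VideoEditor.set_tracking_template`, and `VideoEditor._recreate_template_from_region`) and simplifies `VideoEditor.track_template` by deleting the multi-scale matching branch, which was already disabled behind `if False:`, so that only the single-scale `cv2.matchTemplate` path (confidence threshold 0.6) remains. The now-unneeded `Optional` and `Tuple` imports are dropped, the debug prints that lived in the removed methods go away with them, and the status text no longer carries the unreachable " (MULTI-SCALE)" suffix. Because the multi-scale code was already unreachable, tracking behavior is unchanged; the benefit is a shorter, clearer tracking path rather than a measurable speed-up.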
This commit is contained in:
114
croppa/main.py
114
croppa/main.py
@@ -4,7 +4,7 @@ import cv2
|
|||||||
import argparse
|
import argparse
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Tuple, Dict, Any
|
from typing import List, Dict, Any
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
import threading
|
import threading
|
||||||
@@ -200,24 +200,6 @@ class FeatureTracker:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_tracking_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
|
|
||||||
"""Get the average tracking position for a frame"""
|
|
||||||
if frame_number not in self.features:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not self.features[frame_number]['positions']:
|
|
||||||
return None
|
|
||||||
|
|
||||||
positions = self.features[frame_number]['positions']
|
|
||||||
|
|
||||||
if not positions:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Calculate average position
|
|
||||||
avg_x = sum(pos[0] for pos in positions) / len(positions)
|
|
||||||
avg_y = sum(pos[1] for pos in positions) / len(positions)
|
|
||||||
|
|
||||||
return (avg_x, avg_y)
|
|
||||||
|
|
||||||
|
|
||||||
def clear_features(self):
|
def clear_features(self):
|
||||||
@@ -2308,17 +2290,6 @@ class VideoEditor:
|
|||||||
|
|
||||||
return (interp_x, interp_y)
|
return (interp_x, interp_y)
|
||||||
|
|
||||||
def set_tracking_template(self, frame, region):
|
|
||||||
"""Set a template region for tracking (much better than optical flow)"""
|
|
||||||
try:
|
|
||||||
x, y, w, h = region
|
|
||||||
self.tracking_template = frame[y:y+h, x:x+w].copy()
|
|
||||||
self.template_region = region
|
|
||||||
print(f"DEBUG: Set tracking template with region {region}")
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error setting tracking template: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def track_template(self, frame):
|
def track_template(self, frame):
|
||||||
"""Track the template in the current frame"""
|
"""Track the template in the current frame"""
|
||||||
@@ -2329,48 +2300,19 @@ class VideoEditor:
|
|||||||
# Apply image preprocessing for better template matching
|
# Apply image preprocessing for better template matching
|
||||||
gray_frame, gray_template = self._improve_template_matching(frame, self.tracking_template)
|
gray_frame, gray_template = self._improve_template_matching(frame, self.tracking_template)
|
||||||
|
|
||||||
# Multi-scale template matching for better tracking (if enabled)
|
# Single-scale template matching (faster)
|
||||||
if False: # Multi-scale template matching removed
|
result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
|
||||||
scales = [0.8, 0.9, 1.0, 1.1, 1.2] # Different scales to try
|
_, max_val, _, max_loc = cv2.minMaxLoc(result)
|
||||||
|
|
||||||
|
if max_val > 0.6: # Higher threshold for single-scale
|
||||||
|
template_h, template_w = gray_template.shape
|
||||||
|
center_x = max_loc[0] + template_w // 2
|
||||||
|
center_y = max_loc[1] + template_h // 2
|
||||||
|
best_match = (center_x, center_y, max_val)
|
||||||
|
best_confidence = max_val
|
||||||
|
else:
|
||||||
best_match = None
|
best_match = None
|
||||||
best_confidence = 0.0
|
best_confidence = 0.0
|
||||||
|
|
||||||
for scale in scales:
|
|
||||||
# Resize template
|
|
||||||
template_h, template_w = gray_template.shape
|
|
||||||
new_w = int(template_w * scale)
|
|
||||||
new_h = int(template_h * scale)
|
|
||||||
|
|
||||||
if new_w <= 0 or new_h <= 0 or new_w > gray_frame.shape[1] or new_h > gray_frame.shape[0]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
scaled_template = cv2.resize(gray_template, (new_w, new_h))
|
|
||||||
|
|
||||||
# Perform template matching
|
|
||||||
result = cv2.matchTemplate(gray_frame, scaled_template, cv2.TM_CCOEFF_NORMED)
|
|
||||||
_, max_val, _, max_loc = cv2.minMaxLoc(result)
|
|
||||||
|
|
||||||
# Check if this is the best match so far
|
|
||||||
if max_val > best_confidence:
|
|
||||||
best_confidence = max_val
|
|
||||||
# Get center of template
|
|
||||||
center_x = max_loc[0] + new_w // 2
|
|
||||||
center_y = max_loc[1] + new_h // 2
|
|
||||||
best_match = (center_x, center_y, max_val)
|
|
||||||
else:
|
|
||||||
# Single-scale template matching (faster)
|
|
||||||
result = cv2.matchTemplate(gray_frame, gray_template, cv2.TM_CCOEFF_NORMED)
|
|
||||||
_, max_val, _, max_loc = cv2.minMaxLoc(result)
|
|
||||||
|
|
||||||
if max_val > 0.6: # Higher threshold for single-scale
|
|
||||||
template_h, template_w = gray_template.shape
|
|
||||||
center_x = max_loc[0] + template_w // 2
|
|
||||||
center_y = max_loc[1] + template_h // 2
|
|
||||||
best_match = (center_x, center_y, max_val)
|
|
||||||
best_confidence = max_val
|
|
||||||
else:
|
|
||||||
best_match = None
|
|
||||||
best_confidence = 0.0
|
|
||||||
|
|
||||||
# Adaptive thresholding based on recent match history
|
# Adaptive thresholding based on recent match history
|
||||||
if len(self.template_match_history) > 0:
|
if len(self.template_match_history) > 0:
|
||||||
@@ -2394,36 +2336,6 @@ class VideoEditor:
|
|||||||
print(f"Error in template tracking: {e}")
|
print(f"Error in template tracking: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _recreate_template_from_region(self, frame):
|
|
||||||
"""Recreate template from saved region coordinates"""
|
|
||||||
try:
|
|
||||||
if self.template_region is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
x, y, w, h = self.template_region
|
|
||||||
print(f"DEBUG: Recreating template from region ({x}, {y}, {w}, {h})")
|
|
||||||
|
|
||||||
# Ensure region is within frame bounds
|
|
||||||
if (x >= 0 and y >= 0 and
|
|
||||||
x + w <= frame.shape[1] and
|
|
||||||
y + h <= frame.shape[0]):
|
|
||||||
|
|
||||||
# Extract template from frame
|
|
||||||
template = frame[y:y+h, x:x+w]
|
|
||||||
if template.size > 0:
|
|
||||||
self.tracking_template = template.copy()
|
|
||||||
print(f"DEBUG: Template recreated with size {template.shape}")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print("DEBUG: Template region too small")
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
print("DEBUG: Template region outside frame bounds")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error recreating template: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _improve_template_matching(self, frame, template):
|
def _improve_template_matching(self, frame, template):
|
||||||
"""Apply image preprocessing to improve template matching"""
|
"""Apply image preprocessing to improve template matching"""
|
||||||
@@ -3253,8 +3165,6 @@ class VideoEditor:
|
|||||||
if self.template_matching_enabled:
|
if self.template_matching_enabled:
|
||||||
mode = "Full Frame" if self.template_matching_full_frame else "Cropped"
|
mode = "Full Frame" if self.template_matching_full_frame else "Cropped"
|
||||||
template_text = f" | Template: {mode}"
|
template_text = f" | Template: {mode}"
|
||||||
if False: # Multi-scale template matching removed
|
|
||||||
template_text += " (MULTI-SCALE)"
|
|
||||||
autorepeat_text = (
|
autorepeat_text = (
|
||||||
f" | Loop: ON" if self.looping_between_markers else ""
|
f" | Loop: ON" if self.looping_between_markers else ""
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user