Enhance tracking point management in VideoEditor and MotionTracker with dual coordinate storage

This commit introduces a new TrackingPoint class (a NamedTuple) that stores both the original-frame coordinates and, optionally, the display coordinates of a tracking point, improving the accuracy and consistency of point transformations. MotionTracker now stores TrackingPoint objects instead of bare (x, y) tuples, and the VideoEditor class has been updated to use this new structure, allowing for better handling of tracking points during video editing. Only the original coordinates are written out when the tracker is serialized; display coordinates are not persisted and are left unset when points are loaded back. Additionally, logging has been enhanced to report each tracking point as it is added or removed, and redundant verification steps have been removed for efficiency. Together these changes streamline the tracking process and improve the overall user experience.
This commit is contained in:
2025-09-16 21:33:28 +02:00
parent 33a553c092
commit cd86cfc9f2
2 changed files with 73 additions and 51 deletions

View File

@@ -1,31 +1,55 @@
from typing import List, Dict, Tuple, Optional
from typing import List, Dict, Tuple, Optional, NamedTuple
class TrackingPoint(NamedTuple):
    """Represents a tracking point with both original and display coordinates.

    Fields:
        original: (x, y) position in original frame coordinates.
        display: (x, y) position after transformation to display
            coordinates, or None when no display position was supplied.
    """

    original: Tuple[float, float]  # Original frame coordinates (x, y)
    display: Optional[Tuple[float, float]] = None  # Display coordinates after transformation (x, y)

    def __str__(self):
        """Return a short description; the display part is omitted when unset."""
        # Compare against None explicitly so the display field is reported
        # whenever one was supplied, rather than relying on truthiness.
        if self.display is not None:
            return f"TrackingPoint(orig={self.original}, display={self.display})"
        return f"TrackingPoint(orig={self.original})"
class MotionTracker:
"""Handles motion tracking for crop and pan operations"""
def __init__(self):
self.tracking_points = {} # {frame_number: [(x, y), ...]}
self.tracking_points = {} # {frame_number: [TrackingPoint, ...]}
self.tracking_enabled = False
self.base_crop_rect = None # Original crop rect when tracking started
self.base_zoom_center = None # Original zoom center when tracking started
def add_tracking_point(self, frame_number: int, x: float, y: float, display_coords: Optional[Tuple[float, float]] = None):
    """Add a tracking point at the specified frame and coordinates.

    Args:
        frame_number: The frame number to add the point to
        x: Original x coordinate
        y: Original y coordinate
        display_coords: Optional display coordinates after transformation
    """
    if frame_number not in self.tracking_points:
        self.tracking_points[frame_number] = []

    # Store both original and display coordinates. The original pair is
    # coerced to float; display_coords is stored exactly as given.
    point = TrackingPoint(original=(float(x), float(y)), display=display_coords)
    print(f"Adding tracking point: {point}")
    # Exactly one entry is appended per call; the superseded bare
    # (x, y) tuple append is not kept.
    self.tracking_points[frame_number].append(point)
def remove_tracking_point(self, frame_number: int, x: int, y: int, radius: int = 50):
def remove_tracking_point(self, frame_number: int, x: float, y: float, radius: int = 50):
"""Remove a tracking point by frame and proximity to x,y"""
if frame_number not in self.tracking_points:
return False
points = self.tracking_points[frame_number]
for i, (px, py) in enumerate(points):
for i, point in enumerate(points):
px, py = point.original
# Calculate distance between points
distance = ((px - x) ** 2 + (py - y) ** 2) ** 0.5
if distance <= radius:
print(f"Removing tracking point: {point}")
del points[i]
if not points:
del self.tracking_points[frame_number]
@@ -37,7 +61,7 @@ class MotionTracker:
"""Clear all tracking points"""
self.tracking_points.clear()
def get_tracking_points_for_frame(self, frame_number: int) -> List[TrackingPoint]:
    """Get all tracking points for a specific frame.

    Args:
        frame_number: The frame number to look up.

    Returns:
        The list stored for that frame in ``self.tracking_points``, or a
        new empty list when the frame has no entry.
    """
    return self.tracking_points.get(frame_number, [])
@@ -61,24 +85,24 @@ class MotionTracker:
points = self.tracking_points[frame_number]
if points:
# Return average of all points at this frame
avg_x = sum(p[0] for p in points) / len(points)
avg_y = sum(p[1] for p in points) / len(points)
avg_x = sum(p.original[0] for p in points) / len(points)
avg_y = sum(p.original[1] for p in points) / len(points)
return (avg_x, avg_y)
# If frame is before first tracking point
if frame_number < frames[0]:
points = self.tracking_points[frames[0]]
if points:
avg_x = sum(p[0] for p in points) / len(points)
avg_y = sum(p[1] for p in points) / len(points)
avg_x = sum(p.original[0] for p in points) / len(points)
avg_y = sum(p.original[1] for p in points) / len(points)
return (avg_x, avg_y)
# If frame is after last tracking point
if frame_number > frames[-1]:
points = self.tracking_points[frames[-1]]
if points:
avg_x = sum(p[0] for p in points) / len(points)
avg_y = sum(p[1] for p in points) / len(points)
avg_x = sum(p.original[0] for p in points) / len(points)
avg_y = sum(p.original[1] for p in points) / len(points)
return (avg_x, avg_y)
# Find the two frames to interpolate between
@@ -92,10 +116,10 @@ class MotionTracker:
continue
# Get average positions for each frame
avg_x1 = sum(p[0] for p in points1) / len(points1)
avg_y1 = sum(p[1] for p in points1) / len(points1)
avg_x2 = sum(p[0] for p in points2) / len(points2)
avg_y2 = sum(p[1] for p in points2) / len(points2)
avg_x1 = sum(p.original[0] for p in points1) / len(points1)
avg_y1 = sum(p.original[1] for p in points1) / len(points1)
avg_x2 = sum(p.original[0] for p in points2) / len(points2)
avg_y2 = sum(p.original[1] for p in points2) / len(points2)
# Linear interpolation
t = (frame_number - frame1) / (frame2 - frame1)
@@ -154,8 +178,14 @@ class MotionTracker:
def to_dict(self) -> Dict:
"""Convert to dictionary for serialization"""
# Convert TrackingPoint objects to tuples for serialization
serialized_points = {}
for frame_num, points in self.tracking_points.items():
# Store only the original coordinates for serialization
serialized_points[frame_num] = [p.original for p in points]
return {
'tracking_points': self.tracking_points,
'tracking_points': serialized_points,
'tracking_enabled': self.tracking_enabled,
'base_crop_rect': self.base_crop_rect,
'base_zoom_center': self.base_zoom_center
@@ -168,7 +198,8 @@ class MotionTracker:
self.tracking_points = {}
for frame_str, points in tracking_points_data.items():
frame_num = int(frame_str) # Convert string key to integer
self.tracking_points[frame_num] = points
# Convert tuples to TrackingPoint objects
self.tracking_points[frame_num] = [TrackingPoint(original=p) for p in points]
self.tracking_enabled = data.get('tracking_enabled', False)
self.base_crop_rect = data.get('base_crop_rect', None)