feat(main.py): introduce multi-segment video mode for enhanced analysis

This commit is contained in:
2025-08-20 11:44:00 +02:00
parent a951ad21a3
commit 9a8424feb3

327
main.py
View File

@@ -2,6 +2,7 @@ import os
import sys import sys
import glob import glob
import cv2 import cv2
import numpy as np
import argparse import argparse
import shutil import shutil
import time import time
@@ -48,6 +49,14 @@ class MediaGrader:
self.current_frame = 0 self.current_frame = 0
self.total_frames = 0 self.total_frames = 0
# Multi-segment mode state
self.multi_segment_mode = False
self.segment_count = 4 # Number of video segments (2x2 grid)
self.segment_overlap_percent = 10 # Percentage overlap between segments
self.segment_caps = [] # List of VideoCapture objects for each segment
self.segment_frames = [] # List of current frames for each segment
self.segment_positions = [] # List of frame positions for each segment
# Key repeat tracking with rate limiting # Key repeat tracking with rate limiting
self.last_seek_time = 0 self.last_seek_time = 0
self.current_seek_key = None self.current_seek_key = None
@@ -94,6 +103,28 @@ class MediaGrader:
# Jump history for H key (undo jump) # Jump history for H key (undo jump)
self.jump_history = {} # Dict[file_path: List[frame_positions]] for jump undo self.jump_history = {} # Dict[file_path: List[frame_positions]] for jump undo
# Undo functionality
self.undo_history = [] # List of (source_path, destination_path, original_index) tuples
# Watch tracking for "good look" feature
self.watched_regions = {} # Dict[file_path: List[Tuple[start_frame, end_frame]]]
self.current_watch_start = None # Frame where current viewing session started
self.last_frame_position = 0 # Track last known frame position
# Bisection navigation tracking
self.last_jump_position = {} # Dict[file_path: last_frame] for bisection reference
# Jump history for H key (undo jump)
self.jump_history = {} # Dict[file_path: List[frame_positions]] for jump undo
# Multi-segment mode configuration
MULTI_SEGMENT_MODE = False
SEGMENT_COUNT = 4 # Number of video segments (2x2 grid)
SEGMENT_OVERLAP_PERCENT = 10 # Percentage overlap between segments
# Seek modifiers for A/D keys
SHIFT_SEEK_MULTIPLIER = 5 # SHIFT + A/D multiplier
def find_media_files(self) -> List[Path]: def find_media_files(self) -> List[Path]:
"""Find all media files recursively in the directory""" """Find all media files recursively in the directory"""
media_files = [] media_files = []
@@ -457,8 +488,118 @@ class MediaGrader:
print(f"Undid jump: returned to frame {previous_position} ({percentage:.1f}% through video)") print(f"Undid jump: returned to frame {previous_position} ({percentage:.1f}% through video)")
return True return True
def toggle_multi_segment_mode(self):
    """Switch the viewer between single-video and multi-segment display.

    Returns:
        False, leaving the mode untouched, when the current media file is
        not a video. Otherwise True after flipping
        ``self.multi_segment_mode`` and setting up (when enabling) or
        tearing down (when disabling) the per-segment captures.
    """
    active_file = self.media_files[self.current_index]
    if not self.is_video(active_file):
        print("Multi-segment mode only works with videos")
        return False

    self.multi_segment_mode = not self.multi_segment_mode

    if not self.multi_segment_mode:
        print("Disabled multi-segment mode")
        self.cleanup_segment_captures()
        # Back in single-video mode: reload the current file.
        self.load_media(self.media_files[self.current_index])
        return True

    print(f"Enabled multi-segment mode ({self.segment_count} segments)")
    self.setup_segment_captures()
    return True
def setup_segment_captures(self):
    """Open one video capture per segment of the current video.

    The video is split into ``self.segment_count`` segments of
    ``self.total_frames // self.segment_count`` frames each. Segment ``i``
    starts ``self.segment_overlap_percent`` percent of a segment length
    before its nominal start, clamped at frame 0. For every segment a
    separate ``cv2.VideoCapture`` is opened on the same file, positioned at
    the segment start, and its first frame is read.

    ``self.segment_caps``, ``self.segment_frames`` and
    ``self.segment_positions`` are rebuilt with one entry per segment.
    A capture that cannot be opened, or a first frame that cannot be read,
    is recorded as ``None`` so the three lists stay index-aligned.

    Does nothing when the current file is not a video. When
    ``self.segment_count`` is less than 1, existing captures are cleaned up
    and no new ones are created.
    """
    current_file = self.media_files[self.current_index]
    if not self.is_video(current_file):
        return

    # Drop captures left over from a previous file or an earlier toggle.
    self.cleanup_segment_captures()

    if self.segment_count < 1:
        # Nothing to split into; also avoids dividing by zero below.
        return

    segment_duration = self.total_frames // self.segment_count
    overlap_frames = int(segment_duration * self.segment_overlap_percent / 100)
    self.segment_positions = [
        max(0, index * segment_duration - overlap_frames)
        for index in range(self.segment_count)
    ]

    for start_frame in self.segment_positions:
        cap = cv2.VideoCapture(str(current_file))
        if not cap.isOpened():
            # Free the failed handle and keep the lists index-aligned.
            cap.release()
            self.segment_caps.append(None)
            self.segment_frames.append(None)
            continue

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        self.segment_caps.append(cap)

        # Load the initial frame shown for this segment.
        ret, frame = cap.read()
        self.segment_frames.append(frame if ret else None)
def cleanup_segment_captures(self):
    """Release every segment capture and reset all per-segment state.

    Entries that are falsy (for example ``None`` placeholders for captures
    that failed to open) are skipped. Afterwards ``self.segment_caps``,
    ``self.segment_frames`` and ``self.segment_positions`` are fresh empty
    lists.
    """
    for handle in filter(None, self.segment_caps):
        handle.release()

    self.segment_caps, self.segment_frames, self.segment_positions = [], [], []
def update_segment_frames(self):
    """Read the next frame from every open segment capture.

    Does nothing unless multi-segment mode is active and captures exist.
    When a read fails, the capture is rewound to that segment's start
    position and read once more. ``self.segment_frames[i]`` is only
    overwritten when a frame was actually obtained, so the previous frame
    stays in place otherwise.
    """
    if not (self.multi_segment_mode and self.segment_caps):
        return

    for idx, capture in enumerate(self.segment_caps):
        if not capture or not capture.isOpened():
            continue

        ok, image = capture.read()
        if not ok:
            # Read failed: loop back to the segment start and retry once.
            capture.set(cv2.CAP_PROP_POS_FRAMES, self.segment_positions[idx])
            ok, image = capture.read()

        if ok:
            self.segment_frames[idx] = image
def seek_all_segments(self, frames_delta: int):
    """Move every open segment capture by ``frames_delta`` frames.

    Does nothing unless multi-segment mode is active and captures exist.
    Each capture's new position is clamped between its segment start and
    the segment end (start plus ``total_frames // segment_count``, capped
    at the last frame of the video). The frame at the new position is read
    into ``self.segment_frames`` when the read succeeds.

    Args:
        frames_delta: Signed number of frames to move; negative seeks back.
    """
    if not (self.multi_segment_mode and self.segment_caps):
        return

    for idx, capture in enumerate(self.segment_caps):
        if not capture or not capture.isOpened():
            continue

        position = int(capture.get(cv2.CAP_PROP_POS_FRAMES))
        lower = self.segment_positions[idx]
        span = self.total_frames // self.segment_count
        upper = min(self.total_frames - 1, lower + span)
        # Clamp into [lower, upper]; the lower bound wins if they cross.
        destination = max(lower, min(position + frames_delta, upper))

        capture.set(cv2.CAP_PROP_POS_FRAMES, destination)
        ok, image = capture.read()
        if ok:
            self.segment_frames[idx] = image

        # Put the position back so the next read starts at the destination.
        capture.set(cv2.CAP_PROP_POS_FRAMES, destination)
def display_current_frame(self):
    """Display the current frame using the view for the active mode.

    Delegates to ``display_multi_segment_frame`` when multi-segment mode is
    on and to ``display_single_frame`` otherwise.
    """
    render = (
        self.display_multi_segment_frame
        if self.multi_segment_mode
        else self.display_single_frame
    )
    render()
def display_single_frame(self):
"""Display single frame view"""
if self.current_display_frame is None: if self.current_display_frame is None:
return return
@@ -487,10 +628,103 @@ class MediaGrader:
cv2.imshow("Media Grader", frame) cv2.imshow("Media Grader", frame)
def display_multi_segment_frame(self):
    """Render all segment frames as one grid in the "Media Grader" window.

    The first available segment frame defines the canvas size. The canvas
    is divided into a near-square grid with enough cells for every segment;
    each available frame is resized into its cell, labelled with its
    segment number and the segment start as a percentage of the video, and
    outlined with a grey border. A status line (speed, file index,
    play/pause state) is drawn along the bottom edge.

    Returns without drawing when no segment frame is available.
    """
    ref_frame = next((f for f in self.segment_frames if f is not None), None)
    if ref_frame is None:
        return

    frame_height, frame_width = ref_frame.shape[:2]

    # Grid dimensions: rows = floor(sqrt(n)), columns = ceil(n / rows), so
    # that counts which are not perfect squares still get a cell each.
    cell_count = max(self.segment_count, len(self.segment_frames))
    grid_rows = max(1, int(cell_count ** 0.5))
    grid_cols = -(-cell_count // grid_rows)  # ceiling division

    segment_width = frame_width // grid_cols
    segment_height = frame_height // grid_rows

    combined_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

    def put_outlined_text(text, origin, scale):
        # Thick white pass first, thin black pass on top, for legibility.
        cv2.putText(
            combined_frame, text, origin,
            cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255), 2,
        )
        cv2.putText(
            combined_frame, text, origin,
            cv2.FONT_HERSHEY_SIMPLEX, scale, (0, 0, 0), 1,
        )

    for i, segment_frame in enumerate(self.segment_frames):
        if segment_frame is None:
            continue

        row, col = divmod(i, grid_cols)
        y_start = row * segment_height
        y_end = y_start + segment_height
        x_start = col * segment_width
        x_end = x_start + segment_width

        # Resize the segment frame to its grid cell and place it.
        combined_frame[y_start:y_end, x_start:x_end] = cv2.resize(
            segment_frame, (segment_width, segment_height)
        )

        # Segment start as a percentage of the video; 0 when the frame
        # count or the start position is not available.
        if self.total_frames > 0 and i < len(self.segment_positions):
            segment_position = int(
                (self.segment_positions[i] / self.total_frames) * 100
            )
        else:
            segment_position = 0
        label_text = f"Seg {i+1}: {segment_position}%"
        put_outlined_text(label_text, (x_start + 5, y_start + 20), 0.5)

        # Grid border around the cell.
        cv2.rectangle(
            combined_frame,
            (x_start, y_start),
            (x_end - 1, y_end - 1),
            (128, 128, 128),
            1,
        )

    # Overall status line along the bottom edge.
    info_text = f"MULTI-SEGMENT | Speed: {self.playback_speed:.1f}x | File: {self.current_index + 1}/{len(self.media_files)} | {'Playing' if self.is_playing else 'PAUSED'}"
    put_outlined_text(info_text, (10, frame_height - 20), 0.6)

    cv2.imshow("Media Grader", combined_frame)
def draw_timeline(self, frame): def draw_timeline(self, frame):
"""Draw timeline at the bottom of the frame""" """Draw timeline at the bottom of the frame"""
# Only draw timeline for video files # Only draw timeline for video files in single mode
if not self.is_video(self.media_files[self.current_index]): if not self.is_video(self.media_files[self.current_index]) or self.multi_segment_mode:
return return
height, width = frame.shape[:2] height, width = frame.shape[:2]
@@ -528,7 +762,7 @@ class MediaGrader:
def mouse_callback(self, event, x, y, flags, param): def mouse_callback(self, event, x, y, flags, param):
"""Handle mouse events for timeline interaction""" """Handle mouse events for timeline interaction"""
if not self.timeline_rect or not self.is_video(self.media_files[self.current_index]): if not self.timeline_rect or not self.is_video(self.media_files[self.current_index]) or self.multi_segment_mode:
return return
bar_x_start, bar_y, bar_width, bar_height = self.timeline_rect bar_x_start, bar_y, bar_width, bar_height = self.timeline_rect
@@ -571,34 +805,43 @@ class MediaGrader:
): ):
return return
frames_to_skip = self.calculate_frames_to_skip() if self.multi_segment_mode:
# Update all segment frames
self.update_segment_frames()
return True
else:
frames_to_skip = self.calculate_frames_to_skip()
for _ in range(frames_to_skip + 1): for _ in range(frames_to_skip + 1):
ret, frame = self.current_cap.read() ret, frame = self.current_cap.read()
if not ret: if not ret:
return False return False
self.current_display_frame = frame self.current_display_frame = frame
self.current_frame = int(self.current_cap.get(cv2.CAP_PROP_POS_FRAMES)) self.current_frame = int(self.current_cap.get(cv2.CAP_PROP_POS_FRAMES))
# Update watch tracking # Update watch tracking
self.update_watch_tracking() self.update_watch_tracking()
return True return True
def seek_video(self, frames_delta: int):
    """Seek the current video by a signed number of frames.

    Does nothing when the current file is not a video. In multi-segment
    mode the request is forwarded to ``seek_all_segments``. Otherwise the
    single capture is moved to the current frame plus ``frames_delta``,
    clamped to the range from 0 to ``total_frames - 1``, and the frame
    there is loaded.

    Args:
        frames_delta: Signed number of frames to move; negative seeks back.
    """
    if not self.is_video(self.media_files[self.current_index]):
        return

    if self.multi_segment_mode:
        self.seek_all_segments(frames_delta)
        return

    if not self.current_cap:
        return

    requested = self.current_frame + frames_delta
    last_frame = self.total_frames - 1
    target_frame = max(0, min(requested, last_frame))

    self.current_cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
    self.load_current_frame()
def process_seek_key(self, key: int) -> bool: def process_seek_key(self, key: int) -> bool:
"""Process seeking keys with proper rate limiting""" """Process seeking keys with proper rate limiting"""
@@ -773,14 +1016,15 @@ class MediaGrader:
print(" Ctrl+A/D: Seek backward/forward (10x multiplier)") print(" Ctrl+A/D: Seek backward/forward (10x multiplier)")
print(" , / . : Frame-by-frame seek (fine control)") print(" , / . : Frame-by-frame seek (fine control)")
print(" W/S: Decrease/Increase playback speed") print(" W/S: Decrease/Increase playback speed")
print(" G: Toggle multi-segment mode (videos only)")
print(" 1-5: Grade and move file") print(" 1-5: Grade and move file")
print(" N: Next file") print(" N: Next file")
print(" P: Previous file") print(" P: Previous file")
print(" U: Undo last grading action") print(" U: Undo last grading action")
print(" L: Sample video at key points (videos only)") print(" L: Sample video at key points (videos only, disabled in multi-segment)")
print(" H: Undo last L jump (videos only)") print(" H: Undo last L jump (videos only, disabled in multi-segment)")
print(" J: Bisect backwards from current position (videos only)") print(" J: Bisect backwards from current position (videos only, disabled in multi-segment)")
print(" K: Bisect forwards toward next sample (videos only)") print(" K: Bisect forwards toward next sample (videos only, disabled in multi-segment)")
print(" Q/ESC: Quit") print(" Q/ESC: Quit")
cv2.namedWindow("Media Grader", cv2.WINDOW_NORMAL) cv2.namedWindow("Media Grader", cv2.WINDOW_NORMAL)
@@ -793,6 +1037,10 @@ class MediaGrader:
print(f"Could not load {current_file}") print(f"Could not load {current_file}")
self.current_index += 1 self.current_index += 1
continue continue
# Setup multi-segment mode if enabled and this is a video
if self.multi_segment_mode and self.is_video(current_file):
self.setup_segment_captures()
window_title = f"Media Grader - {current_file.name} ({self.current_index + 1}/{len(self.media_files)})" window_title = f"Media Grader - {current_file.name} ({self.current_index + 1}/{len(self.media_files)})"
cv2.setWindowTitle("Media Grader", window_title) cv2.setWindowTitle("Media Grader", window_title)
@@ -837,14 +1085,28 @@ class MediaGrader:
# File was restored, reload it # File was restored, reload it
break break
elif key == ord("l"): elif key == ord("l"):
# Jump to largest unwatched region if not self.multi_segment_mode:
self.jump_to_unwatched_region() # Jump to largest unwatched region
self.jump_to_unwatched_region()
else:
print("Navigation keys (H/J/K/L) disabled in multi-segment mode")
elif key == ord("j"): elif key == ord("j"):
self.bisect_backwards() if not self.multi_segment_mode:
self.bisect_backwards()
else:
print("Navigation keys (H/J/K/L) disabled in multi-segment mode")
elif key == ord("k"): elif key == ord("k"):
self.bisect_forwards() if not self.multi_segment_mode:
self.bisect_forwards()
else:
print("Navigation keys (H/J/K/L) disabled in multi-segment mode")
elif key == ord("h"): # Changed from "j" to "h" for undo jump elif key == ord("h"): # Changed from "j" to "h" for undo jump
self.undo_jump() if not self.multi_segment_mode:
self.undo_jump()
else:
print("Navigation keys (H/J/K/L) disabled in multi-segment mode")
elif key == ord("g"):
self.toggle_multi_segment_mode()
elif key in [ord("1"), ord("2"), ord("3"), ord("4"), ord("5")]: elif key in [ord("1"), ord("2"), ord("3"), ord("4"), ord("5")]:
grade = int(chr(key)) grade = int(chr(key))
if not self.grade_media(grade): if not self.grade_media(grade):
@@ -871,6 +1133,7 @@ class MediaGrader:
if self.current_cap: if self.current_cap:
self.current_cap.release() self.current_cap.release()
self.cleanup_segment_captures()
cv2.destroyAllWindows() cv2.destroyAllWindows()
print("Grading session complete!") print("Grading session complete!")