diff --git a/croppa/main.py b/croppa/main.py
index 7b28338..42cb87b 100644
--- a/croppa/main.py
+++ b/croppa/main.py
@@ -601,6 +601,10 @@ class VideoEditor:
         self.cached_frame_number = None
         self.cached_transform_hash = None
 
+        # Motion tracking state
+        self.tracking_points = {}  # {frame_number: [(x, y), ...]} in original frame coords
+        self.tracking_enabled = False
+
         # Project view mode
         self.project_view_mode = False
         self.project_view = None
@@ -643,7 +647,9 @@ class VideoEditor:
             'display_offset': self.display_offset,
             'playback_speed': getattr(self, 'playback_speed', 1.0),
             'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
-            'is_playing': getattr(self, 'is_playing', False)
+            'is_playing': getattr(self, 'is_playing', False),
+            'tracking_enabled': self.tracking_enabled,
+            'tracking_points': {str(k): v for k, v in self.tracking_points.items()}
         }
 
         with open(state_file, 'w') as f:
@@ -719,6 +725,12 @@ class VideoEditor:
             if 'is_playing' in state:
                 self.is_playing = state['is_playing']
                 print(f"Loaded is_playing: {self.is_playing}")
+            if 'tracking_enabled' in state:
+                self.tracking_enabled = state['tracking_enabled']
+                print(f"Loaded tracking_enabled: {self.tracking_enabled}")
+            if 'tracking_points' in state and isinstance(state['tracking_points'], dict):
+                self.tracking_points = {int(k): v for k, v in state['tracking_points'].items()}
+                print(f"Loaded tracking_points: {sum(len(v) for v in self.tracking_points.values())} points")
 
             # Validate cut markers against current video length
             if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1087,9 +1099,15 @@ class VideoEditor:
         # Apply brightness/contrast first (to original frame for best quality)
         processed_frame = self.apply_brightness_contrast(processed_frame)
 
-        # Apply crop
+        # Apply crop (with motion tracking follow if enabled)
         if self.crop_rect:
             x, y, w, h = self.crop_rect
+            if self.tracking_enabled:
+                interp = self._get_interpolated_tracking_position(getattr(self, 'current_frame', 0))
+                if interp:
+                    cx, cy = interp
+                    x = int(round(cx - w / 2))
+                    y = int(round(cy - h / 2))
             x, y, w, h = int(x), int(y), int(w), int(h)
             # Ensure crop is within frame bounds
             x = max(0, min(x, processed_frame.shape[1] - 1))
@@ -1129,6 +1147,135 @@ class VideoEditor:
 
         return processed_frame
 
+    # --- Motion tracking helpers ---
+    def _get_effective_crop_rect_for_frame(self, frame_number):
+        """Compute crop rect applied to a given frame, considering tracking follow."""
+        if not self.crop_rect:
+            return (0, 0, self.frame_width, self.frame_height)
+        x, y, w, h = map(int, self.crop_rect)
+        if self.tracking_enabled:
+            pos = self._get_interpolated_tracking_position(frame_number)
+            if pos:
+                cx, cy = pos
+                x = int(round(cx - w / 2))
+                y = int(round(cy - h / 2))
+        # Clamp to frame bounds
+        x = max(0, min(x, self.frame_width - 1))
+        y = max(0, min(y, self.frame_height - 1))
+        w = min(w, self.frame_width - x)
+        h = min(h, self.frame_height - y)
+        return (x, y, w, h)
+
+    def _get_interpolated_tracking_position(self, frame_number):
+        """Linear interpolation between keyed tracking points.
+        Returns (x, y) in original frame coords or None.
+        """
+        if not self.tracking_points:
+            return None
+        frames = sorted(self.tracking_points.keys())
+        if not frames:
+            return None
+        if frame_number in self.tracking_points and self.tracking_points[frame_number]:
+            pts = self.tracking_points[frame_number]
+            return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts))
+        if frame_number < frames[0]:
+            pts = self.tracking_points[frames[0]]
+            return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts)) if pts else None
+        if frame_number > frames[-1]:
+            pts = self.tracking_points[frames[-1]]
+            return (sum(p[0] for p in pts) / len(pts), sum(p[1] for p in pts) / len(pts)) if pts else None
+        for i in range(len(frames) - 1):
+            f1, f2 = frames[i], frames[i + 1]
+            if f1 <= frame_number <= f2:
+                pts1 = self.tracking_points.get(f1) or []
+                pts2 = self.tracking_points.get(f2) or []
+                if not pts1 or not pts2:
+                    continue
+                x1 = sum(p[0] for p in pts1) / len(pts1)
+                y1 = sum(p[1] for p in pts1) / len(pts1)
+                x2 = sum(p[0] for p in pts2) / len(pts2)
+                y2 = sum(p[1] for p in pts2) / len(pts2)
+                t = (frame_number - f1) / (f2 - f1) if f2 != f1 else 0.0
+                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
+        return None
+
+    def _map_original_to_screen(self, ox, oy):
+        """Map a point in original frame coords to canvas screen coords."""
+        cx, cy, cw, ch = self._get_effective_crop_rect_for_frame(getattr(self, 'current_frame', 0))
+        px = ox - cx
+        py = oy - cy
+        angle = self.rotation_angle
+        if angle in (90, 270):
+            rotated_w, rotated_h = ch, cw
+        else:
+            rotated_w, rotated_h = cw, ch
+        if angle == 90:
+            rx, ry = py, rotated_h - px  # FIX: was rotated_w; ry spans rotated_h (= cw) for non-square crops
+        elif angle == 180:
+            rx, ry = rotated_w - px, rotated_h - py
+        elif angle == 270:
+            rx, ry = rotated_w - py, px  # FIX: was rotated_h; rx spans rotated_w (= ch) for non-square crops
+        else:
+            rx, ry = px, py
+        zx = rx * self.zoom_factor
+        zy = ry * self.zoom_factor
+        base_w, base_h = rotated_w, rotated_h
+        disp_w = int(base_w * self.zoom_factor)
+        disp_h = int(base_h * self.zoom_factor)
+        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
+        scale = min(self.window_width / max(1, disp_w), available_height / max(1, disp_h))
+        if scale < 1.0:
+            final_w = int(disp_w * scale)
+            final_h = int(disp_h * scale)
+        else:
+            final_w = disp_w
+            final_h = disp_h
+            scale = 1.0
+        start_x = (self.window_width - final_w) // 2
+        start_y = (available_height - final_h) // 2
+        sx = int(round(start_x + zx * scale))
+        sy = int(round(start_y + zy * scale))
+        return sx, sy
+
+    def _map_screen_to_original(self, sx, sy):
+        """Map a point on canvas screen coords back to original frame coords."""
+        cx, cy, cw, ch = self._get_effective_crop_rect_for_frame(getattr(self, 'current_frame', 0))
+        angle = self.rotation_angle
+        if angle in (90, 270):
+            rotated_w, rotated_h = ch, cw
+        else:
+            rotated_w, rotated_h = cw, ch
+        disp_w = int(rotated_w * self.zoom_factor)
+        disp_h = int(rotated_h * self.zoom_factor)
+        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
+        scale = min(self.window_width / max(1, disp_w), available_height / max(1, disp_h))
+        if scale < 1.0:
+            final_w = int(disp_w * scale)
+            final_h = int(disp_h * scale)
+        else:
+            final_w = disp_w
+            final_h = disp_h
+            scale = 1.0
+        start_x = (self.window_width - final_w) // 2
+        start_y = (available_height - final_h) // 2
+        zx = (sx - start_x) / max(1e-6, scale)
+        zy = (sy - start_y) / max(1e-6, scale)
+        rx = zx / max(1e-6, self.zoom_factor)
+        ry = zy / max(1e-6, self.zoom_factor)
+        if angle == 90:
+            px, py = rotated_h - ry, rx  # FIX: exact inverse of the corrected 90° mapping (was rotated_w)
+        elif angle == 180:
+            px, py = rotated_w - rx, rotated_h - ry
+        elif angle == 270:
+            px, py = ry, rotated_w - rx  # FIX: exact inverse of the corrected 270° mapping (was rotated_h)
+        else:
+            px, py = rx, ry
+        ox = px + cx
+        oy = py + cy
+        ox = max(0, min(int(round(ox)), self.frame_width - 1))
+        oy = max(0, min(int(round(oy)), self.frame_height - 1))
+        return ox, oy
+
     def clear_transformation_cache(self):
         """Clear the cached transformation to force recalculation"""
         self.cached_transformed_frame = None
@@ -1665,10 +1812,13 @@ class VideoEditor:
         seek_multiplier_text = (
            f" | Seek: {self.seek_multiplier:.1f}x" if self.seek_multiplier != 1.0 else ""
         )
+        motion_text = (
+            f" | Motion: {self.tracking_enabled}" if self.tracking_enabled else ""
+        )
         if self.is_image_mode:
-            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}"
+            info_text = f"Image | Zoom: {self.zoom_factor:.1f}x{rotation_text}{brightness_text}{contrast_text}{motion_text}"
         else:
-            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text} | {'Playing' if self.is_playing else 'Paused'}"
+            info_text = f"Frame: {self.current_frame}/{self.total_frames} | Speed: {self.playback_speed:.1f}x | Zoom: {self.zoom_factor:.1f}x{seek_multiplier_text}{rotation_text}{brightness_text}{contrast_text}{motion_text} | {'Playing' if self.is_playing else 'Paused'}"
         cv2.putText(
             canvas,
             info_text,
@@ -1754,6 +1904,19 @@ class VideoEditor:
             1,
         )
 
+        # Draw tracking overlays (points and interpolated cross)
+        pts = self.tracking_points.get(self.current_frame, []) if not self.is_image_mode else []
+        for (ox, oy) in pts:
+            sx, sy = self._map_original_to_screen(ox, oy)
+            cv2.circle(canvas, (sx, sy), 6, (0, 255, 0), -1)
+            cv2.circle(canvas, (sx, sy), 6, (255, 255, 255), 1)
+
+        if self.tracking_enabled and not self.is_image_mode:
+            interp = self._get_interpolated_tracking_position(self.current_frame)
+            if interp:
+                sx, sy = self._map_original_to_screen(interp[0], interp[1])
+                cv2.line(canvas, (sx - 10, sy), (sx + 10, sy), (255, 0, 0), 2)
+                cv2.line(canvas, (sx, sy - 10), (sx, sy + 10), (255, 0, 0), 2)
 
         # Draw timeline
         self.draw_timeline(canvas)
@@ -1812,6 +1975,31 @@ class VideoEditor:
         if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
             self.zoom_center = (x, y)
 
+        # Handle right-click for tracking points (no modifiers)
+        if event == cv2.EVENT_RBUTTONDOWN and not (flags & (cv2.EVENT_FLAG_CTRLKEY | cv2.EVENT_FLAG_SHIFTKEY)):
+            if not self.is_image_mode:
+                ox, oy = self._map_screen_to_original(x, y)
+                threshold = 50
+                removed = False
+                if self.current_frame in self.tracking_points:
+                    pts_screen = []
+                    for idx, (px, py) in enumerate(self.tracking_points[self.current_frame]):
+                        sxp, syp = self._map_original_to_screen(px, py)
+                        pts_screen.append((idx, sxp, syp))
+                    for idx, sxp, syp in pts_screen:
+                        if (sxp - x) ** 2 + (syp - y) ** 2 <= threshold ** 2:
+                            del self.tracking_points[self.current_frame][idx]
+                            if not self.tracking_points[self.current_frame]:
+                                del self.tracking_points[self.current_frame]
+                            self.show_feedback_message("Tracking point removed")
+                            removed = True
+                            break
+                if not removed:
+                    self.tracking_points.setdefault(self.current_frame, []).append((int(ox), int(oy)))
+                    self.show_feedback_message("Tracking point added")
+                self.clear_transformation_cache()
+                self.save_state()
+
         # Handle scroll wheel for zoom (Ctrl + scroll)
         if flags & cv2.EVENT_FLAG_CTRLKEY:
             if event == cv2.EVENT_MOUSEWHEEL:
@@ -1832,119 +2020,34 @@ class VideoEditor:
         if self.current_display_frame is None:
             return
 
-        # Get the original frame dimensions
-        original_height, original_width = self.current_display_frame.shape[:2]
-        available_height = self.window_height - (0 if self.is_image_mode else self.TIMELINE_HEIGHT)
-
-        # Calculate how the original frame is displayed (after crop/zoom/rotation)
-        display_frame = self.apply_crop_zoom_and_rotation(
-            self.current_display_frame.copy()
-        )
-        if display_frame is None:
-            return
-
-        display_height, display_width = display_frame.shape[:2]
-
-        # Calculate scale for the display frame
-        scale = min(
-            self.window_width / display_width, available_height / display_height
-        )
-        if scale < 1.0:
-            final_display_width = int(display_width * scale)
-            final_display_height = int(display_height * scale)
-        else:
-            final_display_width = display_width
-            final_display_height = display_height
-            scale = 1.0
-
-        start_x = (self.window_width - final_display_width) // 2
-        start_y = (available_height - final_display_height) // 2
-
-        # Convert screen coordinates to display frame coordinates
-        display_x = (x - start_x) / scale
-        display_y = (y - start_y) / scale
-        display_w = w / scale
-        display_h = h / scale
-
-        # Clamp to display frame bounds
-        display_x = max(0, min(display_x, display_width))
-        display_y = max(0, min(display_y, display_height))
-        display_w = min(display_w, display_width - display_x)
-        display_h = min(display_h, display_height - display_y)
-
-        # Now we need to convert from the display frame coordinates back to original frame coordinates
-        # The display frame is the result of: original -> crop -> rotation -> zoom
-
-        # Step 1: Reverse zoom
-        if self.zoom_factor != 1.0:
-            display_x = display_x / self.zoom_factor
-            display_y = display_y / self.zoom_factor
-            display_w = display_w / self.zoom_factor
-            display_h = display_h / self.zoom_factor
-
-        # Step 2: Reverse rotation
-        if self.rotation_angle != 0:
-            # Get the dimensions of the frame after crop but before rotation
-            if self.crop_rect:
-                crop_w, crop_h = int(self.crop_rect[2]), int(self.crop_rect[3])
-            else:
-                crop_w, crop_h = original_width, original_height
-
-            # Apply inverse rotation to coordinates
-            # The key insight: we need to use the dimensions of the ROTATED frame for the coordinate transformation
-            # because the coordinates we have are in the rotated coordinate system
-            if self.rotation_angle == 90:
-                # 90° clockwise rotation: (x,y) -> (y, rotated_width-x-w)
-                # The rotated frame has dimensions: height x width (swapped)
-                rotated_w, rotated_h = crop_h, crop_w
-                new_x = display_y
-                new_y = rotated_w - display_x - display_w
-                new_w = display_h
-                new_h = display_w
-            elif self.rotation_angle == 180:
-                # 180° rotation: (x,y) -> (width-x-w, height-y-h)
-                new_x = crop_w - display_x - display_w
-                new_y = crop_h - display_y - display_h
-                new_w = display_w
-                new_h = display_h
-            elif self.rotation_angle == 270:
-                # 270° clockwise rotation: (x,y) -> (rotated_height-y-h, x)
-                # The rotated frame has dimensions: height x width (swapped)
-                rotated_w, rotated_h = crop_h, crop_w
-                new_x = rotated_h - display_y - display_h
-                new_y = display_x
-                new_w = display_h
-                new_h = display_w
-            else:
-                new_x, new_y, new_w, new_h = display_x, display_y, display_w, display_h
-
-            display_x, display_y, display_w, display_h = new_x, new_y, new_w, new_h
-
-        # Step 3: Convert from cropped frame coordinates to original frame coordinates
-        original_x = display_x
-        original_y = display_y
-        original_w = display_w
-        original_h = display_h
-
-        # Add the crop offset to get back to original frame coordinates
-        if self.crop_rect:
-            crop_x, crop_y, crop_w, crop_h = self.crop_rect
-            original_x += crop_x
-            original_y += crop_y
+        # Map both corners from screen to original to form an axis-aligned crop
+        # All coordinates are in reference to the ORIGINAL frame
+        # User input arrives in processed display space → map back to original
+        x2 = x + w
+        y2 = y + h
+        ox1, oy1 = self._map_screen_to_original(x, y)
+        ox2, oy2 = self._map_screen_to_original(x2, y2)
+        left = min(ox1, ox2)
+        top = min(oy1, oy2)
+        right = max(ox1, ox2)
+        bottom = max(oy1, oy2)
+        original_x = left
+        original_y = top
+        original_w = right - left  # FIX: no max(10, ...) floor — it conflicted with the > 10 rejection below
+        original_h = bottom - top  # FIX: tiny drags are rejected by the size check, not clamped then rejected
 
         # Clamp to original frame bounds
-        original_x = max(0, min(original_x, original_width))
-        original_y = max(0, min(original_y, original_height))
-        original_w = min(original_w, original_width - original_x)
-        original_h = min(original_h, original_height - original_y)
+        original_x = max(0, min(original_x, self.frame_width - 1))
+        original_y = max(0, min(original_y, self.frame_height - 1))
+        original_w = min(original_w, self.frame_width - original_x)
+        original_h = min(original_h, self.frame_height - original_y)
 
-        if original_w > 10 and original_h > 10:  # Minimum size check
-            # Save current crop for undo
+        if original_w > 10 and original_h > 10:
             if self.crop_rect:
                 self.crop_history.append(self.crop_rect)
             self.crop_rect = (original_x, original_y, original_w, original_h)
             self.clear_transformation_cache()
-            self.save_state()  # Save state when crop is set
+            self.save_state()
 
     def seek_to_timeline_position(self, mouse_x, bar_x_start, bar_width):
         """Seek to position based on mouse click on timeline"""
@@ -2291,12 +2394,15 @@ class VideoEditor:
 
         return False
 
-    def _process_frame_for_render(self, frame, output_width: int, output_height: int):
+    def _process_frame_for_render(self, frame, output_width: int, output_height: int, frame_number: int = None):
         """Process a single frame for rendering (optimized for speed)"""
         try:
             # Apply crop (vectorized operation)
             if self.crop_rect:
-                x, y, w, h = map(int, self.crop_rect)
+                if frame_number is None:
+                    x, y, w, h = map(int, self.crop_rect)
+                else:
+                    x, y, w, h = map(int, self._get_effective_crop_rect_for_frame(frame_number))
 
                 # Clamp coordinates to frame bounds
                 h_frame, w_frame = frame.shape[:2]
@@ -2409,7 +2515,7 @@ class VideoEditor:
                 if not ret:
                     break
 
-                processed_frame = self._process_frame_for_render(frame, output_width, output_height)
+                processed_frame = self._process_frame_for_render(frame, output_width, output_height, start_frame + i)
                 if processed_frame is not None:
                     if i == 0:
                         print(f"Processed frame dimensions: {processed_frame.shape[1]}x{processed_frame.shape[0]}")
@@ -2500,6 +2606,11 @@ class VideoEditor:
         print("  U: Undo crop")
         print("  C: Clear crop")
         print()
+        print("Motion Tracking:")
+        print("  Right-click: Add/remove tracking point (at current frame)")
+        print("  v: Toggle motion tracking on/off")
+        print("  V: Clear all tracking points")
+        print()
         print("Other Controls:")
         print("  Ctrl+Scroll: Zoom in/out")
         print("  Shift+S: Save screenshot")
@@ -2772,6 +2883,16 @@ class VideoEditor:
                 else:
                     print(f"DEBUG: File '{self.video_path.stem}' does not contain '_edited_'")
                     print("Enter key only overwrites files with '_edited_' in the name. Use 'n' to create new files.")
+        elif key == ord("v"):
+            # Toggle motion tracking on/off
+            self.tracking_enabled = not self.tracking_enabled
+            self.show_feedback_message(f"Motion tracking {'ON' if self.tracking_enabled else 'OFF'}")
+            self.save_state()
+        elif key == ord("V"):
+            # Clear all tracking points
+            self.tracking_points = {}
+            self.show_feedback_message("Tracking points cleared")
+            self.save_state()
        elif key == ord("t"):
             # Marker looping only for videos
             if not self.is_image_mode: