diff --git a/croppa/main.py b/croppa/main.py
index 7b28338..7f7a649 100644
--- a/croppa/main.py
+++ b/croppa/main.py
@@ -4,7 +4,7 @@ import cv2
 import argparse
 import numpy as np
 from pathlib import Path
-from typing import List
+from typing import List, Tuple
 import time
 import re
 import json
@@ -12,6 +12,7 @@ import threading
 import queue
 import subprocess
 import ctypes
+from tracking import MotionTracker
 
 class Cv2BufferedCap:
     """Buffered wrapper around cv2.VideoCapture that handles frame loading, seeking, and caching correctly"""
@@ -596,6 +597,11 @@ class VideoEditor:
         self.display_needs_update = True
         self.last_display_state = None
         
+        # Motion tracking
+        self.motion_tracker = MotionTracker()
+        self.tracking_point_radius = 10  # Radius of tracking point circles
+        self.tracking_point_distance = 50  # Max distance to consider for removing points
+        
         # Cached transformations for performance
         self.cached_transformed_frame = None
         self.cached_frame_number = None
@@ -628,6 +634,9 @@ class VideoEditor:
             return False
 
         try:
+            # Get tracking data
+            tracking_data = self.motion_tracker.to_dict()
+            
             state = {
                 'timestamp': time.time(),
                 'current_frame': getattr(self, 'current_frame', 0),
@@ -643,7 +652,8 @@ class VideoEditor:
                 'display_offset': self.display_offset,
                 'playback_speed': getattr(self, 'playback_speed', 1.0),
                 'seek_multiplier': getattr(self, 'seek_multiplier', 1.0),
-                'is_playing': getattr(self, 'is_playing', False)
+                'is_playing': getattr(self, 'is_playing', False),
+                'motion_tracking': tracking_data  # Add tracking data
             }
 
             with open(state_file, 'w') as f:
@@ -719,6 +729,11 @@ class VideoEditor:
             if 'is_playing' in state:
                 self.is_playing = state['is_playing']
                 print(f"Loaded is_playing: {self.is_playing}")
+                
+            # Load motion tracking data if available
+            if 'motion_tracking' in state:
+                self.motion_tracker.from_dict(state['motion_tracking'])
+                print(f"Loaded motion tracking data: {len(self.motion_tracker.tracking_points)} keyframes")
 
             # Validate cut markers against current video length
             if self.cut_start_frame is not None and self.cut_start_frame >= self.total_frames:
@@ -1064,6 +1079,11 @@ class VideoEditor:
         if frame is None:
             return None
 
+        # Get tracking offset for crop following if motion tracking is enabled
+        tracking_offset = (0, 0)
+        if self.motion_tracker.tracking_enabled:
+            tracking_offset = self.motion_tracker.get_tracking_offset(self.current_frame)
+            
         # Create a hash of the transformation parameters for caching
         transform_hash = hash((
             self.crop_rect,
@@ -1071,7 +1091,9 @@ class VideoEditor:
             self.rotation_angle,
             self.brightness,
             self.contrast,
-            tuple(self.display_offset)
+            tuple(self.display_offset),
+            tracking_offset,  # Include tracking offset in hash
+            self.motion_tracker.tracking_enabled  # Include tracking state in hash
         ))
 
         # Check if we can use cached transformation during auto-repeat seeking
@@ -1090,6 +1112,13 @@ class VideoEditor:
         # Apply crop
         if self.crop_rect:
             x, y, w, h = self.crop_rect
+            
+            # Apply tracking offset to crop position if motion tracking is enabled
+            if self.motion_tracker.tracking_enabled:
+                tracking_offset = self.motion_tracker.get_tracking_offset(self.current_frame)
+                x += int(tracking_offset[0])
+                y += int(tracking_offset[1])
+                
             x, y, w, h = int(x), int(y), int(w), int(h)
             # Ensure crop is within frame bounds
             x = max(0, min(x, processed_frame.shape[1] - 1))
@@ -1134,6 +1163,109 @@ class VideoEditor:
         self.cached_transformed_frame = None
         self.cached_frame_number = None
         self.cached_transform_hash = None
+        
+    def transform_point(self, point: Tuple[float, float]) -> Tuple[float, float]:
+        """Transform a point from original frame coordinates to display coordinates.
+
+        Mirrors the frame pipeline: crop (with the motion-tracking shift), rotation, zoom.
+
+        Args:
+            point: (x, y) in original frame coordinates, or None.
+
+        Returns:
+            (x, y) in transformed-frame coordinates (before canvas offset and scale),
+            or None when point is None or the size needed for rotation is unknown.
+        """
+        if point is None:
+            return None
+        x, y = point
+
+        # Step 1: crop - make the point relative to the effective crop origin
+        crop_x, crop_y = self._tracked_crop_origin()
+        x -= crop_x
+        y -= crop_y
+
+        # Step 2: rotation, using the dimensions of the cropped frame
+        if self.rotation_angle != 0:
+            if self.crop_rect:
+                crop_w, crop_h = self.crop_rect[2], self.crop_rect[3]
+            elif self.current_display_frame is not None:
+                crop_h, crop_w = self.current_display_frame.shape[:2]
+            else:
+                return None
+            # NOTE(review): with y pointing down, (x, y) -> (y, w - x) is a counter-clockwise
+            # quarter turn; confirm the 90/270 branches against apply_rotation.
+            if self.rotation_angle == 90:
+                x, y = y, crop_w - x
+            elif self.rotation_angle == 180:
+                x, y = crop_w - x, crop_h - y
+            elif self.rotation_angle == 270:
+                x, y = crop_h - y, x
+
+        # Step 3: zoom
+        if self.zoom_factor != 1.0:
+            x *= self.zoom_factor
+            y *= self.zoom_factor
+
+        return (x, y)
+
+    def untransform_point(self, point: Tuple[float, float]) -> Tuple[float, float]:
+        """Transform a point from display coordinates back to original frame coordinates.
+
+        Inverse of transform_point: reverse zoom, reverse rotation, reverse crop.
+
+        Args:
+            point: (x, y) in transformed-frame coordinates, or None.
+
+        Returns:
+            (x, y) in original frame coordinates, or None when point is None or no
+            frame is currently displayed.
+        """
+        if point is None or self.current_display_frame is None:
+            return None
+        x, y = point
+
+        # Step 1: reverse zoom
+        if self.zoom_factor != 1.0:
+            x /= self.zoom_factor
+            y /= self.zoom_factor
+
+        # Step 2: reverse rotation, using the dimensions after crop but before rotation
+        if self.rotation_angle != 0:
+            if self.crop_rect:
+                crop_w, crop_h = self.crop_rect[2], self.crop_rect[3]
+            else:
+                crop_h, crop_w = self.current_display_frame.shape[:2]
+            if self.rotation_angle == 90:
+                x, y = crop_w - y, x
+            elif self.rotation_angle == 180:
+                x, y = crop_w - x, crop_h - y
+            elif self.rotation_angle == 270:
+                x, y = y, crop_h - x
+
+        # Step 3: reverse crop - add the effective crop origin back
+        crop_x, crop_y = self._tracked_crop_origin()
+        x += crop_x
+        y += crop_y
+
+        return (x, y)
+
+    def _tracked_crop_origin(self) -> Tuple[float, float]:
+        """Return the crop origin the frame pipeline uses for the current frame.
+
+        While motion tracking is enabled the pipeline shifts the crop by the
+        integer tracking offset and clamps the origin at 0, so the same is done
+        here. The pipeline's upper clamp needs the frame size and is not
+        reproduced. Without a crop rectangle the origin is (0, 0).
+        """
+        if not self.crop_rect:
+            return (0, 0)
+        origin_x, origin_y = self.crop_rect[0], self.crop_rect[1]
+        if self.motion_tracker.tracking_enabled:
+            shift = self.motion_tracker.get_tracking_offset(self.current_frame)
+            origin_x = max(0, int(origin_x + int(shift[0])))
+            origin_y = max(0, int(origin_y + int(shift[1])))
+        return (origin_x, origin_y)
 
 
     def apply_rotation(self, frame):
@@ -1294,6 +1426,64 @@ class VideoEditor:
         # Keep project view open but switch focus to video editor
         # Don't destroy the project view window - just let the user switch between them
 
+    def draw_tracking_points(self, canvas, offset_x, offset_y, scale):
+        """Overlay tracking markers and the tracking status text on the canvas.
+
+        Draws a filled green circle with a white ring for every tracking point
+        on the current frame, a blue cross at the interpolated tracking position
+        while tracking is enabled, and a "Motion: ON/OFF (n pts)" label near the
+        bottom-right corner when tracking is enabled or any points exist.
+        Nothing is drawn when there is no current frame.
+
+        Args:
+            canvas: The canvas to draw on
+            offset_x: X offset of the frame on the canvas
+            offset_y: Y offset of the frame on the canvas
+            scale: Scale factor of the frame on the canvas
+        """
+        if self.current_frame is None:
+            return
+
+        tracker = self.motion_tracker
+        white, green, blue, black = (255, 255, 255), (0, 255, 0), (255, 0, 0), (0, 0, 0)
+
+        def to_canvas(frame_point):
+            """Map an original-frame point to integer canvas pixels, or None."""
+            shown = self.transform_point(frame_point)
+            if not shown:
+                return None
+            return (int(offset_x + shown[0] * scale), int(offset_y + shown[1] * scale))
+
+        # Markers stored for exactly this frame
+        for marker in tracker.get_tracking_points_for_frame(self.current_frame):
+            pixel = to_canvas(marker)
+            if pixel is None:
+                continue
+            cv2.circle(canvas, pixel, self.tracking_point_radius + 2, white, 2)
+            cv2.circle(canvas, pixel, self.tracking_point_radius, green, -1)
+
+        # Interpolated tracking position, only shown while tracking is enabled
+        if tracker.tracking_enabled:
+            target = tracker.get_interpolated_position(self.current_frame)
+            pixel = to_canvas(target) if target else None
+            if pixel is not None:
+                cx, cy = pixel
+                arm = 10
+                cv2.line(canvas, (cx - arm, cy), (cx + arm, cy), blue, 2)
+                cv2.line(canvas, (cx, cy - arm), (cx, cy + arm), blue, 2)
+
+        # Status label: skipped when tracking is off and no points exist
+        if not (tracker.tracking_enabled or tracker.has_tracking_points()):
+            return
+        total = sum(len(pts) for pts in tracker.tracking_points.values())
+        label = f"Motion: {'ON' if tracker.tracking_enabled else 'OFF'} ({total} pts)"
+        label_x = self.window_width - 250
+        label_y = self.window_height - (30 if self.is_image_mode else self.TIMELINE_HEIGHT)
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        # A black copy shifted by 2 px serves as the shadow under the white text
+        cv2.putText(canvas, label, (label_x + 2, label_y + 2), font, 0.7, black, 2)
+        cv2.putText(canvas, label, (label_x, label_y), font, 0.7, white, 1)
+    
     def draw_feedback_message(self, frame):
         """Draw feedback message on frame if visible"""
         if not self.feedback_message or not self.feedback_message_time:
@@ -1757,6 +1947,9 @@ class VideoEditor:
         # Draw timeline
         self.draw_timeline(canvas)
 
+        # Draw tracking points and tracking position
+        self.draw_tracking_points(canvas, start_x, start_y, scale)
+        
         # Draw progress bar (if visible)
         self.draw_progress_bar(canvas)
 
@@ -1808,6 +2001,32 @@ class VideoEditor:
                 self.crop_start_point = None
                 self.crop_preview_rect = None
 
+        # Handle tracking points (Right-click)
+        if event == cv2.EVENT_RBUTTONDOWN:
+            # Convert display coordinates to original frame coordinates
+            original_point = self.untransform_point((x, y))
+            
+            if original_point:
+                # Check if clicking on an existing tracking point to remove it
+                removed = self.motion_tracker.remove_tracking_point(
+                    self.current_frame, 
+                    original_point[0], 
+                    original_point[1], 
+                    self.tracking_point_distance
+                )
+                
+                if not removed:
+                    # If no point was removed, add a new tracking point
+                    self.motion_tracker.add_tracking_point(
+                        self.current_frame,
+                        original_point[0],
+                        original_point[1]
+                    )
+                    
+                # Save state when tracking points change
+                self.save_state()
+                self.display_needs_update = True
+                
         # Handle zoom center (Ctrl + click)
         if flags & cv2.EVENT_FLAG_CTRLKEY and event == cv2.EVENT_LBUTTONDOWN:
             self.zoom_center = (x, y)
@@ -2772,6 +2991,33 @@ class VideoEditor:
                 else:
                     print(f"DEBUG: File '{self.video_path.stem}' does not contain '_edited_'")
                     print("Enter key only overwrites files with '_edited_' in the name. Use 'n' to create new files.")
+            elif key == ord("v") or key == ord("V"):
+                # Motion tracking controls
+                if key == ord("v"):
+                    # Toggle motion tracking
+                    if self.motion_tracker.tracking_enabled:
+                        self.motion_tracker.stop_tracking()
+                        print("Motion tracking disabled")
+                    else:
+                        # Start tracking with current crop and zoom center
+                        base_zoom_center = self.zoom_center
+                        if not base_zoom_center and self.current_display_frame is not None:
+                            # Use frame center if no zoom center is set
+                            h, w = self.current_display_frame.shape[:2]
+                            base_zoom_center = (w // 2, h // 2)
+                            
+                        self.motion_tracker.start_tracking(
+                            self.crop_rect,
+                            base_zoom_center
+                        )
+                        print("Motion tracking enabled")
+                    self.save_state()
+                else:  # V - Clear all tracking points
+                    self.motion_tracker.clear_tracking_points()
+                    print("All tracking points cleared")
+                    self.save_state()
+                self.display_needs_update = True
+                    
             elif key == ord("t"):
                 # Marker looping only for videos
                 if not self.is_image_mode:
diff --git a/croppa/tracking.py b/croppa/tracking.py
new file mode 100644
index 0000000..ff3b6ac
--- /dev/null
+++ b/croppa/tracking.py
@@ -0,0 +1,157 @@
+from typing import List, Dict, Tuple, Optional
+
+
+class MotionTracker:
+    """Keyframed motion tracking for crop and pan operations.
+
+    Points are stored per frame as {frame_number: [(x, y), ...]}; positions between
+    keyframes are linearly interpolated and held constant outside their range.
+    """
+
+    def __init__(self):
+        self.tracking_points = {}  # {frame_number: [(x, y), ...]}
+        self.tracking_enabled = False
+        self.base_crop_rect = None  # Original crop rect when tracking started
+        self.base_zoom_center = None  # Original zoom center when tracking started
+
+    @staticmethod
+    def _average(points) -> Tuple[float, float]:
+        """Return the mean (x, y) of a non-empty sequence of points."""
+        count = len(points)
+        return (sum(p[0] for p in points) / count, sum(p[1] for p in points) / count)
+
+    def add_tracking_point(self, frame_number: int, x: float, y: float):
+        """Add a tracking point at the specified frame and coordinates"""
+        self.tracking_points.setdefault(frame_number, []).append((x, y))
+
+    def remove_tracking_point(self, frame_number: int, x: float, y: float, radius: float = 50) -> bool:
+        """Remove the tracking point on frame_number that is closest to (x, y).
+
+        Only points within radius qualify; taking the closest one (not merely the
+        first in range) keeps removal predictable when points lie close together.
+
+        Returns:
+            True if a point was removed, False otherwise.
+        """
+        points = self.tracking_points.get(frame_number)
+        if not points:
+            return False
+
+        def distance_to_target(index: int) -> float:
+            px, py = points[index]
+            return ((px - x) ** 2 + (py - y) ** 2) ** 0.5
+
+        nearest = min(range(len(points)), key=distance_to_target)
+        if distance_to_target(nearest) > radius:
+            return False
+
+        del points[nearest]
+        if not points:
+            # Drop emptied keyframes so they never take part in interpolation
+            del self.tracking_points[frame_number]
+        return True
+
+    def clear_tracking_points(self):
+        """Clear all tracking points"""
+        self.tracking_points.clear()
+
+    def get_tracking_points_for_frame(self, frame_number: int) -> List[Tuple[float, float]]:
+        """Get all tracking points for a specific frame (empty list if none)"""
+        return self.tracking_points.get(frame_number, [])
+
+    def has_tracking_points(self) -> bool:
+        """Check if any tracking points exist"""
+        return bool(self.tracking_points)
+
+    def get_interpolated_position(self, frame_number: int) -> Optional[Tuple[float, float]]:
+        """Get the tracked position for a frame based on the tracking points.
+
+        A frame that has points returns their average. A frame between two
+        keyframes is linearly interpolated between their averages, and a frame
+        before the first / after the last keyframe is held at that keyframe.
+
+        Returns:
+            The (x, y) position, or None when there are no usable keyframes.
+        """
+        points = self.tracking_points.get(frame_number)
+        if points:
+            return self._average(points)
+
+        # Skip empty keyframes so they cannot block interpolation between neighbours
+        frames = sorted(f for f, pts in self.tracking_points.items() if pts)
+        if not frames:
+            return None
+
+        # Outside the keyframed range: hold the first / last keyframe
+        if frame_number < frames[0]:
+            return self._average(self.tracking_points[frames[0]])
+        if frame_number > frames[-1]:
+            return self._average(self.tracking_points[frames[-1]])
+
+        # Inside the range: interpolate between the enclosing keyframes
+        for frame1, frame2 in zip(frames, frames[1:]):
+            if frame1 < frame_number < frame2:
+                x1, y1 = self._average(self.tracking_points[frame1])
+                x2, y2 = self._average(self.tracking_points[frame2])
+                t = (frame_number - frame1) / (frame2 - frame1)
+                return (x1 + t * (x2 - x1), y1 + t * (y2 - y1))
+
+        return None
+
+    def get_tracking_offset(self, frame_number: int) -> Tuple[float, float]:
+        """Get the offset to center the crop on the tracked point.
+
+        The offset is the tracked position minus the base zoom center. It is
+        (0.0, 0.0) when tracking is disabled, no base zoom center is set, or no
+        tracked position is available for the frame.
+        """
+        if not self.tracking_enabled or not self.base_zoom_center:
+            return (0.0, 0.0)
+
+        current_pos = self.get_interpolated_position(frame_number)
+        if not current_pos:
+            return (0.0, 0.0)
+
+        offset_x = current_pos[0] - self.base_zoom_center[0]
+        offset_y = current_pos[1] - self.base_zoom_center[1]
+        return (offset_x, offset_y)
+
+    def start_tracking(self, base_crop_rect: Tuple[int, int, int, int], base_zoom_center: Tuple[int, int]):
+        """Start motion tracking with base positions"""
+        self.tracking_enabled = True
+        self.base_crop_rect = base_crop_rect
+        self.base_zoom_center = base_zoom_center
+
+    def stop_tracking(self):
+        """Stop motion tracking and forget the base positions"""
+        self.tracking_enabled = False
+        self.base_crop_rect = None
+        self.base_zoom_center = None
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary for serialization.
+
+        The point lists are copied so the result does not alias internal state.
+        """
+        return {
+            'tracking_points': {frame: list(points) for frame, points in self.tracking_points.items()},
+            'tracking_enabled': self.tracking_enabled,
+            'base_crop_rect': self.base_crop_rect,
+            'base_zoom_center': self.base_zoom_center
+        }
+
+    def from_dict(self, data: Dict):
+        """Load state produced by to_dict, also after a JSON round trip.
+
+        JSON turns integer frame keys into strings and (x, y) tuples into lists;
+        both are converted back. Keyframes without points are skipped, and a
+        missing or null 'tracking_points' entry is treated as empty.
+        """
+        self.tracking_points = {}
+        for frame_key, points in (data.get('tracking_points') or {}).items():
+            if points:
+                self.tracking_points[int(frame_key)] = [(p[0], p[1]) for p in points]
+
+        self.tracking_enabled = data.get('tracking_enabled', False)
+        self.base_crop_rect = data.get('base_crop_rect', None)
+        self.base_zoom_center = data.get('base_zoom_center', None)
\ No newline at end of file