package youtube import ( "bytes" "context" "encoding/json" "errors" "fmt" "io" "log/slog" "math/rand" "net/http" "net/url" "strconv" "sync/atomic" ) const ( Size1Kb = 1024 Size1Mb = Size1Kb * 1024 Size10Mb = Size1Mb * 10 playerParams = "CgIQBg==" ) var ErrNoFormat = errors.New("no video format provided") // DefaultClient type to use. No reason to change but you could if you wanted to. var DefaultClient = IOSClient // Client offers methods to download video metadata and video streams. type Client struct { // HTTPClient can be used to set a custom HTTP client. // If not set, http.DefaultClient will be used HTTPClient *http.Client // MaxRoutines to use when downloading a video. MaxRoutines int // ChunkSize to use when downloading videos in chunks. Default is Size10Mb. ChunkSize int64 // playerCache caches the JavaScript code of a player response playerCache playerCache client *clientInfo consentID string } func (c *Client) assureClient() { if c.client == nil { c.client = &DefaultClient } } // GetVideo fetches video metadata func (c *Client) GetVideo(url string) (*Video, error) { return c.GetVideoContext(context.Background(), url) } // GetVideoContext fetches video metadata with a context func (c *Client) GetVideoContext(ctx context.Context, url string) (*Video, error) { id, err := ExtractVideoID(url) if err != nil { return nil, fmt.Errorf("extractVideoID failed: %w", err) } return c.videoFromID(ctx, id) } func (c *Client) videoFromID(ctx context.Context, id string) (*Video, error) { c.assureClient() body, err := c.videoDataByInnertube(ctx, id) if err != nil { return nil, err } v := Video{ ID: id, } // return early if all good if err = v.parseVideoInfo(body); err == nil { return &v, nil } // If the uploader has disabled embedding the video on other sites, parse video page if errors.Is(err, ErrNotPlayableInEmbed) { // additional parameters are required to access clips with sensitiv content html, err := c.httpGetBodyBytes(ctx, "https://www.youtube.com/watch?v="+id+"&bpctr=9999999999&has_verified=1") if err != nil { return nil, err } return &v, v.parseVideoPage(html) } // If the uploader marked the video as inappropriate for some ages, use embed player if errors.Is(err, ErrLoginRequired) { c.client = &EmbeddedClient bodyEmbed, errEmbed := c.videoDataByInnertube(ctx, id) if errEmbed == nil { errEmbed = v.parseVideoInfo(bodyEmbed) } if errEmbed == nil { return &v, nil } // private video clearly not age-restricted and thus should be explicit if errEmbed == ErrVideoPrivate { return &v, errEmbed } // wrapping error so its clear whats happened return &v, fmt.Errorf("can't bypass age restriction: %w", errEmbed) } // undefined error return &v, err } type innertubeRequest struct { VideoID string `json:"videoId,omitempty"` BrowseID string `json:"browseId,omitempty"` Continuation string `json:"continuation,omitempty"` Context inntertubeContext `json:"context"` PlaybackContext *playbackContext `json:"playbackContext,omitempty"` ContentCheckOK bool `json:"contentCheckOk,omitempty"` RacyCheckOk bool `json:"racyCheckOk,omitempty"` Params string `json:"params"` } type playbackContext struct { ContentPlaybackContext contentPlaybackContext `json:"contentPlaybackContext"` } type contentPlaybackContext struct { // SignatureTimestamp string `json:"signatureTimestamp"` HTML5Preference string `json:"html5Preference"` } type inntertubeContext struct { Client innertubeClient `json:"client"` } type innertubeClient struct { HL string `json:"hl"` GL string `json:"gl"` ClientName string `json:"clientName"` ClientVersion string `json:"clientVersion"` AndroidSDKVersion int `json:"androidSDKVersion,omitempty"` UserAgent string `json:"userAgent,omitempty"` TimeZone string `json:"timeZone"` UTCOffset int `json:"utcOffsetMinutes"` DeviceModel string `json:"deviceModel,omitempty"` } // client info for the innertube API type clientInfo struct { name string key string version string userAgent string androidVersion int deviceModel string } var ( // WebClient, better to use Android client but go ahead. WebClient = clientInfo{ name: "WEB", version: "2.20220801.00.00", key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", } // AndroidClient, download go brrrrrr. AndroidClient = clientInfo{ name: "ANDROID", version: "18.11.34", key: "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w", userAgent: "com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip", androidVersion: 30, } // IOSClient Client based brrrr. IOSClient = clientInfo{ name: "IOS", version: "19.45.4", key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", userAgent: "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)", deviceModel: "iPhone16,2", } // EmbeddedClient, not really tested. EmbeddedClient = clientInfo{ name: "WEB_EMBEDDED_PLAYER", version: "1.19700101", key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", // seems like same key works for both clients userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", } ) func (c *Client) videoDataByInnertube(ctx context.Context, id string) ([]byte, error) { data := innertubeRequest{ VideoID: id, Context: prepareInnertubeContext(*c.client), ContentCheckOK: true, RacyCheckOk: true, // Params: playerParams, PlaybackContext: &playbackContext{ ContentPlaybackContext: contentPlaybackContext{ // SignatureTimestamp: sts, HTML5Preference: "HTML5_PREF_WANTS", }, }, } return c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/player?key="+c.client.key, data) } func (c *Client) transcriptDataByInnertube(ctx context.Context, id string, lang string) ([]byte, error) { data := innertubeRequest{ Context: prepareInnertubeContext(*c.client), Params: transcriptVideoID(id, lang), } return c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/get_transcript?key="+c.client.key, data) } func prepareInnertubeContext(clientInfo clientInfo) inntertubeContext { return inntertubeContext{ Client: innertubeClient{ HL: "en", GL: "US", TimeZone: "UTC", DeviceModel: clientInfo.deviceModel, ClientName: clientInfo.name, ClientVersion: clientInfo.version, AndroidSDKVersion: clientInfo.androidVersion, UserAgent: clientInfo.userAgent, }, } } func prepareInnertubePlaylistData(ID string, continuation bool, clientInfo clientInfo) innertubeRequest { context := prepareInnertubeContext(clientInfo) if continuation { return innertubeRequest{ Context: context, Continuation: ID, ContentCheckOK: true, RacyCheckOk: true, Params: playerParams, } } return innertubeRequest{ Context: context, BrowseID: "VL" + ID, ContentCheckOK: true, RacyCheckOk: true, Params: playerParams, } } // transcriptVideoID encodes the video ID to the param used to fetch transcripts. func transcriptVideoID(videoID string, lang string) string { langCode := encTranscriptLang(lang) // This can be optionally appened to the Sprintf str, not sure what it means // *3engagement-panel-searchable-transcript-search-panel\x30\x00\x38\x01\x40\x01 return base64Enc(fmt.Sprintf("\n\x0b%s\x12\x12%s\x18\x01", videoID, langCode)) } func encTranscriptLang(languageCode string) string { s := fmt.Sprintf("\n\x03asr\x12\x02%s\x1a\x00", languageCode) s = base64PadEnc(s) return url.QueryEscape(s) } // GetPlaylist fetches playlist metadata func (c *Client) GetPlaylist(url string) (*Playlist, error) { return c.GetPlaylistContext(context.Background(), url) } // GetPlaylistContext fetches playlist metadata, with a context, along with a list of Videos, and some basic information // for these videos. Playlist entries cannot be downloaded, as they lack all the required metadata, but // can be used to enumerate all IDs, Authors, Titles, etc. func (c *Client) GetPlaylistContext(ctx context.Context, url string) (*Playlist, error) { c.assureClient() id, err := extractPlaylistID(url) if err != nil { return nil, fmt.Errorf("extractPlaylistID failed: %w", err) } data := prepareInnertubePlaylistData(id, false, *c.client) body, err := c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/browse?key="+c.client.key, data) if err != nil { return nil, err } p := &Playlist{ID: id} return p, p.parsePlaylistInfo(ctx, c, body) } func (c *Client) VideoFromPlaylistEntry(entry *PlaylistEntry) (*Video, error) { return c.videoFromID(context.Background(), entry.ID) } func (c *Client) VideoFromPlaylistEntryContext(ctx context.Context, entry *PlaylistEntry) (*Video, error) { return c.videoFromID(ctx, entry.ID) } // GetStream returns the stream and the total size for a specific format func (c *Client) GetStream(video *Video, format *Format) (io.ReadCloser, int64, error) { return c.GetStreamContext(context.Background(), video, format) } // GetStreamContext returns the stream and the total size for a specific format with a context. func (c *Client) GetStreamContext(ctx context.Context, video *Video, format *Format) (io.ReadCloser, int64, error) { url, err := c.GetStreamURL(video, format) if err != nil { return nil, 0, err } req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, 0, err } r, w := io.Pipe() contentLength := format.ContentLength if contentLength == 0 { // some videos don't have length information contentLength = c.downloadOnce(req, w, format) } else { // we have length information, let's download by chunks! c.downloadChunked(ctx, req, w, format) } return r, contentLength, nil } func (c *Client) downloadOnce(req *http.Request, w *io.PipeWriter, _ *Format) int64 { resp, err := c.httpDo(req) if err != nil { w.CloseWithError(err) //nolint:errcheck return 0 } go func() { defer resp.Body.Close() _, err := io.Copy(w, resp.Body) if err == nil { w.Close() } else { w.CloseWithError(err) //nolint:errcheck } }() contentLength := resp.Header.Get("Content-Length") length, _ := strconv.ParseInt(contentLength, 10, 64) return length } func (c *Client) getChunkSize() int64 { if c.ChunkSize > 0 { return c.ChunkSize } return Size10Mb } func (c *Client) getMaxRoutines(limit int) int { routines := 10 if c.MaxRoutines > 0 { routines = c.MaxRoutines } if limit > 0 && routines > limit { routines = limit } return routines } func (c *Client) downloadChunked(ctx context.Context, req *http.Request, w *io.PipeWriter, format *Format) { chunks := getChunks(format.ContentLength, c.getChunkSize()) maxRoutines := c.getMaxRoutines(len(chunks)) cancelCtx, cancel := context.WithCancel(ctx) abort := func(err error) { w.CloseWithError(err) cancel() } currentChunk := atomic.Uint32{} for i := 0; i < maxRoutines; i++ { go func() { for { chunkIndex := int(currentChunk.Add(1)) - 1 if chunkIndex >= len(chunks) { // no more chunks return } chunk := &chunks[chunkIndex] err := c.downloadChunk(req.Clone(cancelCtx), chunk) close(chunk.data) if err != nil { abort(err) return } } }() } go func() { // copy chunks into the PipeWriter for i := 0; i < len(chunks); i++ { select { case <-cancelCtx.Done(): abort(context.Canceled) return case data := <-chunks[i].data: _, err := io.Copy(w, bytes.NewBuffer(data)) if err != nil { abort(err) } } } // everything succeeded w.Close() }() } // GetStreamURL returns the url for a specific format func (c *Client) GetStreamURL(video *Video, format *Format) (string, error) { return c.GetStreamURLContext(context.Background(), video, format) } // GetStreamURLContext returns the url for a specific format with a context func (c *Client) GetStreamURLContext(ctx context.Context, video *Video, format *Format) (string, error) { if format == nil { return "", ErrNoFormat } c.assureClient() if format.URL != "" { if c.client.androidVersion > 0 { return format.URL, nil } return c.unThrottle(ctx, video.ID, format.URL) } // TODO: check rest of this function, is it redundant? cipher := format.Cipher if cipher == "" { return "", ErrCipherNotFound } uri, err := c.decipherURL(ctx, video.ID, cipher) if err != nil { return "", err } return uri, err } // httpDo sends an HTTP request and returns an HTTP response. func (c *Client) httpDo(req *http.Request) (*http.Response, error) { client := c.HTTPClient if client == nil { client = http.DefaultClient } req.Header.Set("User-Agent", c.client.userAgent) req.Header.Set("Origin", "https://youtube.com") req.Header.Set("Sec-Fetch-Mode", "navigate") if len(c.consentID) == 0 { c.consentID = strconv.Itoa(rand.Intn(899) + 100) //nolint:gosec } req.AddCookie(&http.Cookie{ Name: "CONSENT", Value: "YES+cb.20210328-17-p0.en+FX+" + c.consentID, Path: "/", Domain: ".youtube.com", }) res, err := client.Do(req) log := slog.With("method", req.Method, "url", req.URL) if err == nil && res.StatusCode != http.StatusOK { err = ErrUnexpectedStatusCode(res.StatusCode) res.Body.Close() res = nil } if err != nil { log.Debug("HTTP request failed", "error", err) } else { log.Debug("HTTP request succeeded", "status", res.Status) } return res, err } // httpGet does a HTTP GET request, checks the response to be a 200 OK and returns it func (c *Client) httpGet(ctx context.Context, url string) (*http.Response, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, err } resp, err := c.httpDo(req) if err != nil { return nil, err } if resp.StatusCode != http.StatusOK { resp.Body.Close() return nil, ErrUnexpectedStatusCode(resp.StatusCode) } return resp, nil } // httpGetBodyBytes reads the whole HTTP body and returns it func (c *Client) httpGetBodyBytes(ctx context.Context, url string) ([]byte, error) { resp, err := c.httpGet(ctx, url) if err != nil { return nil, err } defer resp.Body.Close() return io.ReadAll(resp.Body) } // httpPost does a HTTP POST request with a body, checks the response to be a 200 OK and returns it func (c *Client) httpPost(ctx context.Context, url string, body interface{}) (*http.Response, error) { data, err := json.Marshal(body) if err != nil { return nil, err } req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(data)) if err != nil { return nil, err } req.Header.Set("X-Youtube-Client-Name", "3") req.Header.Set("X-Youtube-Client-Version", c.client.version) req.Header.Set("Content-Type", "application/json") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") resp, err := c.httpDo(req) if err != nil { return nil, err } if resp.StatusCode != http.StatusOK { resp.Body.Close() return nil, ErrUnexpectedStatusCode(resp.StatusCode) } return resp, nil } // httpPostBodyBytes reads the whole HTTP body and returns it func (c *Client) httpPostBodyBytes(ctx context.Context, url string, body interface{}) ([]byte, error) { resp, err := c.httpPost(ctx, url, body) if err != nil { return nil, err } defer resp.Body.Close() return io.ReadAll(resp.Body) } // downloadChunk writes the response data into the data channel of the chunk. // Downloading in multiple chunks is much faster: // https://github.com/kkdai/youtube/pull/190 func (c *Client) downloadChunk(req *http.Request, chunk *chunk) error { q := req.URL.Query() q.Set("range", fmt.Sprintf("%d-%d", chunk.start, chunk.end)) req.URL.RawQuery = q.Encode() resp, err := c.httpDo(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return ErrUnexpectedStatusCode(resp.StatusCode) } expected := int(chunk.end-chunk.start) + 1 data, err := io.ReadAll(resp.Body) n := len(data) if err != nil { return err } if n != expected { return fmt.Errorf("chunk at offset %d has invalid size: expected=%d actual=%d", chunk.start, expected, n) } chunk.data <- data return nil }