Files
youtube-downloader/downloader/vendor/github.com/kkdai/youtube/v2/client.go
2024-12-21 11:16:12 +01:00

633 lines
16 KiB
Go

package youtube
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"math/rand"
"net/http"
"net/url"
"strconv"
"sync/atomic"
)
const (
Size1Kb = 1024
Size1Mb = Size1Kb * 1024
Size10Mb = Size1Mb * 10
playerParams = "CgIQBg=="
)
var ErrNoFormat = errors.New("no video format provided")
// DefaultClient type to use. No reason to change but you could if you wanted to.
var DefaultClient = IOSClient
// Client offers methods to download video metadata and video streams.
type Client struct {
// HTTPClient can be used to set a custom HTTP client.
// If not set, http.DefaultClient will be used
HTTPClient *http.Client
// MaxRoutines to use when downloading a video.
MaxRoutines int
// ChunkSize to use when downloading videos in chunks. Default is Size10Mb.
ChunkSize int64
// playerCache caches the JavaScript code of a player response
playerCache playerCache
client *clientInfo
consentID string
}
func (c *Client) assureClient() {
if c.client == nil {
c.client = &DefaultClient
}
}
// GetVideo fetches video metadata
func (c *Client) GetVideo(url string) (*Video, error) {
return c.GetVideoContext(context.Background(), url)
}
// GetVideoContext fetches video metadata with a context
func (c *Client) GetVideoContext(ctx context.Context, url string) (*Video, error) {
id, err := ExtractVideoID(url)
if err != nil {
return nil, fmt.Errorf("extractVideoID failed: %w", err)
}
return c.videoFromID(ctx, id)
}
func (c *Client) videoFromID(ctx context.Context, id string) (*Video, error) {
c.assureClient()
body, err := c.videoDataByInnertube(ctx, id)
if err != nil {
return nil, err
}
v := Video{
ID: id,
}
// return early if all good
if err = v.parseVideoInfo(body); err == nil {
return &v, nil
}
// If the uploader has disabled embedding the video on other sites, parse video page
if errors.Is(err, ErrNotPlayableInEmbed) {
// additional parameters are required to access clips with sensitiv content
html, err := c.httpGetBodyBytes(ctx, "https://www.youtube.com/watch?v="+id+"&bpctr=9999999999&has_verified=1")
if err != nil {
return nil, err
}
return &v, v.parseVideoPage(html)
}
// If the uploader marked the video as inappropriate for some ages, use embed player
if errors.Is(err, ErrLoginRequired) {
c.client = &EmbeddedClient
bodyEmbed, errEmbed := c.videoDataByInnertube(ctx, id)
if errEmbed == nil {
errEmbed = v.parseVideoInfo(bodyEmbed)
}
if errEmbed == nil {
return &v, nil
}
// private video clearly not age-restricted and thus should be explicit
if errEmbed == ErrVideoPrivate {
return &v, errEmbed
}
// wrapping error so its clear whats happened
return &v, fmt.Errorf("can't bypass age restriction: %w", errEmbed)
}
// undefined error
return &v, err
}
type innertubeRequest struct {
VideoID string `json:"videoId,omitempty"`
BrowseID string `json:"browseId,omitempty"`
Continuation string `json:"continuation,omitempty"`
Context inntertubeContext `json:"context"`
PlaybackContext *playbackContext `json:"playbackContext,omitempty"`
ContentCheckOK bool `json:"contentCheckOk,omitempty"`
RacyCheckOk bool `json:"racyCheckOk,omitempty"`
Params string `json:"params"`
}
type playbackContext struct {
ContentPlaybackContext contentPlaybackContext `json:"contentPlaybackContext"`
}
type contentPlaybackContext struct {
// SignatureTimestamp string `json:"signatureTimestamp"`
HTML5Preference string `json:"html5Preference"`
}
type inntertubeContext struct {
Client innertubeClient `json:"client"`
}
type innertubeClient struct {
HL string `json:"hl"`
GL string `json:"gl"`
ClientName string `json:"clientName"`
ClientVersion string `json:"clientVersion"`
AndroidSDKVersion int `json:"androidSDKVersion,omitempty"`
UserAgent string `json:"userAgent,omitempty"`
TimeZone string `json:"timeZone"`
UTCOffset int `json:"utcOffsetMinutes"`
DeviceModel string `json:"deviceModel,omitempty"`
}
// client info for the innertube API
type clientInfo struct {
name string
key string
version string
userAgent string
androidVersion int
deviceModel string
}
var (
// WebClient, better to use Android client but go ahead.
WebClient = clientInfo{
name: "WEB",
version: "2.20220801.00.00",
key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
}
// AndroidClient, download go brrrrrr.
AndroidClient = clientInfo{
name: "ANDROID",
version: "18.11.34",
key: "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w",
userAgent: "com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip",
androidVersion: 30,
}
// IOSClient Client based brrrr.
IOSClient = clientInfo{
name: "IOS",
version: "19.45.4",
key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
userAgent: "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)",
deviceModel: "iPhone16,2",
}
// EmbeddedClient, not really tested.
EmbeddedClient = clientInfo{
name: "WEB_EMBEDDED_PLAYER",
version: "1.19700101",
key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", // seems like same key works for both clients
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
}
)
func (c *Client) videoDataByInnertube(ctx context.Context, id string) ([]byte, error) {
data := innertubeRequest{
VideoID: id,
Context: prepareInnertubeContext(*c.client),
ContentCheckOK: true,
RacyCheckOk: true,
// Params: playerParams,
PlaybackContext: &playbackContext{
ContentPlaybackContext: contentPlaybackContext{
// SignatureTimestamp: sts,
HTML5Preference: "HTML5_PREF_WANTS",
},
},
}
return c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/player?key="+c.client.key, data)
}
func (c *Client) transcriptDataByInnertube(ctx context.Context, id string, lang string) ([]byte, error) {
data := innertubeRequest{
Context: prepareInnertubeContext(*c.client),
Params: transcriptVideoID(id, lang),
}
return c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/get_transcript?key="+c.client.key, data)
}
func prepareInnertubeContext(clientInfo clientInfo) inntertubeContext {
return inntertubeContext{
Client: innertubeClient{
HL: "en",
GL: "US",
TimeZone: "UTC",
DeviceModel: clientInfo.deviceModel,
ClientName: clientInfo.name,
ClientVersion: clientInfo.version,
AndroidSDKVersion: clientInfo.androidVersion,
UserAgent: clientInfo.userAgent,
},
}
}
func prepareInnertubePlaylistData(ID string, continuation bool, clientInfo clientInfo) innertubeRequest {
context := prepareInnertubeContext(clientInfo)
if continuation {
return innertubeRequest{
Context: context,
Continuation: ID,
ContentCheckOK: true,
RacyCheckOk: true,
Params: playerParams,
}
}
return innertubeRequest{
Context: context,
BrowseID: "VL" + ID,
ContentCheckOK: true,
RacyCheckOk: true,
Params: playerParams,
}
}
// transcriptVideoID encodes the video ID to the param used to fetch transcripts.
func transcriptVideoID(videoID string, lang string) string {
langCode := encTranscriptLang(lang)
// This can be optionally appened to the Sprintf str, not sure what it means
// *3engagement-panel-searchable-transcript-search-panel\x30\x00\x38\x01\x40\x01
return base64Enc(fmt.Sprintf("\n\x0b%s\x12\x12%s\x18\x01", videoID, langCode))
}
func encTranscriptLang(languageCode string) string {
s := fmt.Sprintf("\n\x03asr\x12\x02%s\x1a\x00", languageCode)
s = base64PadEnc(s)
return url.QueryEscape(s)
}
// GetPlaylist fetches playlist metadata
func (c *Client) GetPlaylist(url string) (*Playlist, error) {
return c.GetPlaylistContext(context.Background(), url)
}
// GetPlaylistContext fetches playlist metadata, with a context, along with a list of Videos, and some basic information
// for these videos. Playlist entries cannot be downloaded, as they lack all the required metadata, but
// can be used to enumerate all IDs, Authors, Titles, etc.
func (c *Client) GetPlaylistContext(ctx context.Context, url string) (*Playlist, error) {
c.assureClient()
id, err := extractPlaylistID(url)
if err != nil {
return nil, fmt.Errorf("extractPlaylistID failed: %w", err)
}
data := prepareInnertubePlaylistData(id, false, *c.client)
body, err := c.httpPostBodyBytes(ctx, "https://www.youtube.com/youtubei/v1/browse?key="+c.client.key, data)
if err != nil {
return nil, err
}
p := &Playlist{ID: id}
return p, p.parsePlaylistInfo(ctx, c, body)
}
func (c *Client) VideoFromPlaylistEntry(entry *PlaylistEntry) (*Video, error) {
return c.videoFromID(context.Background(), entry.ID)
}
func (c *Client) VideoFromPlaylistEntryContext(ctx context.Context, entry *PlaylistEntry) (*Video, error) {
return c.videoFromID(ctx, entry.ID)
}
// GetStream returns the stream and the total size for a specific format
func (c *Client) GetStream(video *Video, format *Format) (io.ReadCloser, int64, error) {
return c.GetStreamContext(context.Background(), video, format)
}
// GetStreamContext returns the stream and the total size for a specific format with a context.
func (c *Client) GetStreamContext(ctx context.Context, video *Video, format *Format) (io.ReadCloser, int64, error) {
url, err := c.GetStreamURL(video, format)
if err != nil {
return nil, 0, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, 0, err
}
r, w := io.Pipe()
contentLength := format.ContentLength
if contentLength == 0 {
// some videos don't have length information
contentLength = c.downloadOnce(req, w, format)
} else {
// we have length information, let's download by chunks!
c.downloadChunked(ctx, req, w, format)
}
return r, contentLength, nil
}
func (c *Client) downloadOnce(req *http.Request, w *io.PipeWriter, _ *Format) int64 {
resp, err := c.httpDo(req)
if err != nil {
w.CloseWithError(err) //nolint:errcheck
return 0
}
go func() {
defer resp.Body.Close()
_, err := io.Copy(w, resp.Body)
if err == nil {
w.Close()
} else {
w.CloseWithError(err) //nolint:errcheck
}
}()
contentLength := resp.Header.Get("Content-Length")
length, _ := strconv.ParseInt(contentLength, 10, 64)
return length
}
func (c *Client) getChunkSize() int64 {
if c.ChunkSize > 0 {
return c.ChunkSize
}
return Size10Mb
}
func (c *Client) getMaxRoutines(limit int) int {
routines := 10
if c.MaxRoutines > 0 {
routines = c.MaxRoutines
}
if limit > 0 && routines > limit {
routines = limit
}
return routines
}
func (c *Client) downloadChunked(ctx context.Context, req *http.Request, w *io.PipeWriter, format *Format) {
chunks := getChunks(format.ContentLength, c.getChunkSize())
maxRoutines := c.getMaxRoutines(len(chunks))
cancelCtx, cancel := context.WithCancel(ctx)
abort := func(err error) {
w.CloseWithError(err)
cancel()
}
currentChunk := atomic.Uint32{}
for i := 0; i < maxRoutines; i++ {
go func() {
for {
chunkIndex := int(currentChunk.Add(1)) - 1
if chunkIndex >= len(chunks) {
// no more chunks
return
}
chunk := &chunks[chunkIndex]
err := c.downloadChunk(req.Clone(cancelCtx), chunk)
close(chunk.data)
if err != nil {
abort(err)
return
}
}
}()
}
go func() {
// copy chunks into the PipeWriter
for i := 0; i < len(chunks); i++ {
select {
case <-cancelCtx.Done():
abort(context.Canceled)
return
case data := <-chunks[i].data:
_, err := io.Copy(w, bytes.NewBuffer(data))
if err != nil {
abort(err)
}
}
}
// everything succeeded
w.Close()
}()
}
// GetStreamURL returns the url for a specific format
func (c *Client) GetStreamURL(video *Video, format *Format) (string, error) {
return c.GetStreamURLContext(context.Background(), video, format)
}
// GetStreamURLContext returns the url for a specific format with a context
func (c *Client) GetStreamURLContext(ctx context.Context, video *Video, format *Format) (string, error) {
if format == nil {
return "", ErrNoFormat
}
c.assureClient()
if format.URL != "" {
if c.client.androidVersion > 0 {
return format.URL, nil
}
return c.unThrottle(ctx, video.ID, format.URL)
}
// TODO: check rest of this function, is it redundant?
cipher := format.Cipher
if cipher == "" {
return "", ErrCipherNotFound
}
uri, err := c.decipherURL(ctx, video.ID, cipher)
if err != nil {
return "", err
}
return uri, err
}
// httpDo sends an HTTP request and returns an HTTP response.
func (c *Client) httpDo(req *http.Request) (*http.Response, error) {
client := c.HTTPClient
if client == nil {
client = http.DefaultClient
}
req.Header.Set("User-Agent", c.client.userAgent)
req.Header.Set("Origin", "https://youtube.com")
req.Header.Set("Sec-Fetch-Mode", "navigate")
if len(c.consentID) == 0 {
c.consentID = strconv.Itoa(rand.Intn(899) + 100) //nolint:gosec
}
req.AddCookie(&http.Cookie{
Name: "CONSENT",
Value: "YES+cb.20210328-17-p0.en+FX+" + c.consentID,
Path: "/",
Domain: ".youtube.com",
})
res, err := client.Do(req)
log := slog.With("method", req.Method, "url", req.URL)
if err == nil && res.StatusCode != http.StatusOK {
err = ErrUnexpectedStatusCode(res.StatusCode)
res.Body.Close()
res = nil
}
if err != nil {
log.Debug("HTTP request failed", "error", err)
} else {
log.Debug("HTTP request succeeded", "status", res.Status)
}
return res, err
}
// httpGet does a HTTP GET request, checks the response to be a 200 OK and returns it
func (c *Client) httpGet(ctx context.Context, url string) (*http.Response, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}
resp, err := c.httpDo(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, ErrUnexpectedStatusCode(resp.StatusCode)
}
return resp, nil
}
// httpGetBodyBytes reads the whole HTTP body and returns it
func (c *Client) httpGetBodyBytes(ctx context.Context, url string) ([]byte, error) {
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, err
}
defer resp.Body.Close()
return io.ReadAll(resp.Body)
}
// httpPost does a HTTP POST request with a body, checks the response to be a 200 OK and returns it
func (c *Client) httpPost(ctx context.Context, url string, body interface{}) (*http.Response, error) {
data, err := json.Marshal(body)
if err != nil {
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(data))
if err != nil {
return nil, err
}
req.Header.Set("X-Youtube-Client-Name", "3")
req.Header.Set("X-Youtube-Client-Version", c.client.version)
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
resp, err := c.httpDo(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, ErrUnexpectedStatusCode(resp.StatusCode)
}
return resp, nil
}
// httpPostBodyBytes reads the whole HTTP body and returns it
func (c *Client) httpPostBodyBytes(ctx context.Context, url string, body interface{}) ([]byte, error) {
resp, err := c.httpPost(ctx, url, body)
if err != nil {
return nil, err
}
defer resp.Body.Close()
return io.ReadAll(resp.Body)
}
// downloadChunk writes the response data into the data channel of the chunk.
// Downloading in multiple chunks is much faster:
// https://github.com/kkdai/youtube/pull/190
func (c *Client) downloadChunk(req *http.Request, chunk *chunk) error {
q := req.URL.Query()
q.Set("range", fmt.Sprintf("%d-%d", chunk.start, chunk.end))
req.URL.RawQuery = q.Encode()
resp, err := c.httpDo(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return ErrUnexpectedStatusCode(resp.StatusCode)
}
expected := int(chunk.end-chunk.start) + 1
data, err := io.ReadAll(resp.Body)
n := len(data)
if err != nil {
return err
}
if n != expected {
return fmt.Errorf("chunk at offset %d has invalid size: expected=%d actual=%d", chunk.start, expected, n)
}
chunk.data <- data
return nil
}