Files
youtube-downloader/downloader/vendor/github.com/kkdai/youtube/v2/transcript.go

215 lines
5.6 KiB
Go

package youtube
import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
)
// ErrTranscriptDisabled is the sentinel error reported when a video has no
// usable transcript (transcripts disabled, or no segments in the response).
var ErrTranscriptDisabled = errors.New("transcript is disabled on this video")
// TranscriptSegment is a single transcript segment spanning a few milliseconds.
type TranscriptSegment struct {
	// Text is the transcript text.
	Text string `json:"text"`

	// StartMs is the start timestamp in ms.
	StartMs int `json:"offset"`

	// OffsetText e.g. '4:00'.
	OffsetText string `json:"offsetText"`

	// Duration the transcript segment spans in ms.
	Duration int `json:"duration"`
}

// String renders the segment as "<OffsetText> - <Text>", with leading and
// trailing whitespace removed from the text.
func (tr TranscriptSegment) String() string {
	return tr.OffsetText + " - " + strings.TrimSpace(tr.Text)
}

// VideoTranscript is a list of transcript segments.
type VideoTranscript []TranscriptSegment

// String renders one segment per line, every line (including the last)
// terminated by "\n". An empty or nil transcript yields the empty string.
func (vt VideoTranscript) String() string {
	// Accumulate into one buffer: repeated string += re-copies everything
	// written so far on each segment, which is quadratic for long transcripts.
	var b strings.Builder
	for _, tr := range vt {
		b.WriteString(tr.String())
		b.WriteByte('\n')
	}
	return b.String()
}
// GetTranscript fetches the video transcript if available.
//
// It is a convenience wrapper that calls GetTranscriptCtx with
// context.Background(); video and lang are passed through unchanged.
//
// Not all videos have transcripts, only relatively new videos.
// If transcripts are disabled or not available, ErrTranscriptDisabled is returned.
func (c *Client) GetTranscript(video *Video, lang string) (VideoTranscript, error) {
	ctx := context.Background()
	return c.GetTranscriptCtx(ctx, video, lang)
}
// GetTranscriptCtx fetches the video transcript if available, using ctx for
// the underlying request.
//
// An error is returned when video is nil or has an empty ID. Errors from the
// transcript request and from decoding its response are returned unchanged.
//
// Not all videos have transcripts, only relatively new videos.
// If transcripts are disabled or not available, ErrTranscriptDisabled is returned.
func (c *Client) GetTranscriptCtx(ctx context.Context, video *Video, lang string) (VideoTranscript, error) {
	c.assureClient()

	if video == nil || video.ID == "" {
		return nil, fmt.Errorf("no video provided")
	}

	raw, err := c.transcriptDataByInnertube(ctx, video.ID, lang)
	if err != nil {
		return nil, err
	}

	// parseTranscript already yields (nil, err) on failure, so its result
	// can be handed straight back to the caller.
	return parseTranscript(raw)
}
// parseTranscript decodes a transcript API response body into segments.
//
// Only the first entry of "actions" is examined. An Android-style payload is
// converted via getSegments; a web-style payload is reported as
// "not implemented". A response without actions, or whose first action
// carries neither payload, yields ErrTranscriptDisabled. JSON decoding errors
// are returned as-is.
func parseTranscript(body []byte) (VideoTranscript, error) {
	var resp transcriptResp
	err := json.Unmarshal(body, &resp)
	if err != nil {
		return nil, err
	}

	if len(resp.Actions) == 0 {
		return nil, ErrTranscriptDisabled
	}

	first := resp.Actions[0]
	switch {
	case first.AppSegment != nil:
		// Android client response
		return getSegments(first.AppSegment)
	case first.WebSegment != nil:
		// Web client response
		return nil, fmt.Errorf("not implemented")
	default:
		return nil, ErrTranscriptDisabled
	}
}
// segmenter is implemented by response payloads that can be flattened into
// transcript segments.
type segmenter interface {
	ParseSegments() []TranscriptSegment
}

// getSegments returns the segments produced by f, or ErrTranscriptDisabled
// when f produces none.
func getSegments(f segmenter) (VideoTranscript, error) {
	segments := f.ParseSegments()
	if len(segments) == 0 {
		return nil, ErrTranscriptDisabled
	}
	return segments, nil
}
// transcriptResp is the JSON structure as returned by the transcript API.
// Only the "actions" array is decoded; each action may carry one of two
// client-specific payloads, left nil when its key is absent.
type transcriptResp struct {
Actions []struct {
// AppSegment is the Android client payload ("elementsCommand").
AppSegment *appData `json:"elementsCommand"`
// WebSegment is the web client payload ("updateEngagementPanelAction").
WebSegment *webData `json:"updateEngagementPanelAction"`
} `json:"actions"`
}
// appData mirrors the nested JSON of the Android client payload
// ("elementsCommand"). Only the path down to "initialSegments" is declared;
// every other key in the response is ignored by the decoder.
type appData struct {
TEC struct {
Args struct {
ListArgs struct {
Ow struct {
// InitialSeg holds one entry per transcript segment.
InitialSeg []struct {
TranscriptSegment struct {
// StartMs and EndMs arrive as JSON strings and are converted
// to integers by ParseSegments.
StartMs string `json:"startMs"`
EndMs string `json:"endMs"`
Text struct {
String struct {
// Content is the actual transcript text
Content string `json:"content"`
} `json:"elementsAttributedString"`
} `json:"snippet"`
StartTimeText struct {
String struct {
// Content is the formatted timestamp, e.g. '4:00'
Content string `json:"content"`
} `json:"elementsAttributedString"`
} `json:"startTimeText"`
} `json:"transcriptSegmentRenderer"`
} `json:"initialSegments"`
} `json:"overwrite"`
} `json:"transformTranscriptSegmentListArguments"`
} `json:"arguments"`
} `json:"transformEntityCommand"`
}
// ParseSegments converts the payload's initial segments into
// TranscriptSegment values, one per entry and in the same order.
// Duration is computed as the end timestamp minus the start timestamp.
func (s *appData) ParseSegments() []TranscriptSegment {
	raw := s.TEC.Args.ListArgs.Ow.InitialSeg
	out := make([]TranscriptSegment, 0, len(raw))

	for i := range raw {
		r := &raw[i].TranscriptSegment

		// Conversion errors are deliberately discarded (best effort):
		// a non-numeric timestamp is treated as 0.
		start, _ := strconv.Atoi(r.StartMs)
		end, _ := strconv.Atoi(r.EndMs)

		out = append(out, TranscriptSegment{
			Text:       r.Text.String.Content,
			StartMs:    start,
			OffsetText: r.StartTimeText.String.Content,
			Duration:   end - start,
		})
	}

	return out
}
// webData mirrors the nested JSON of the web client payload
// ("updateEngagementPanelAction"). Only the path down to the cue groups is
// declared.
type webData struct {
Content struct {
TR struct {
Body struct {
TBR struct {
// Cues holds the cue groups (JSON key "cueGroups"), not individual cues.
Cues []struct {
Transcript struct {
// FormattedStartOffset is used as the segment's OffsetText.
FormattedStartOffset struct {
SimpleText string `json:"simpleText"`
} `json:"formattedStartOffset"`
// Cues are the individual cues of this group.
Cues []struct {
TranscriptCueRenderer struct {
Cue struct {
SimpleText string `json:"simpleText"`
} `json:"cue"`
// StartOffsetMs and DurationMs arrive as JSON strings and are
// converted to integers by ParseSegments.
StartOffsetMs string `json:"startOffsetMs"`
DurationMs string `json:"durationMs"`
} `json:"transcriptCueRenderer"`
} `json:"cues"`
} `json:"transcriptCueGroupRenderer"`
} `json:"cueGroups"`
} `json:"transcriptSearchPanelRenderer"`
} `json:"content"`
} `json:"transcriptRenderer"`
} `json:"content"`
}
// ParseSegments converts the payload's cue groups into TranscriptSegment
// values, using the first cue of each group for text, start and duration and
// the group's formatted start offset as OffsetText.
//
// Cue groups that contain no cues are skipped, so the result may be shorter
// than the number of groups.
//
// TODO: doesn't actually work now, check json.
func (s *webData) ParseSegments() []TranscriptSegment {
	groups := s.Content.TR.Body.TBR.Cues
	segments := make([]TranscriptSegment, 0, len(groups))

	for _, group := range groups {
		cues := group.Transcript.Cues
		if len(cues) == 0 {
			// Indexing cues[0] on an empty group would panic with an
			// index-out-of-range error; the data comes from a remote
			// response, so skip the group instead.
			continue
		}
		cue := cues[0].TranscriptCueRenderer

		// Conversion errors are deliberately discarded (best effort):
		// a non-numeric value is treated as 0.
		start, _ := strconv.Atoi(cue.StartOffsetMs)
		duration, _ := strconv.Atoi(cue.DurationMs)

		segments = append(segments, TranscriptSegment{
			Text:       cue.Cue.SimpleText,
			StartMs:    start,
			OffsetText: group.Transcript.FormattedStartOffset.SimpleText,
			Duration:   duration,
		})
	}

	return segments
}