FIX YouTube transcript errors

This commit is contained in:
Josh 2025-05-08 14:11:09 -07:00
parent 041be54471
commit 2d92e94608

View file

@ -153,24 +153,38 @@ class YouTubeConverter(DocumentConverter):
params = parse_qs(parsed_url.query) # type: ignore
if "v" in params and params["v"][0]:
video_id = str(params["v"][0])
transcript_list = ytt_api.list(video_id)
languages = ['en']
for transcript in transcript_list:
languages.append(transcript.language_code)
break
try:
youtube_transcript_languages = kwargs.get(
"youtube_transcript_languages", ("en",)
"youtube_transcript_languages", languages
)
# Retry the transcript fetching operation
transcript = self._retry_operation(
lambda: ytt_api.fetch(
video_id, languages=youtube_transcript_languages
video_id,languages = youtube_transcript_languages
),
retries=3, # Retry 3 times
delay=2, # 2 seconds delay between retries
)
if transcript:
transcript_text = " ".join(
[part.text for part in transcript]
) # type: ignore
except Exception as e:
print(f"Error fetching transcript: {e}")
# No transcript available
if len(languages) == 1:
print(f"Error fetching transcript: {e}")
else:
# Translate transcript into first kwarg
transcript = transcript_list.find_transcript(languages).translate(youtube_transcript_languages[0]).fetch()
transcript_text = " ".join(
[part.text for part in transcript]
)
if transcript_text:
webpage_text += f"\n### Transcript\n{transcript_text}\n"
@ -222,3 +236,5 @@ class YouTubeConverter(DocumentConverter):
attempt += 1
# If all attempts fail, raise the last exception
raise Exception(f"Operation failed after {retries} attempts.")