FIX YouTube transcript errors
This commit is contained in:
parent
041be54471
commit
2d92e94608
1 changed file with 19 additions and 3 deletions
|
|
@ -153,9 +153,14 @@ class YouTubeConverter(DocumentConverter):
|
||||||
params = parse_qs(parsed_url.query) # type: ignore
|
params = parse_qs(parsed_url.query) # type: ignore
|
||||||
if "v" in params and params["v"][0]:
|
if "v" in params and params["v"][0]:
|
||||||
video_id = str(params["v"][0])
|
video_id = str(params["v"][0])
|
||||||
|
transcript_list = ytt_api.list(video_id)
|
||||||
|
languages = ['en']
|
||||||
|
for transcript in transcript_list:
|
||||||
|
languages.append(transcript.language_code)
|
||||||
|
break
|
||||||
try:
|
try:
|
||||||
youtube_transcript_languages = kwargs.get(
|
youtube_transcript_languages = kwargs.get(
|
||||||
"youtube_transcript_languages", ("en",)
|
"youtube_transcript_languages", languages
|
||||||
)
|
)
|
||||||
# Retry the transcript fetching operation
|
# Retry the transcript fetching operation
|
||||||
transcript = self._retry_operation(
|
transcript = self._retry_operation(
|
||||||
|
|
@ -165,12 +170,21 @@ class YouTubeConverter(DocumentConverter):
|
||||||
retries=3, # Retry 3 times
|
retries=3, # Retry 3 times
|
||||||
delay=2, # 2 seconds delay between retries
|
delay=2, # 2 seconds delay between retries
|
||||||
)
|
)
|
||||||
|
|
||||||
if transcript:
|
if transcript:
|
||||||
transcript_text = " ".join(
|
transcript_text = " ".join(
|
||||||
[part.text for part in transcript]
|
[part.text for part in transcript]
|
||||||
) # type: ignore
|
) # type: ignore
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# No transcript available
|
||||||
|
if len(languages) == 1:
|
||||||
print(f"Error fetching transcript: {e}")
|
print(f"Error fetching transcript: {e}")
|
||||||
|
else:
|
||||||
|
# Translate transcript into first kwarg
|
||||||
|
transcript = transcript_list.find_transcript(languages).translate(youtube_transcript_languages[0]).fetch()
|
||||||
|
transcript_text = " ".join(
|
||||||
|
[part.text for part in transcript]
|
||||||
|
)
|
||||||
if transcript_text:
|
if transcript_text:
|
||||||
webpage_text += f"\n### Transcript\n{transcript_text}\n"
|
webpage_text += f"\n### Transcript\n{transcript_text}\n"
|
||||||
|
|
||||||
|
|
@ -222,3 +236,5 @@ class YouTubeConverter(DocumentConverter):
|
||||||
attempt += 1
|
attempt += 1
|
||||||
# If all attempts fail, raise the last exception
|
# If all attempts fail, raise the last exception
|
||||||
raise Exception(f"Operation failed after {retries} attempts.")
|
raise Exception(f"Operation failed after {retries} attempts.")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue