import requests
def get_current_talk_id_by_url(url):
    """Fetch a TED talk page and extract its talk id.

    The id is read from the ``"current_talk":"<id>"`` fragment embedded in
    the page body.

    Args:
        url: Address of the talk page to download.

    Returns:
        The talk id as a string, or ``False`` when the request fails, the
        response status is not 200, or the page contains no
        ``current_talk`` marker.
    """
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        page = requests.get(url, timeout=30)
    except requests.RequestException:
        # Malformed URLs, connection failures and timeouts are reported
        # through the same False sentinel as a non-200 response.
        return False
    if page.status_code != 200:
        return False
    # partition() never raises: ``found`` is empty when the marker is absent,
    # whereas indexing the result of split() would raise IndexError.
    _, found, remainder = page.text.partition('"current_talk":"')
    if not found:
        return False
    # The id runs up to the closing quote of the JSON string value.
    return remainder.partition('"')[0]
def get_full_transcript_by_url(url, language='en'):
    """Download the transcript of a TED talk as plain text.

    Args:
        url: Address of the talk page.
        language: Language code sent to the transcript endpoint
            (defaults to ``'en'``).

    Returns:
        The transcript as a string with one line per paragraph; every cue is
        followed by a single space and newlines inside a cue are replaced by
        spaces. On failure an ``'Error: ...'`` message string is returned
        instead of raising.
    """
    current_talk_id = get_current_talk_id_by_url(url)
    # The lookup signals failure with the False sentinel specifically.
    if current_talk_id is False:
        return 'Error: URL does not match a valid ted talk or the server is unavailable .'

    unavailable = 'Error: Transcription for the selected language is not available or the server is unavailable.'
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        response = requests.get(
            f'https://www.ted.com/talks/{current_talk_id}/transcript.json?language={language}',
            timeout=30,
        )
    except requests.RequestException:
        return unavailable
    if response.status_code != 200:
        return unavailable

    # Build each paragraph once and join at the end instead of growing a
    # string with repeated concatenation.
    paragraphs = []
    for paragraph in response.json()['paragraphs']:
        text = ''.join(
            cue['text'].replace('\n', ' ') + ' ' for cue in paragraph['cues']
        )
        paragraphs.append(text + '\n')
    return ''.join(paragraphs)
def get_full_transcript_by_list_of_urls(url_list, language='en'):
    """Fetch the transcript for every URL in ``url_list``.

    Args:
        url_list: Iterable of talk page URLs.
        language: Language code forwarded to ``get_full_transcript_by_url``;
            defaults to ``'en'`` to match that function's default.

    Returns:
        A list of ``[url, result]`` two-item lists in input order, where
        ``result`` is whatever ``get_full_transcript_by_url`` returned for
        that URL. An empty input yields an empty list.
    """
    return [
        [url, get_full_transcript_by_url(url, language)]
        for url in url_list
    ]
# TED talk page URLs whose transcripts should be downloaded; edit this list.
teds = [
'https://www.ted.com/talks/david_birch_a_new_way_to_stop_identity_theft',
'https://www.ted.com/talks/patty_mccord_4_lessons_the_pandemic_taught_us_about_work_life_and_balance',
'https://www.ted.com/talks/ricardo_semler_how_to_run_a_company_with_almost_no_rules',
'https://www.ted.com/talks/patty_mccord_8_lessons_on_building_a_company_people_enjoy_working_for'
]
# The second argument selects the transcript language code ('en' here).
transcriptions = get_full_transcript_by_list_of_urls(teds, 'en')
# Bare expression: shows the result when evaluated as the last line of a
# notebook/REPL cell; it has no visible effect when run as a plain script.
transcriptions