# Start writing code here
!pip install requirements.txt
pip install youtube-data-api
from youtube_api import YouTubeDataAPI
from youtube_api import parsers
import os

# SECURITY: prefer supplying the key through the YOUTUBE_API_KEY environment
# variable. The literal below is kept only as a fallback so the script keeps
# working unchanged.
# NOTE(review): this key is committed in source; it should be revoked and the
# fallback removed once the environment variable is configured.
api_key = os.environ.get(
    'YOUTUBE_API_KEY',
    'AIzaSyCjfvDwwfe322328YBcpKYH0_BYt0b50NLDDN0-Pw0',
)
yt = YouTubeDataAPI(api_key)

# Exploratory queries: their results are not stored anywhere.
yt.search(q='что было дальше', max_results=5, order_by='relevance')
yt.get_playlists(channel_id='UCNqktdxgAADBj36dC7VGOgg')

# Collect the metadata of every video in the playlist, one API call per video.
video_data = []
all_videos = yt.get_videos_from_playlist_id('PLmkbS48df313zBeQtxckns8Nq4IyWhV1P')
for item in all_videos:
    video_data.append(yt.get_video_metadata(item['video_id']))
video_data

# One DataFrame row per element of video_data.
import pandas as pd
df = pd.DataFrame(video_data)
df
from natasha import (
Segmenter,
MorphVocab,
NewsEmbedding,
NewsMorphTagger,
NewsSyntaxParser,
NewsNERTagger,
PER,
NamesExtractor,
Doc
)
# Natasha components, constructed once at module level.

# A single embedding instance is passed to each of the News* models.
emb = NewsEmbedding()
ner_tagger = NewsNERTagger(emb)
morph_tagger = NewsMorphTagger(emb)
syntax_parser = NewsSyntaxParser(emb)

# Components that do not take the embedding.
segmenter = Segmenter()
morph_vocab = MorphVocab()
names_extractor = NamesExtractor(morph_vocab)
def return_names(data):
    """Return the ``"PER"`` spans found in *data* as a stringified list.

    Builds a ``Doc`` from ``data``, runs the module-level ``segmenter`` and
    ``ner_tagger`` over it, and keeps every span whose type is ``"PER"``.
    Each kept span is printed as a side effect.

    Args:
        data: Text to analyse.

    Returns:
        ``str()`` of the list of matching span texts (``"[]"`` when no span
        matched).
    """
    document = Doc(data)
    document.segment(segmenter)
    document.tag_ner(ner_tagger)

    person_spans = [span for span in document.spans if span.type == "PER"]
    for span in person_spans:
        print(span)
    return str([span.text for span in person_spans])
# Add a PERSON column holding, for each video title, the stringified list
# produced by return_names.
df['PERSON']=df['video_title'].apply(return_names)
# Bare expression with no effect in a script; presumably left over from a
# notebook cell, where it displays the frame.
df
def return_orgs(data):
    """Return the ``"ORG"`` spans found in *data* as a stringified list.

    Builds a ``Doc`` from ``data``, runs the module-level ``segmenter`` and
    ``ner_tagger`` over it, and keeps every span whose type is ``"ORG"``.
    Each kept span is printed as a side effect.

    Args:
        data: Text to analyse.

    Returns:
        ``str()`` of the list of matching span texts (``"[]"`` when no span
        matched).
    """
    document = Doc(data)
    document.segment(segmenter)
    document.tag_ner(ner_tagger)

    org_spans = [span for span in document.spans if span.type == "ORG"]
    for span in org_spans:
        print(span)
    return str([span.text for span in org_spans])
# Add an ORGANISATION column holding, for each video description, the
# stringified list produced by return_orgs.
df['ORGANISATION']=df['video_description'].apply(return_orgs)
# Bare expression with no effect in a script; presumably left over from a
# notebook cell, where it displays the frame.
df
import re
# Matches "http://" or "https://" followed by host characters (word chars,
# "-", ".") or percent-escapes. The match stops at the first other character,
# so paths and query strings are not part of the result.
_URL_HOST_RE = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')

# Exact matches that are dropped from the result.
_EXCLUDED_LINKS = frozenset({
    'https://instagram.com',
    'https://www.instagram.com',
})


def get_links(data):
    """Return the scheme-and-host part of every URL found in *data*.

    Matches are returned in order of appearance, duplicates included.
    Matches that are exactly ``https://instagram.com`` or
    ``https://www.instagram.com`` are omitted.

    Args:
        data: Text to scan. Any non-string value (e.g. ``None`` or NaN for a
            missing description) is treated as containing no links.

    Returns:
        A list of matched strings, possibly empty.
    """
    if not isinstance(data, str):
        return []
    return [
        link
        for link in _URL_HOST_RE.findall(data)
        if link not in _EXCLUDED_LINKS
    ]
# Add a LINKS column holding, for each video description, the list returned
# by get_links.
df['LINKS']=df['video_description'].apply(get_links)
# Bare expression with no effect in a script; presumably left over from a
# notebook cell, where it displays the frame.
df