!python -m spacy download en_core_web_sm
!python -m textblob.download_corpora
import requests
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
from spacy import displacy
import csv
from matplotlib import pyplot as plt
import pandas as pd
from collections import Counter
# Download the plain-text edition of the novel from Project Gutenberg.
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get("https://gutenberg.org/files/345/345-0.txt", timeout=30)
# Fail loudly on an HTTP error instead of carrying on without `contents`,
# which would only surface further down as a confusing NameError.
response.raise_for_status()
# Decode the body explicitly as UTF-8.
response.encoding = 'utf-8'
contents = response.text
# Peek at the head of the file to confirm the download looks right.
print(contents[:500])
# Keep only the span from the standalone "DRACULA" line (five line breaks on
# either side) up to, but not including, the first 'THE END'.
# str.index is used rather than str.find so that a missing marker raises
# ValueError instead of quietly returning -1 and producing a wrong slice.
body_start = contents.index('\r\n\r\n\r\n\r\n\r\nDRACULA\r\n\r\n\r\n\r\n\r\n')
body_end = contents.index('THE END')
text = contents[body_start:body_end]
# Split the text on the word 'CHAPTER'. The first piece is whatever precedes
# the first occurrence, so show it and then discard it.
chapters = text.split('CHAPTER')
print(chapters[0])
del chapters[0]
# Spot-check the start of the first, second and last remaining pieces.
for position in (0, 1, -1):
    print(chapters[position][:50])
# Cut each piece so that it starts at its first line break, dropping the
# remainder of the heading line left over from the split.
chapters = [piece[piece.find('\r\n'):] for piece in chapters]
for position in (0, 1, -1):
    print(chapters[position][:50])

# Glue the pieces back into one string, turning underscores and line breaks
# into plain spaces.
text = ''.join(
    piece.replace('_', ' ').replace('\r\n', ' ')
    for piece in chapters
)
# Load the spaCy pipeline named 'en_core_web_sm'.
nlp = spacy.load('en_core_web_sm')
# Add the 'spacytextblob' component to the pipeline; the `._.polarity` values
# read further down presumably come from it — confirm against its docs.
nlp.add_pipe('spacytextblob')
# Show the component names of the resulting pipeline.
print(nlp.pipe_names)
# Run the pipeline over the whole cleaned novel text.
doc_book = nlp(text)
# Polarity score of every sentence in the novel, in document order.
sentence_sentiments = [sent._.polarity for sent in doc_book.sents]
sentence_sentiments[:10]
# Pair each score with its sentence while the two sequences still line up,
# i.e. before the zero scores are filtered out of `sentence_sentiments`.
sent_dic = list(zip(sentence_sentiments, doc_book.sents))
# Keep only the scores that are not zero.
sentence_sentiments = [score for score in sentence_sentiments if score != 0]
sentence_sentiments[:10]
# NOTE(review): plot_sentiments_average is not defined in the visible part of
# this file — confirm it is defined before this line runs.
plot_sentiments_average(
    sentence_sentiments,
    30,
    'the original Dracula novel',
)
# Plot summary of the novel as one multi-paragraph string.
# NOTE(review): the variable name suggests the text was taken from Wikipedia —
# confirm the source.
book_plot_wiki = """Jonathan Harker, a newly qualified English solicitor, visits Count Dracula at his castle in the Carpathian Mountains to help the Count purchase a house near London. Ignoring the Count's warning, Harker wanders the castle and encounters three vampire women; Dracula rescues Harker, and gives the women a small child bound inside a bag. Harker awakens in bed; soon after, Dracula leaves the castle, abandoning him to the women; Harker escapes with his life and ends up delirious in a Budapest hospital. Dracula takes a ship for England with boxes of earth from his castle. The captain's log narrates the crew's disappearance until he alone remains, bound to the helm to maintain course. An animal resembling a large dog is seen leaping ashore when the ship runs aground at Whitby.
Lucy Westenra's letter to her best friend, Harker's fiancée Mina Murray, describes her marriage proposals from Dr. John Seward, Quincey Morris, and Arthur Holmwood. Lucy accepts Holmwood's, but all remain friends. Mina joins her friend Lucy on holiday in Whitby. Lucy begins sleepwalking. After his ship lands there, Dracula stalks Lucy. Mina receives a letter about her missing fiancé's illness, and goes to Budapest to nurse him. Lucy becomes very ill. Seward's old teacher, Professor Abraham Van Helsing, determines the nature of Lucy's condition, but refuses to disclose it. He diagnoses her with acute blood-loss. Van Helsing places garlic flowers around her room and makes her a necklace of them. Lucy's mother removes the garlic flowers, not knowing they repel vampires. While Seward and Van Helsing are absent, Lucy and her mother are terrified by a wolf and Mrs. Westenra dies of a heart attack; Lucy dies shortly thereafter. After her burial, newspapers report children being stalked in the night by a "bloofer lady" (beautiful lady), and Van Helsing deduces it is Lucy. The four go to her tomb and see that she is a vampire. They stake her heart, behead her, and fill her mouth with garlic. Jonathan Harker and his now-wife Mina have returned, and they join the campaign against Dracula.
Everyone stays at Dr. Seward's asylum as the men begin to hunt Dracula. Van Helsing finally reveals that vampires can only rest on earth from their homeland. Dracula communicates with Seward's patient, Renfield, an insane man who eats vermin to absorb their life force. After Dracula learns of the group's plot against him, he uses Renfield to enter the asylum. He secretly attacks Mina three times, drinking her blood each time and forcing Mina to drink his blood on the final visit. She is cursed to become a vampire after her death unless Dracula is killed. As the men find Dracula's properties, they discover many earth boxes within. The vampire hunters open each of the boxes and seal wafers of sacramental bread inside them, rendering them useless to Dracula. They attempt to trap the Count in his Piccadilly house, but he escapes. They learn that Dracula is fleeing to his castle in Transylvania with his last box. Mina has a faint psychic connection to Dracula, which Van Helsing exploits via hypnosis to track Dracula's movements. Guided by Mina, they pursue him.
In Galatz, Romania, the hunters split up. Van Helsing and Mina go to Dracula's castle, where the professor destroys the vampire women. Jonathan Harker and Arthur Holmwood follow Dracula's boat on the river, while Quincey Morris and John Seward parallel them on land. After Dracula's box is finally loaded onto a wagon by Szgany men, the hunters converge and attack it. After routing the Szgany, Harker slashes Dracula's neck and Quincey stabs him in the heart. Dracula crumbles to dust, freeing Mina from her vampiric curse. Quincey is mortally wounded in the fight against the Szgany. He dies from his wounds, at peace with the knowledge that Mina is saved. A note by Jonathan Harker seven years later states that the Harkers have a son, named Quincey."""
# Parse the summary with the same `nlp` pipeline that processed the novel.
doc_book_plot = nlp(book_plot_wiki)
def calc_sentence_sentiments(doc):
    """Return the non-zero sentence polarity scores of *doc*, in order.

    Args:
        doc: Object whose ``sents`` iterable yields sentences that expose a
            ``._.polarity`` number (here: a document produced by ``nlp``).

    Returns:
        A list holding the polarity of every sentence whose score is not 0.
    """
    # Single pass: read each score lazily and drop the zeros as we go.
    scores = (sentence._.polarity for sentence in doc.sents)
    return [score for score in scores if score != 0]
# Non-zero sentence polarities of the plot summary.
wiki_sentiments = calc_sentence_sentiments(doc_book_plot)
# Document-level polarity of the novel and of the summary.
doc_book._.polarity
doc_book_plot._.polarity
# Highlight the named entities found in the first 200 tokens of the novel.
# `jupyter` is a boolean flag, so pass True rather than the string 'True'.
displacy.render(doc_book[:200], style='ent', jupyter=True)
# Every PERSON entity mention, duplicates included so they can be counted.
characters_wiki = [
    entity.text for entity in doc_book_plot.ents if entity.label_ == 'PERSON'
]
characters_book = [
    entity.text for entity in doc_book.ents if entity.label_ == 'PERSON'
]
# 15 most frequent names in the novel's text
Counter(characters_book).most_common(15)
# 15 most frequent names in the plot summary
Counter(characters_wiki).most_common(15)
# Names that occur in the summary but were never tagged in the novel; the
# second print adds a made-up name that is expected to show up in the result.
book_names = set(characters_book)
print(set(characters_wiki) - book_names)
print(set(characters_wiki + ['another character']) - book_names)
# Distinct WORK_OF_ART entity texts found in the novel.
{entity.text for entity in doc_book.ents if entity.label_ == 'WORK_OF_ART'}
# Every TIME entity mention in the novel, in document order.
print([entity.text for entity in doc_book.ents if entity.label_ == 'TIME'])
# Entities of the plot summary that are neither PERSON nor CARDINAL,
# printed together with their label.
for ent in doc_book_plot.ents:
    if ent.label_ not in ('PERSON', 'CARDINAL'):
        print(ent.text, ent.label_)
-- Notebook SQL cell (not Python): every column of the plots CSV plus LEN(Plot) for each row.
SELECT *, LEN(Plot)
FROM '/work/dracula_nosferatu_plots.csv'
# Read every row of the plots CSV into a list of lists of strings.
# newline='' is the setting the csv module expects for files it parses.
with open('/work/dracula_nosferatu_plots.csv', newline='') as f:
    reader = csv.reader(f)
    movie_plots = [*reader]
class movie:
    """One film, built from a row of the plots table.

    Every instance registers itself in the class-level ``instances`` list so
    that all films can be iterated later without tracking them separately.
    """

    # Registry of every object created so far, in creation order. It is
    # deliberately shared by all instances.
    instances = []

    def __init__(self, item, varname):
        """Store the row's fields and add the new object to ``instances``.

        Args:
            item: Sequence describing the film. Element 0 is stored as the
                release year, element 1 as the title and the last element
                as the plot text.
            varname: Name stored on the object as ``name``.
        """
        self.name = varname
        self.release_year = item[0]
        self.title = item[1]
        self.plot = item[-1]
        type(self).instances.append(self)

    def __repr__(self):
        """Return a short, debugging-friendly description of the film."""
        return f'{type(self).__name__}({self.title!r}, {self.release_year!r})'

    def spacy_magic(self):
        """Run the module-level ``nlp`` pipeline on the plot.

        The resulting document is stored on the instance as ``self.nlp``;
        the attribute does not exist until this method has been called.
        """
        self.nlp = nlp(self.plot)
# Build one movie object per row after the first and also publish it as a
# module-level global.
for film in movie_plots[1:]:
    # name = first column followed by the first three characters of the second
    varname = f'{film[0]}{film[1][:3]}'
    # NOTE(review): column 0 is what the class stores as `release_year`; if it
    # is a numeric year the name starts with a digit and is reachable only
    # through globals()[...] or movie.instances — confirm this is intended.
    globals()[varname] = movie(film, varname)

# Parse the plot of every registered film.
for film in movie.instances:
    film.spacy_magic()
# For each film, show the distinct GPE / FAC / ORG entity texts of its plot.
for film in movie.instances:
    print('\n', film.title, film.release_year, 'focus:')
    focus = [
        ent.text
        for ent in film.nlp.ents
        if ent.label_ in ('GPE', 'FAC', 'ORG')
    ]
    print(set(focus))

# Document-level polarity of each plot, rounded to two decimals.
for film in movie.instances:
    mood = round(film.nlp._.polarity, 2)
    print('\n', film.title, film.release_year, 'average plot mood: ', mood)
def display_only_entities(doc, types):
    """Render each distinct entity of a parsed plot whose label is in *types*.

    An entity text is rendered at most once: the first time it occurs with
    one of the requested labels.

    Args:
        doc: Object whose ``nlp`` attribute exposes ``ents`` (for example a
            ``movie`` instance after ``spacy_magic()`` has been called).
        types: Container of entity labels to show.
    """
    shown = set()  # texts already rendered; a set keeps the lookup cheap
    for ent in doc.nlp.ents:
        if ent.text in shown or ent.label_ not in types:
            continue
        # `jupyter` is a boolean flag, so pass True rather than the string 'True'.
        displacy.render(ent, style='ent', jupyter=True)
        shown.add(ent.text)
# Leftover notebook scratch expressions; they assign nothing.
# NOTE(review): `movienum` is not assigned anywhere in the visible part of
# this file — confirm it exists before this line runs, or it raises NameError.
movienum
15 / 28
# `focus` still holds the entity list built for the last film in the loop above.
focus
# Collect, across all films, the PERSON entities that contain 'helsing'
# (case-insensitive) and consist of at least three space-separated parts.
family = []
for film in movie.instances:
    family.extend(
        ent.text
        for ent in film.nlp.ents
        if ent.label_ == 'PERSON'
        and 'helsing' in ent.text.lower()
        and len(ent.text.split(' ')) >= 3
    )
# Print each distinct name on its own line.
print(*set(family), sep='\n')