# Core / notebook utilities
import os
from IPython.display import Image
from IPython.display import HTML
from colorama import Fore as F
from collections import Counter
# General-purpose numerics and dataframes
import numpy as np
import pandas as pd
# Data visualization
import seaborn as sns
import matplotlib.pyplot as plt
import stylecloud
# Scikit-learn text feature extraction (plus regex helper)
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
# External text-analysis libraries
from textstat import flesch_reading_ease
import gensim
import pyLDAvis
import pyLDAvis.gensim_models
import nltk
import spacy
# NLTK corpora/models required below (downloaded once, cached locally)
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
# NLTK stemmers / tokenizers / lemmatizers
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import RegexpTokenizer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk import word_tokenize
# Suppress library warnings to keep notebook output clean
import warnings
warnings.filterwarnings("ignore")
# Plot the most frequent words, skipping stop words and collapsing words that
# share a Porter stem (e.g. "game"/"games" are counted only once).
# NOTE(review): `corpus` (token sequence) and `stop` (stop-word collection)
# are defined elsewhere in the notebook — assumed to be in scope here.
ps = PorterStemmer()
counter = Counter(corpus)
most = counter.most_common()
x, y = [], []
seen_stems = set()  # set gives O(1) membership; the original list was O(n) per check
for word, count in most[:120]:
    lowered = word.lower()
    stem = ps.stem(lowered)  # stem once per word instead of twice
    if lowered not in stop and stem not in seen_stems and word.isalpha():
        x.append(word)
        y.append(count)
        seen_stems.add(stem)
sns.barplot(x=y, y=x, color="#212121")
plt.title("The most common words used in titles", fontsize=20)
# Fit a 5-topic LDA model over the pre-built bag-of-words corpus and its
# dictionary, then display the discovered topics.
lda_params = {
    "num_topics": 5,   # number of latent topics to extract
    "id2word": dic,    # gensim Dictionary mapping token ids -> tokens
    "passes": 10,      # full training passes over the corpus
    "workers": 2,      # parallel worker processes
}
lda_model = gensim.models.LdaMulticore(bow_corpus, **lda_params)
lda_model.show_topics()
# Histogram of Flesch reading-ease scores per title (higher = easier to read).
# The lambda wrapper was redundant: pass the scoring function directly.
# NOTE(review): `df` is defined elsewhere in the notebook; assumes `title`
# is a string column with no NaN values — confirm upstream cleaning.
df.title.apply(flesch_reading_ease).hist(color='black');