import pandas as pd
import numpy as np
import plotly.express as px
from IPython.display import Image
import stylecloud
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
# Read the cleaned funding dataset from disk and preview its first rows.
csv_path = 'Cleaned_data.csv'
data = pd.read_csv(csv_path)
data.head()
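# Output of data.head() as captured (only the leading columns were exported;
# layout reconstructed from the flattened cell dump):
#    Unnamed: 0 (int64)  Unnamed: 0.1 (int64)
# 0                   0                     0
# 1                   1                     1
# 2                   2                     2
# 3                   3                     3
# 4                   4                     4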
# Print a summary of the frame: column names, non-null counts and dtypes.
data.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 1208 entries, 0 to 1207
# Data columns (total 12 columns):
#  #   Column         Non-Null Count  Dtype
# ---  ------         --------------  -----
#  0   Unnamed: 0     1208 non-null   int64
#  1   Unnamed: 0.1   1208 non-null   int64
#  2   Company/Brand  1208 non-null   object
#  3   Founded        1207 non-null   float64
#  4   Headquarters   1207 non-null   object
#  5   Sector         1208 non-null   object
#  6   What it does   1208 non-null   object
#  7   Founders       1204 non-null   object
#  8   Investors      1149 non-null   object
#  9   Amount         1207 non-null   float64
#  10  Stage          776 non-null    object
#  11  Month          1208 non-null   object
# dtypes: float64(2), int64(2), object(8)
# memory usage: 113.4+ KB
# Replace missing funding amounts with the mean amount.
# The original code only substituted zeros, which left NaN entries in place
# even though the stated intent was to replace NaN values; both are handled now.
data['Amount'] = pd.to_numeric(data['Amount'])
# The mean is taken before any substitution (pandas skips NaN when averaging)
# and truncated to an integer, exactly as the previous implementation did.
mean_amount = int(data['Amount'].mean())
data['Amount'] = data['Amount'].replace(0, mean_amount).fillna(mean_amount)
# Count rows per sector and keep the 15 most frequent sectors.
# rename_axis(...).reset_index(name=...) produces the columns 'Sector' and
# 'Count' regardless of how value_counts() names its result. The previous
# rename of {'index': 'Sector', 'Sector': 'Count'} only worked on pandas < 2.0;
# on pandas >= 2.0 it left no 'Sector' column and the bar chart failed.
Sector_df = (
    data['Sector']
    .value_counts()
    .head(15)
    .rename_axis('Sector')
    .reset_index(name='Count')
)

# Bar chart of the per-sector counts, with the count printed inside each bar.
fig = px.bar(
    Sector_df,
    x='Sector',
    y='Count',
    text='Count',
    title='Funding Count in Different Sectors',
    color_discrete_sequence=['#1e847f'],
)
fig.update_layout(paper_bgcolor="#ecc19c", plot_bgcolor="#ecc19c")
fig.update_traces(textfont_size=14, textangle=0, textposition="inside", cliponaxis=False)
fig.show()
# Sum the funding amount per month; the frame stays indexed by month and also
# carries the month as a regular column so it can be used as the x axis.
month_df = data.groupby('Month')[['Amount']].sum()
month_df['Month'] = month_df.index
month_df = month_df.sort_values('Amount', ascending=False)

# Bar chart of monthly totals, ordered from largest to smallest amount.
fig = px.bar(
    month_df,
    x='Month',
    y='Amount',
    text='Amount',
    title='Funding in Different Months of 2021 (in $ )',
    color_discrete_sequence=['#1e847f'],
)
fig.update_layout(paper_bgcolor="#ecc19c", plot_bgcolor="#ecc19c")
fig.update_traces(textfont_size=14, textangle=0, textposition="inside", cliponaxis=False)
fig.show()
# Sum the funding amount per headquarters location, largest total first.
Location_df = data.groupby('Headquarters', as_index=False)['Amount'].sum()
Location_df = Location_df.sort_values('Amount', ascending=False)

# Bar chart restricted to the five locations with the highest totals.
fig = px.bar(
    Location_df.head(5),
    x='Headquarters',
    y='Amount',
    text='Amount',
    title='Funding in Different States of 2021 (in $ )',
    color_discrete_sequence=['#1e847f'],
)
fig.update_layout(paper_bgcolor="#ecc19c", plot_bgcolor="#ecc19c")
fig.update_traces(textfont_size=14, textangle=0, textposition="inside", cliponaxis=False)
fig.show()
# Number of rows per company/brand, limited to the 20 most frequent names.
companies = data['Company/Brand'].value_counts().iloc[:20]

# Bar chart of how many times each of those companies appears in the data.
fig = px.bar(
    x=companies.index,
    y=companies.values,
    text=companies.values,
    title='Number of Fundings a Startup got',
    color_discrete_sequence=['#1e847f'],
)
fig.update_layout(
    paper_bgcolor="#ecc19c",
    plot_bgcolor="#ecc19c",
    xaxis_title='Companies',
    yaxis_title='Count',
)
fig.update_traces(textfont_size=14, textangle=0, textposition="inside", cliponaxis=False)
fig.show()
# Number of rows per distinct 'Investors' value, limited to the 10 most frequent.
investor_df = data['Investors'].value_counts().head(10)

# Bar chart of those counts. The x axis shows investors, so it is labelled
# 'Investors' (it was previously mislabelled 'Companies', copied from the
# company chart).
fig = px.bar(
    x=investor_df.index,
    y=investor_df.values,
    text=investor_df.values,
    title='Investors with most Fundings',
    color_discrete_sequence=['#1e847f'],
)
fig.update_layout(
    paper_bgcolor="#ecc19c",
    plot_bgcolor="#ecc19c",
    xaxis_title='Investors',
    yaxis_title='Count',
)
fig.update_traces(textfont_size=14, textangle=0, textposition="inside", cliponaxis=False)
fig.show()
# Number of rows per funding stage, limited to the 5 most frequent stages.
funding_type_df = data['Stage'].value_counts().iloc[:5]

# Bar chart of the stage counts, with the count printed inside each bar.
fig = px.bar(
    x=funding_type_df.index,
    y=funding_type_df.values,
    text=funding_type_df.values,
    title='Investment Type with Funding',
    color_discrete_sequence=['#1e847f'],
)
fig.update_layout(
    paper_bgcolor="#ecc19c",
    plot_bgcolor="#ecc19c",
    xaxis_title='Investment type',
    yaxis_title='Count',
)
fig.update_traces(textfont_size=14, textangle=0, textposition="inside", cliponaxis=False)
fig.show()
# Concatenate every sector label into one space-separated string.
text = " ".join(data['Sector'].tolist())

# Render the sector text as a styled word cloud (at most 100 words, using the
# 'fas fa-brain' icon) and write it to sector.png.
stylecloud.gen_stylecloud(
    text=text,
    colors=['#ecc19c', '#1e847f'],
    background_color='White',
    icon_name='fas fa-brain',
    max_words=100,
    gradient='horizontal',
    output_name='sector.png',
    collocations=False,
)

# Display the generated image.
Image(filename='sector.png')