Descripción general del Data set
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the unicorn-startups dataset and take a first look at it.
df = pd.read_csv('Unicorn_Startups.csv')
df.head()
df.columns
# 'Unnamed: 0' is presumably the index column written out by an earlier
# DataFrame.to_csv call -- confirm against the CSV. It is dropped up front.
df = df.drop(columns=['Unnamed: 0'])
df
df.shape
df.columns
# Missing values per column, before and after filling.
df.isnull().sum()
# Fill gaps with the 'Nadie' placeholder in non-numeric columns only. A blanket
# fillna would also write the string into numeric columns and turn them into
# mixed-type object columns that can no longer be sorted or summed.
text_columns = df.select_dtypes(exclude='number').columns
df = df.fillna(dict.fromkeys(text_columns, 'Nadie'))
df.isnull().sum()
df.info()
df.nunique()
df['Industry'].value_counts()
# Store 'Industry' as a categorical column.
df['Industry'] = df['Industry'].astype('category')
df.info()
df.describe()
Analizando las variables del Data set
# Column labels of the DataFrame.
df.columns
Company (Unicorn Startup Name)
# The ten rows with the highest valuation, largest first.
df_val = df.sort_values(by='Valuation ($B)', ascending=False).iloc[:10]
df_val
# Bar chart of those ten companies; labels are rotated so they stay readable.
plt.bar(x=df_val['Company'], height=df_val['Valuation ($B)'])
plt.xticks(rotation=90)
plt.show()
# The ten most recently joined unicorns and their mean / median valuation.
#
# 'Date Joined' is never parsed as a date in this file, so sorting the raw
# column orders it as text rather than chronologically (assuming it was read
# as strings -- confirm the CSV's date format). The key parses the values for
# ordering only; the column itself is left unchanged. Anything that does not
# parse as a date becomes NaT and is sorted last.
df_date = df.sort_values(
    'Date Joined',
    key=lambda dates: pd.to_datetime(dates, errors='coerce'),
    ascending=False,
).head(10)
df_date
print(df_date['Valuation ($B)'].mean())
print(df_date['Valuation ($B)'].median())
Investor 1
# Top 10 investors (from the 'Investor 1' column) by total valuation.
valuation_by_investor = df.groupby('Investor 1')['Valuation ($B)'].sum()
valuation_by_investor.sort_values(ascending=False).head(10)
# Top 10 investors (from the 'Investor 1' column) by number of unicorn startups.
startups_by_investor = df.groupby('Investor 1')['Company'].count()
startups_by_investor.sort_values(ascending=False).head(10)
Industry
# Number of companies per industry, computed once and reused for the table
# and for the bar chart.
industry_counts = df['Industry'].value_counts()
industry_counts
x = industry_counts.index
y = industry_counts.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# Total valuation per industry, largest first. The aggregation is computed
# once and reused for the table and for the bar chart.
industry_valuation = (
    df.groupby('Industry')['Valuation ($B)'].sum().sort_values(ascending=False)
)
industry_valuation
x = industry_valuation.index
y = industry_valuation.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# Summary statistics of the valuation column.
df['Valuation ($B)'].describe()
import plotly.express as px
# Plotly histogram of the valuations with a box plot in the margin. The
# 'Investor 1' and 'Company' columns are handed over as hover data.
hover_columns = df[['Investor 1', 'Company']]
fig = px.histogram(
    data_frame=df,
    x='Valuation ($B)',
    title='Valuation ($B)',
    marginal='box',
    hover_data=hover_columns,
)
fig.show()
# Matplotlib histogram of the valuation column, using the default binning.
valuations = df['Valuation ($B)']
plt.hist(valuations)
plt.show()
# The ten rows with the highest valuation, largest first. `ascending` is a
# boolean flag, so it is spelled False rather than 0.
df_val = df.sort_values('Valuation ($B)', ascending=False).head(10)
plt.bar(df_val['Company'], df_val['Valuation ($B)'])
plt.xticks(rotation=90)
plt.show()
def _as_dates(column):
    """Parse a column of date strings for ordering; unparseable values become NaT."""
    return pd.to_datetime(column, errors='coerce')


# 'Date Joined' is never parsed as a date in this file, so sorting the raw
# column orders it as text rather than chronologically (assuming it was read
# as strings -- confirm the CSV's date format). The key parses the values for
# ordering only; the column itself is left unchanged.

# Five earliest joiners.
df_date = df.sort_values('Date Joined', key=_as_dates, ascending=True).head()
df_date
# Five most recent joiners.
df_date = df.sort_values('Date Joined', key=_as_dates, ascending=False).head()
df_date
# Number of companies per country, largest first. The aggregation is computed
# once; the table and the chart both read from df_co.
df_co = df.groupby('Country')['Company'].count().sort_values(ascending=False)
df_co
x = df_co.index
y = df_co.values
# Only the ten countries with the most companies are plotted.
plt.bar(x[:10], y[:10])
plt.xticks(rotation=90)
plt.show()
# Number of companies per city, largest first.
city_counts = df.groupby('City')['Company'].count().sort_values(ascending=False)
city_counts.head(10)
# Plot the twenty cities with the most companies. x and y are rebound to the
# city aggregation here; without that, the chart would reuse whatever x and y
# were last assigned before this point instead of the city counts.
x = city_counts.index
y = city_counts.values
plt.bar(x[:20], y[:20])
plt.xticks(rotation=90)
plt.show()
Industria
# Number of companies per industry, computed once and reused for the table
# and for the bar chart.
industry_counts = df['Industry'].value_counts()
industry_counts
x = industry_counts.index
y = industry_counts.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# How often each name appears in 'Investor 1', computed once and reused.
investor1_counts = df['Investor 1'].value_counts()
investor1_counts.head()
x = investor1_counts.index
y = investor1_counts.values
# Only the five most frequent names are plotted.
plt.bar(x[:5], y[:5])
plt.xticks(rotation=90)
plt.show()
# The five most frequent names in 'Investor 2', computed once and reused for
# the table and for the bar chart.
investor2_top = df['Investor 2'].value_counts().head()
investor2_top
x = investor2_top.index
y = investor2_top.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# How often each name appears in 'Investor 3', computed once and reused.
investor3_counts = df['Investor 3'].value_counts()
# The table shows the first five entries; the chart below shows the first ten.
investor3_counts.head()
investor3_top = investor3_counts.head(10)
x = investor3_top.index
y = investor3_top.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# How often each name appears in 'Investor 4', computed once and reused.
# NOTE(review): where missing investors were filled with the 'Nadie'
# placeholder earlier in the file, that placeholder is counted here like a
# real investor -- confirm whether it should be excluded from the ranking.
investor4_counts = df['Investor 4'].value_counts()
investor4_counts
# Only the ten most frequent names are plotted.
investor4_top = investor4_counts.head(10)
x = investor4_top.index
y = investor4_top.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()