# Descripción general del dataset
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the raw CSV and take a first look at its rows and column labels.
df = pd.read_csv('Unicorn_Startups.csv')
df.head()
df.columns

# Discard the 'Unnamed: 0' column (presumably a leftover saved index -- confirm).
df = df.drop(columns=['Unnamed: 0'])
df
df.shape
df.columns

# Count missing cells per column, replace every missing cell with the
# placeholder 'Nadie', then count again to confirm nothing is left.
# NOTE(review): the fill is applied to the whole frame, so a numeric column
# with gaps would receive the string too -- confirm only text columns have nulls.
df.isna().sum()
df = df.fillna(value='Nadie')
df.isna().sum()

# Structural overview: dtypes, distinct values per column, industry frequencies.
df.info()
df.nunique()
df['Industry'].value_counts()

# Store 'Industry' with the categorical dtype, then re-inspect the frame.
df = df.astype({'Industry': 'category'})
df.info()
df.describe()
# Análisis de las variables del dataset
df.columns

# Ten highest-valued companies, drawn as a bar chart.
df_val = df.sort_values('Valuation ($B)', ascending=False).head(10)
plt.bar(df_val['Company'], df_val['Valuation ($B)'])
plt.xticks(rotation=90)
plt.show()


def _as_datetime(column):
    """Return *column* parsed as datetimes, for use as a sort key.

    The CSV is read without date parsing, so 'Date Joined' holds plain text
    and sorting it directly would be lexicographic rather than chronological.
    Values that cannot be parsed become NaT, which sort_values places last.
    """
    return pd.to_datetime(column, errors='coerce')


# Five companies that joined earliest ...
df_date = df.sort_values('Date Joined', ascending=True, key=_as_datetime).head()
df_date
# ... and the five that joined most recently.
df_date = df.sort_values('Date Joined', ascending=False, key=_as_datetime).head()
df_date

# Number of companies per country, most frequent first; computed once and
# reused for both the table and the chart.
df_co = df.groupby('Country')['Company'].count().sort_values(ascending=False)
df_co
x = df_co.index
y = df_co.values
plt.bar(x[:10], y[:10])
plt.xticks(rotation=90)
plt.show()

# Number of companies per city, most frequent first.
city_counts = df.groupby('City')['Company'].count().sort_values(ascending=False)
city_counts.head(10)
# Plot the city counts: the previous version reused the country x/y here,
# so the chart that follows the city table showed countries again.
plt.bar(city_counts.index[:20], city_counts.values[:20])
plt.xticks(rotation=90)
plt.show()
# Industria e inversores
# Companies per industry: frequency table, then a bar for every industry.
industry_counts = df['Industry'].value_counts()
industry_counts
x, y = industry_counts.index, industry_counts.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()

# 'Investor 1': show the five most frequent names and chart those five.
investor1_counts = df['Investor 1'].value_counts()
investor1_counts.head()
x, y = investor1_counts.index, investor1_counts.values
plt.bar(x[:5], y[:5])
plt.xticks(rotation=90)
plt.show()

# 'Investor 2': five most frequent names, table and chart.
investor2_top = df['Investor 2'].value_counts().head()
investor2_top
x, y = investor2_top.index, investor2_top.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()

# 'Investor 3': the table shows five names, the chart shows ten.
investor3_counts = df['Investor 3'].value_counts()
investor3_counts.head()
investor3_top = investor3_counts.head(10)
x, y = investor3_top.index, investor3_top.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()

# 'Investor 4': the table shows every name, the chart shows the top ten.
investor4_counts = df['Investor 4'].value_counts()
investor4_counts
investor4_top = investor4_counts.head(10)
x, y = investor4_top.index, investor4_top.values
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()