# Load the public UCI "zoo" data set.
zoo_data = 'https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/zoo.data'
# The column names are documented at: 'https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/zoo.names'
# NOTE(review): ' cat-size' carries a leading space -- confirm whether that is intentional.
zoo_name= ['animal name','hair','feathers','eggs','milk','airborne','aquatic','predator','toothed','backbone','breathes','venomous','fins','legs','tail','domestic',' cat-size','type']
import pandas as pd
# zoo.data ships without a header line (the labels live in zoo.names), so
# header=None keeps the first record as data instead of consuming it as
# column labels, and names= applies the labels in the same call.
data = pd.read_csv(zoo_data, header=None, names=zoo_name)
data.head()
# Most commonly used plotting libraries.
# NOTE(review): `plt` is bound to plotly here; it is the conventional alias
# for matplotlib.pyplot, so double-check any later use of `plt`.
import plotly as plt
import seaborn as sns
import altair as alt  # declarative charts
import plotly.express as px

# Line chart: legs per animal.
plot_01 = alt.Chart(data).mark_line().encode(x='animal name', y='legs')
plot_01

# Same encoding rendered as bars.
plot_02 = alt.Chart(data).mark_bar().encode(x='animal name', y='legs')
plot_02

# Pie chart sized by the 'aquatic' column, one slice per animal name.
fig = px.pie(
    data,
    names='animal name',
    values='aquatic',
    title='Porcentaje de animales acuáticos',
)
fig.show()
# Load the public Chicago traffic-crash data set.
Chicago_data = 'https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/Traffic_Crashes1.csv'
# NOTE(review): the Mapbox access token is hard-coded in source; consider
# reading it from configuration or the environment instead of committing it.
token_map_plot='pk.eyJ1IjoiY2hlbWlza3kiLCJhIjoiY2tnOGNhcXk3MGZ3eDJ5b2FxZ3ViajN1MyJ9.t3R24lWTwzTQwyJw4vuWFw'
import pandas as pd
Chicago_df = pd.read_csv(Chicago_data)
Chicago_df
Chicago_df.head()
# Column dtypes before the conversions below.
Chicago_df.info()
# Parse the whole column in one vectorized call rather than once per row;
# unparseable values become NaT (errors='coerce') and results are UTC-aware.
Chicago_df['CRASH_DATE'] = pd.to_datetime(Chicago_df['CRASH_DATE'], errors='coerce', utc=True)
# Column dtypes after the date conversion.
Chicago_df.info()
Chicago_df['NUM_UNITS'] = Chicago_df['NUM_UNITS'].astype('int32')
import plotly.express as px
px.set_mapbox_access_token(token_map_plot)
# Crash locations coloured by hour of day and sized by number of units involved.
px.scatter_mapbox(Chicago_df,lat='LATITUDE',lon='LONGITUDE',color='CRASH_HOUR',size='NUM_UNITS', color_continuous_scale=px.colors.cyclical.IceFire,zoom=10)
# Load the public Chicago traffic-crash data set.
Chicago_data = 'https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/Traffic_Crashes1.csv'
# NOTE(review): the Mapbox access token is hard-coded in source; consider
# reading it from configuration or the environment instead of committing it.
token_map_plot='pk.eyJ1IjoiY2hlbWlza3kiLCJhIjoiY2tnOGNhcXk3MGZ3eDJ5b2FxZ3ViajN1MyJ9.t3R24lWTwzTQwyJw4vuWFw'
import pandas as pd
Chicago_df = pd.read_csv(Chicago_data)
# Parse the whole column in one vectorized call rather than once per row;
# unparseable values become NaT (errors='coerce') and results are UTC-aware.
Chicago_df['CRASH_DATE'] = pd.to_datetime(Chicago_df['CRASH_DATE'], errors='coerce', utc=True)
import plotly.express as px
px.set_mapbox_access_token(token_map_plot)
# plotly's bundled sample data set; the statements in this section do not use it.
df = px.data.carshare()

# Map of crashes at their exact coordinates.
fig = px.scatter_mapbox(Chicago_df, lat="LATITUDE", lon="LONGITUDE", color="CRASH_HOUR", size="NUM_UNITS",
                        color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10)
fig.show()

# Round the coordinates to two decimals so nearby crashes share one map point.
Chicago_df['Lat_round'] = Chicago_df['LATITUDE'].round(2)
Chicago_df['Long_round'] = Chicago_df['LONGITUDE'].round(2)
fig = px.scatter_mapbox(Chicago_df, lat="Lat_round", lon="Long_round", color="CRASH_HOUR", size="NUM_UNITS",
                        color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10)
fig.show()

# Quick look at NUM_UNITS: raw series plot, coefficient of variation, histogram.
Chicago_df['NUM_UNITS'].plot()
import scipy.stats as ss
ss.variation(Chicago_df['NUM_UNITS'])
import matplotlib.pyplot as plt
plt.hist(Chicago_df[['NUM_UNITS']])
# Frequency table of crashes per month, drawn as a bar chart.
tab = pd.crosstab(index=Chicago_df['CRASH_MONTH'], columns='Frecuencia')
plt.bar(tab.index, tab['Frecuencia'])
# Load the public Chicago traffic-crash data set.
import pandas as pd
import altair as alt
Chicago_data = 'https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/Traffic_Crashes1.csv'
Chicago_df = pd.read_csv(Chicago_data)
# Basic transformations.
# Parse the whole column in one vectorized call rather than once per row;
# unparseable values become NaT (errors='coerce') and results are UTC-aware.
Chicago_df['CRASH_DATE'] = pd.to_datetime(Chicago_df['CRASH_DATE'], errors='coerce', utc=True)
Chicago_df.head()
Chicago_df.info()

# Report 1: total NUM_UNITS per lighting condition, report type and crash hour.
report_1 = Chicago_df.groupby(['LIGHTING_CONDITION','REPORT_TYPE','CRASH_HOUR']).agg({'NUM_UNITS':'sum'})
report_1 = report_1.reset_index()
report_1
alt.Chart(report_1).mark_bar().encode(
    x='LIGHTING_CONDITION',
    y='NUM_UNITS',
    color='REPORT_TYPE'
).properties(width=220)

# Report 2: same grouping, with sum, min and max of NUM_UNITS.
report_2 = Chicago_df.groupby(['LIGHTING_CONDITION','REPORT_TYPE','CRASH_HOUR']).agg({'NUM_UNITS':['sum','min','max']})
# Reset report_2's own index so the sum/min/max aggregation is kept;
# resetting report_1 here would replace it with a copy of the first report.
report_2 = report_2.reset_index()
report_2
# Load the public wine-quality data sets (red and white).
import pandas as pd
url_wine_red = 'https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/winequality-red.csv'
url_wine_white = 'https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/winequality-white.csv'
# Both files are read with ';' as the field delimiter.
red = pd.read_csv(url_wine_red, delimiter=';')
white = pd.read_csv(url_wine_white, delimiter=';')
red.head()
white.head()
# Tag each frame so the rows stay distinguishable after stacking.
red['color'] = 'red'
white['color'] = 'white'
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way (red first, then white, original indexes kept).
pd.concat([red, white])
# Same stacking in the opposite order.
pd.concat([white, red])
# Two small example frames sharing the column names 'Key' and 'A'.
left = pd.DataFrame(
    data=dict(
        Key=['key1', 'key2', 'key3'],
        A=['a1', 'a2', 'a3'],
    )
)
left
# 'key3' appears twice in this frame's Key column.
right = pd.DataFrame(
    data=dict(
        Key=['key3', 'key4', 'key3'],
        A=['a1', 'a2', 'a3'],
    )
)
right