# Análisis de los 25 retailers más grandes de Estados Unidos
import pandas as pd
import numpy as np
import altair as alt
# Load the retailer dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='largest_us_retailers.csv')

# Bare expressions: in a notebook these display the table and its column index.
df
df.columns
# Sales per company as horizontal bars, sorted from largest to smallest.
# The x axis is drawn without labels, ticks, grid or domain line; the exact
# figures are exposed through the tooltip instead.
sales_axis = alt.Axis(
    grid=False,
    labelFontSize=12,
    labels=False,
    ticks=False,
    domain=False,
)
company_axis = alt.Axis(grid=False, labelFontSize=14)
hover_fields = [
    alt.Tooltip('Company', title='Company'),
    alt.Tooltip('Sales', title='Sales MUSD'),
]
sales_bars = alt.Chart(df).mark_bar(color='#ffacd9').encode(
    x=alt.X('Sales', title='', axis=sales_axis),
    y=alt.Y('Company', title='', sort='-x', axis=company_axis),
    tooltip=hover_fields,
)
# Title first, then chart-wide configuration (no view border, centred title).
sales_bars.properties(title='Sales').configure_view(strokeWidth=0).configure_title(
    fontSize=18,
    anchor='middle',
    color='#e4007c',
)
# Drop the Walmart US row so the average is computed over the remaining companies.
companies = df[df['Company'] != 'Walmart US']

# Sales per company (Walmart US excluded), largest first, as horizontal bars.
bar = alt.Chart(companies).mark_bar(color='#ff79c2').encode(
    x=alt.X(
        'Sales',
        title='',
        axis=alt.Axis(grid=False, labelFontSize=12, labels=False, ticks=False, domain=False),
    ),
    y=alt.Y('Company', title='', sort='-x', axis=alt.Axis(grid=False, labelFontSize=14)),
    tooltip=[
        alt.Tooltip('Company', title='Company'),
        alt.Tooltip('Sales', title='Sales MUSD'),
    ],
)

# Rule marking the mean of Sales. Sales lives on the x axis of the bar layer,
# so the rule has to be encoded on x as well; encoding it on y would place a
# quantitative aggregate on the nominal Company axis of the layered chart.
m = alt.Chart(companies).mark_rule(color='black').encode(
    x=alt.X(
        'mean(Sales)',
        title='',
        axis=alt.Axis(labels=False, ticks=False, domain=False),
    ),
    tooltip=[alt.Tooltip('mean(Sales)', title='mean MUSD')],
)

# Layer bars and rule, then apply the title and chart-wide configuration.
(bar + m).properties(
    title='Sales'
).configure_title(
    fontSize=18,
    anchor='middle',
    color='#e4007c'
).configure_view(
    strokeWidth=0
)

# Same mean as the rule above, as a plain number.
meanCompanies = companies['Sales'].mean()
meanCompanies
# Histogram of Sales over all companies (up to 60 bins).
bar = alt.Chart(df).mark_bar(color='#679997').encode(
    x=alt.X('Sales:Q', bin=alt.Bin(maxbins=60), title='Sales [M USD]'),
    y=alt.Y('count(Sales)', title='', axis=alt.Axis(grid=False)),
)

# Vertical rule at the median of Sales. The rule was previously positioned at
# mean(Sales) while its tooltip reported median(Sales); both now use the
# median so the line and the value shown on hover agree.
m = alt.Chart(df).mark_rule(color='black').encode(
    x=alt.X('median(Sales)', title='', axis=alt.Axis(ticks=False, domain=False)),
    tooltip=[alt.Tooltip('median(Sales)', title='median MUSD')],
)

(bar + m).properties(
    title='Sales'
).configure_title(
    fontSize=18,
    anchor='middle',
    color='#679997'
).configure_view(
    strokeWidth=0
)

# Same median as the rule above, as a plain number.
medianSales = df['Sales'].median()
medianSales
# Histogram of Sales for `companies` (up to 10 bins).
bar = alt.Chart(companies).mark_bar(color='#9ae5e2').encode(
    x=alt.X('Sales:Q', bin=alt.Bin(maxbins=10), title='Sales [M USD]'),
    y=alt.Y('count(Sales)', title='', axis=alt.Axis(grid=False)),
)

# Vertical rule at the median of Sales computed on the full dataset (`df`),
# as the tooltip title states. The rule was previously positioned at
# mean(Sales) while the tooltip reported median(Sales); both now use the
# median so the line and the value shown on hover agree.
m = alt.Chart(df).mark_rule(color='black').encode(
    x=alt.X('median(Sales)', title='', axis=alt.Axis(ticks=False, domain=False)),
    tooltip=[alt.Tooltip('median(Sales)', title='median (all data)')],
)

(bar + m).properties(
    title='Sales'
).configure_title(
    fontSize=18,
    anchor='middle',
    color='#679997'
).configure_view(
    strokeWidth=0
)
# Histogram of the number of stores per company (up to 10 bins); the tooltip
# shows how many companies fall in each bin.
# The binned field is Stores, so the axis title and chart title are labelled
# "Stores" (they previously read "Sales [M USD]" / "Sales").
alt.Chart(df).mark_bar(color='skyblue').encode(
    x=alt.X('Stores:Q', title='Stores', bin=alt.Bin(maxbins=10), axis=alt.Axis(domain=False)),
    y=alt.Y('count()', title='', axis=alt.Axis(grid=False, domain=False, ticks=False)),
    tooltip=[alt.Tooltip('count()', title='Companies')],
).configure_view(
    strokeWidth=0
).properties(
    title='Stores'
)
# A single-item selection shared by both charts below; `sel` and `pts` are two
# names for the same selection object.
pts = alt.selection(type='single')
sel = pts

# Colour rules: selected bars keep their colour, everything else turns grey.
pink_if_selected = alt.condition(pts, alt.ColorValue("#ffacd9"), alt.ColorValue("grey"))
blue_if_selected = alt.condition(pts, alt.ColorValue("skyblue"), alt.ColorValue("grey"))

# NOTE: the variable names are swapped relative to their content -- `stores`
# plots the Sales column and `sales` plots the Stores column. Names are kept
# as they are.
stores = alt.Chart(df).mark_bar().encode(
    x=alt.X(
        'Sales',
        title='',
        axis=alt.Axis(grid=False, labelFontSize=12, labels=False, ticks=False, domain=False),
    ),
    y=alt.Y(
        'Company',
        title='Sales',
        sort='-x',
        axis=alt.Axis(
            grid=False,
            labelFontSize=14,
            titleAnchor='start',
            titleAngle=0,
            titleFontSize=20,
            titleColor='#e4007c',
        ),
    ),
    tooltip=[
        alt.Tooltip('Company', title='Company'),
        alt.Tooltip('Sales', title='Sales MUSD'),
        alt.Tooltip('Stores', title='Stores'),
    ],
    color=pink_if_selected,
).add_selection(pts)

sales = alt.Chart(df).mark_bar(color='skyblue').encode(
    x=alt.X(
        'Stores',
        title='',
        axis=alt.Axis(grid=False, labelFontSize=12, labels=False, ticks=False, domain=False),
    ),
    y=alt.Y(
        'Company',
        title='Stores',
        sort='-x',
        axis=alt.Axis(
            grid=False,
            labelFontSize=14,
            titleAnchor='start',
            titleAngle=0,
            titleFontSize=20,
            titleColor='steelblue',
        ),
    ),
    tooltip=[
        alt.Tooltip('Company', title='Company'),
        alt.Tooltip('Sales', title='Sales MUSD'),
        alt.Tooltip('Stores', title='Stores'),
    ],
    color=blue_if_selected,
).add_selection(pts)

# Place the two charts side by side and remove the view border.
side_by_side = stores | sales
side_by_side.configure_view(strokeWidth=0)
# Box plots of Sales side by side: the full dataset on the left and the
# `companies` subset on the right.
box_all = alt.Chart(df).mark_boxplot().encode(y=alt.Y('Sales'))
# `.interactive()` is applied to the right-hand chart only, exactly as the
# method call binds before the `|` operator.
box_subset = alt.Chart(companies).mark_boxplot().encode(y=alt.Y('Sales')).interactive()
box_all | box_subset

# Range of Sales over the full dataset: largest value minus smallest value.
sales_column = df['Sales']
rango = sales_column.max() - sales_column.min()
rango
# Rank the companies by store count (descending) and keep the first five.
top = df.sort_values('Stores', ascending=False)
top5Stores = top.iloc[:5]
top5Stores

# The five companies with the highest sales. The right-hand chart below reads
# this table; it was previously referenced without ever being defined, which
# raised a NameError.
top5Sales = df.sort_values('Sales', ascending=False).iloc[:5]

# Smallest store count among the top five by stores. Used as the highlight
# threshold in both charts instead of a hard-coded number, so the highlight
# stays correct if the data changes.
min_top5_stores = float(top5Stores['Stores'].min())

# NOTE: the variable names are swapped relative to their content -- `sales`
# plots the Stores column and `stores` plots the Sales column. Names are kept
# as they are.

# All companies by store count; the top five by stores are drawn in orange.
sales = alt.Chart(df).mark_bar().encode(
    x=alt.X(
        'Stores',
        title='',
        axis=alt.Axis(grid=False, labelFontSize=12, labels=False, ticks=False, domain=False),
    ),
    y=alt.Y(
        'Company',
        title='Stores',
        sort='-x',
        axis=alt.Axis(grid=False, labelFontSize=14, titleAnchor='start', titleAngle=0, titleFontSize=18),
    ),
    tooltip=[
        alt.Tooltip('Company', title='Company'),
        alt.Tooltip('Sales', title='Sales MUSD'),
        alt.Tooltip('Stores', title='Stores'),
    ],
    color=alt.condition(
        alt.datum.Stores >= min_top5_stores,
        alt.ColorValue("orange"),
        alt.ColorValue("steelblue"),
    ),
)

# Top five companies by sales; those that are also in the top five by store
# count are drawn in orange.
stores = alt.Chart(top5Sales).mark_bar().encode(
    x=alt.X(
        'Sales',
        title='',
        axis=alt.Axis(grid=False, labelFontSize=12, labels=False, ticks=False, domain=False),
    ),
    y=alt.Y(
        'Company',
        title='Top 5: Sales',
        sort='-x',
        axis=alt.Axis(grid=False, labelFontSize=14, titleAnchor='start', titleAngle=0, titleFontSize=18),
    ),
    tooltip=[
        alt.Tooltip('Company', title='Company'),
        alt.Tooltip('Sales', title='Sales MUSD'),
        alt.Tooltip('Stores', title='Stores'),
    ],
    color=alt.condition(
        alt.datum.Stores >= min_top5_stores,
        alt.ColorValue("orange"),
        alt.ColorValue("steelblue"),
    ),
)

# Show both charts side by side without the view border.
(sales | stores).configure_view(
    strokeWidth=0
)
# Total sales per category as horizontal bars.
# Sales is aggregated with sum() so that each category is drawn as a single
# bar whose length and tooltip both show the category total; without the
# aggregate every company row is drawn as its own bar inside the category.
alt.Chart(df).mark_bar(
    color="lightblue"
).encode(
    x=alt.X(
        'sum(Sales)',
        title='',
        axis=alt.Axis(grid=False, format='.2s', labels=False, ticks=False, domain=False),
    ),
    y=alt.Y('Category', title=''),
    tooltip=[alt.Tooltip('sum(Sales)', title='Sales [M USD]')],
).configure_view(
    strokeWidth=0
).properties(
    title='Sales per Category'
).configure_title(
    fontSize=18,
    anchor='middle',
    color='steelblue'
)
# Average sales per store for each company.
# The column name contains "/" and ".", which are backslash-escaped in the
# Altair shorthand. Raw strings are used so the backslashes reach Altair
# unchanged without relying on Python's handling of the invalid escape
# sequences "\/" and "\." (which emits a warning on recent Python versions).
alt.Chart(df).mark_bar(color='orange').encode(
    x=alt.X(r'Sales\/Avg\. Store:Q', title='', axis=alt.Axis(grid=False)),
    y=alt.Y('Company', title=''),
    tooltip=r'Sales\/Avg\. Store:Q',
).configure_view(
    strokeWidth=0
).properties(
    title='Average sales per Store'
)
# Rows whose Stores value is missing (NaN).
df.loc[df['Stores'].isna()]

# Sales per company, largest first. Bars are steelblue when the row's Stores
# value is truthy and orange otherwise, which singles out the companies
# without a store count.
has_stores = alt.datum.Stores
bar_colour = alt.condition(
    has_stores,
    alt.ColorValue("steelblue"),
    alt.ColorValue("orange"),
)
row_tooltip = [
    alt.Tooltip('Company', title='Company'),
    alt.Tooltip('Sales', title='Sales MUSD'),
    alt.Tooltip('Stores', title='Stores'),
]
alt.Chart(df).mark_bar().encode(
    x=alt.X(
        'Sales',
        title='',
        axis=alt.Axis(grid=False, labelFontSize=12, labels=False, ticks=False, domain=False),
    ),
    y=alt.Y(
        'Company',
        title='Stores',
        sort='-x',
        axis=alt.Axis(grid=False, labelFontSize=14, titleAnchor='start', titleAngle=0, titleFontSize=18),
    ),
    tooltip=row_tooltip,
    color=bar_colour,
)