Dashboard de bajas de equipamiento en la guerra de Ucrania
from os import listdir
from os.path import join
import pandas as pd
import glob
# Directory that holds the per-day CSV snapshots of equipment losses.
path = "data/byType/"
# The pattern must be relative: os.path.join() throws away `path` when the next
# component starts with "/", so "/*.csv" made the glob scan the filesystem root.
# Sorting gives a deterministic load order.
all_files = sorted(glob.glob(join(path, "*.csv")))
# Read only the files matched by the glob; iterating listdir(path) would also
# hand any non-CSV entry in the directory to read_csv.
li = [pd.read_csv(csv_file, index_col=None, header=0) for csv_file in all_files]
# Stack every snapshot into a single frame with a fresh 0..n-1 index.
frame = pd.concat(li, axis=0, ignore_index=True)
# Boolean mask of the rows dated 2022-02-28, built before the frame is re-ordered.
is_28_2 = frame["Date"].eq("2022-02-28")
# Chronological order for everything that follows.
frame = frame.sort_values(by="Date")
# Notebook preview of the 28 February rows (the mask is aligned on index labels).
frame[is_28_2]
# Remove newline characters embedded in the equipment type names.
frame["equipment_type"] = frame["equipment_type"].str.replace("\n", "")
# Discard the saved index column and the row identifier.
frame = frame.drop(columns=["Unnamed: 0", "row_id"])
# Notebook preview of the cleaned frame.
frame
# Dashboard parameters. These lines were bare name/value pairs (presumably the
# residue of exported notebook input fields); the plots below read both names,
# so they are restored as real assignments.
# Value matched against the "country" column when filtering the frame.
country = "Russia"
# Name of the loss column plotted on the y axis.
casualty_type = "type_total"
import plotly.express as px
import plotly.graph_objects as go
from datetime import date
# today = date.today()
# d1 = today.strftime("%d/%m/%Y")
# fechaarchivosalida = today.strftime("%d%m%Y")
# Display name for the plotted loss column: the aggregate column "type_total"
# is shown as "total", any other column keeps its own name.
typelabel = "total" if casualty_type == "type_total" else casualty_type
# Line chart for the selected country, one line per equipment type.
fig = px.line(
    frame[frame["country"] == country],
    x="Date",
    y=casualty_type,
    color="equipment_type",
    # `labels` is keyed by column name. The y column is whatever
    # `casualty_type` holds, so the variable is used as the key; the literal
    # string 'casualty_type' matched no column and the label was never applied.
    labels={
        "Date": "Date",
        casualty_type: typelabel,
        "equipment_type": "Equipment type",
    },
    title=country + "'s " + typelabel + " casualties",
    height=650,
)
fig.show()
# Dashboard parameter: the equipment type compared across countries. The value
# was a bare line of text (a syntax error), presumably the residue of an
# exported notebook input field; it is restored as an assignment because the
# code below filters the "equipment_type" column with it.
comparebytype = "Towed Artillery"
# Combined legend key such as "<country>'s <equipment type>", used to colour the
# comparison chart.
frame["country_and_type"] = (
    frame["country"]
    + "'s "
    + frame["equipment_type"]
)
import plotly.express as px
import plotly.graph_objects as go
from datetime import date
# today = date.today()
# d1 = today.strftime("%d/%m/%Y")
# fechaarchivosalida = today.strftime("%d%m%Y")
# Display name for the plotted loss column: the aggregate column "type_total"
# is shown as "total", any other column keeps its own name.
typelabel = "total" if casualty_type == "type_total" else casualty_type
# Line chart for the selected equipment type, one line per country.
fig = px.line(
    frame[frame["equipment_type"] == comparebytype],
    x="Date",
    y=casualty_type,
    color="country_and_type",
    # `labels` is keyed by column name, so the y-axis entry uses the value of
    # `casualty_type`; the literal string 'casualty_type' matched no column.
    labels={
        "Date": "Date",
        casualty_type: typelabel,
        "equipment_type": "Equipment type",
    },
    # Only strip a trailing plural "s"; the unconditional [:-1] also chopped
    # names that do not end in "s" ("Towed Artillery" -> "Towed Artiller").
    title=(comparebytype[:-1] if comparebytype.endswith("s") else comparebytype)
    + " casualties",
    height=650,
)
fig.show()
# Wide table for the selected equipment type: one row per date, one column per
# country, holding the "type_total" value.
df2 = frame.loc[frame["equipment_type"] == comparebytype].pivot(
    index="Date",
    columns="country",
    values="type_total",
)
# Russian total divided by Ukrainian total for each date.
df2["ratio_rus_ukr"] = df2["Russia"].div(df2["Ukraine"])
# Expose the date index as a regular column so it can be used as a plot axis.
df2["date"] = df2.index
# Notebook preview.
df2
import plotly.express as px
import plotly.graph_objects as go
from datetime import date
# today = date.today()
# d1 = today.strftime("%d/%m/%Y")
# fechaarchivosalida = today.strftime("%d%m%Y")
# typelabel = casualty_type if casualty_type != "type_total" else "total"
# Line chart of the Russia/Ukraine loss ratio over time for the selected
# equipment type.
fig = px.line(
    df2,
    x="date",
    y="ratio_rus_ukr",
    # `labels` is keyed by the column names actually plotted. The previous keys
    # ("Date", 'casualty_type', 'equipment_type') matched no plotted column of
    # df2, so no axis was relabelled.
    labels={
        "date": "Date",
        "ratio_rus_ukr": "Russia / Ukraine ratio",
    },
    # Only strip a trailing plural "s" (the unconditional [:-1] truncated names
    # such as "Towed Artillery"), and say that the chart shows a ratio.
    title=(comparebytype[:-1] if comparebytype.endswith("s") else comparebytype)
    + " casualties ratio (Russia / Ukraine)",
    height=650,
)
fig.show()
# Notebook preview of the plotted table.
df2
# import pandas_alive
# df2["ratio_rus_ukr"].plot_animated('line-chart.gif', kind='line')
import pandas as pd
# One snapshot file is requested per calendar day in this range.
dates = pd.date_range("2022-02-24", "2022-06-03")
base_url = (
    "https://raw.githubusercontent.com/leedrake5/Russia-Ukraine/main/data/byType/{}.csv"
)
datas = []
# Keep the original safety cap of 1000 downloads (previously spelled
# zip(range(1000), dates)). The loop variable is called `day` so it no longer
# rebinds `date`, which this file imports from `datetime`.
for day in dates[:1000]:
    url = base_url.format(day.strftime("%Y-%m-%d"))
    print("fetch", url)
    # The first CSV column is read as the index and then dropped in favour of a
    # fresh positional index.
    datas.append(pd.read_csv(url, index_col=0).reset_index(drop=True))
# Stack every daily snapshot into one raw frame.
data = pd.concat(datas, axis=0, ignore_index=True)
# Reshape the raw snapshots into one row per (Date, country, loss_type) with one
# column per equipment type. The result is bound back to `data`: the ratio below
# selects on the "country" index level, which only the reshaped frame has. The
# original expression was evaluated and then thrown away.
data = (
    data
    # You'd better look closer at these duplicates and why they exist
    .drop_duplicates(subset=["Date", "country", "equipment_type"])
    .dropna(subset=["equipment_type"])
    .loc[lambda df: df.equipment_type != "\n"]
    # No set_index(["Date", "country"]) here: pivot(index=...) looks those names
    # up as columns and raises KeyError once they have been moved into the index.
    .drop(columns=["row_id"])
    .assign(equipment_type=lambda df: df.equipment_type.str.title().str.strip())
    .pivot(index=["Date", "country"], columns="equipment_type")
    .rename_axis(columns=["loss_type", "equipment_type"])
    .stack("loss_type")
)
# Styled notebook preview with missing values rendered as "-". The Styler is
# kept out of `data` because it has no .xs().
data.style.format(na_rep="-")  # , precision=0)
# Russian values (missing treated as 0) divided by Ukrainian values, aligned on
# the remaining (Date, loss_type) index.
df2 = data.xs("Russia", level="country").fillna(0) / data.xs("Ukraine", level="country")
# Notebook previews.
data
df2