import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Paired observations: 18 (age, fat) measurements, one row per observation.
# The two lists are positionally aligned, so entry i of each belongs to row i.
_age_values = [
    23, 23, 27, 27, 39, 41, 47, 49, 50,
    52, 54, 54, 56, 57, 58, 58, 60, 61,
]
_fat_values = [
    9.5, 26.5, 7.8, 17.8, 31.4, 25.9, 27.4, 27.2, 31.2,
    34.6, 42.5, 28.8, 33.4, 30.2, 34.1, 32.9, 41.2, 35.7,
]
df1 = pd.DataFrame({'age': _age_values, 'fat': _fat_values})
# --- Exploratory view of df1 -------------------------------------------------
# NOTE: a bare expression such as the next line only echoes its value in an
# interactive session (REPL / notebook cell); run as a script it shows nothing.
df1.describe()

# First round of figures: a box plot of every column, then an age-vs-fat
# scatter plot; plt.show() displays whatever has been drawn so far.
df1.boxplot()
df1.plot.scatter('age', 'fat')
plt.show()

# Second figure: the same scatter drawn directly with matplotlib, a red
# reference diagonal from (0, 0) to (70, 70), and gold markers at the points
# (quartile of age, same quartile of fat) for Q1, the median and Q3.
plt.scatter(df1['age'], df1['fat'])
plt.plot([0, 70], [0, 70], color='red')

# Each of these is a Series indexed by column label ('age', 'fat').
q1 = df1.quantile(0.25)
median = df1.median()
q3 = df1.quantile(0.75)

# Look the coordinates up by label rather than by integer position: integer
# keys on a label-indexed Series rely on a positional fallback that pandas has
# deprecated. The offsets (in points) keep each caption clear of its marker.
for caption, stats, text_offset in (
    ('$Q_{1}$', q1, (-5, -25)),
    ('$median$', median, (-33, +18)),
    ('$Q_{3}$', q3, (+12, -15)),
):
    marker_xy = (stats['age'], stats['fat'])
    plt.plot(marker_xy[0], marker_xy[1], 'o', color='gold', markersize=10)
    plt.annotate(caption, xy=marker_xy, xytext=text_offset,
                 textcoords='offset points', fontsize=16)
plt.show()
# Five two-attribute data points (rows 0-4) that get ranked by how similar
# they are to the query point x under several measures.
df2 = pd.DataFrame({
    'A1': [1.5, 2, 1.6, 1.2, 1.5],
    'A2': [1.7, 1.9, 1.8, 1.5, 1],
})
x = np.array([1.4, 1.6])


def _rows_by_distance(order):
    """Return each row's `order`-norm distance to x, sorted nearest first."""
    def distance_to_query(point):
        return np.linalg.norm(point - x, order)
    return df2.apply(distance_to_query, axis=1).sort_values()


def _cosine_to_query(point):
    """Cosine of the angle between `point` and x."""
    return np.dot(point, x) / (np.linalg.norm(point) * np.linalg.norm(x))


def _unit_row_distance(point):
    """Euclidean distance to x after scaling `point` to unit length.

    Only the row is rescaled; x is used as given.
    """
    return np.linalg.norm(point / np.linalg.norm(point) - x)


# Distances: smaller is more similar, so ascending order.
euclid = _rows_by_distance(2)
manhattan = _rows_by_distance(1)
inf = _rows_by_distance(np.inf)
# Cosine similarity: larger is more similar, so descending order.
cos = df2.apply(_cosine_to_query, axis=1).sort_values(ascending=False)
normalized = df2.apply(_unit_row_distance, axis=1).sort_values()

# One column per measure; entry k is the df2 row label ranked k-th.
rank = pd.DataFrame({
    measure: ordering.index
    for measure, ordering in (
        ('euclid', euclid),
        ('manhattan', manhattan),
        ('inf', inf),
        ('cos', cos),
        ('normalized', normalized),
    )
})
rank
age = np.array([13, 15, 16, 16, 19, 20, 20, 21, 22,
                22, 25, 25, 25, 25, 30, 33, 33, 35,
                35, 35, 35, 36, 40, 45, 46, 52, 70])


def decimal_scaling_exponent(values):
    """Return the smallest integer j >= 0 with max(|values|) / 10**j < 1.

    Found by exact search instead of ceil(log10(max)), which yields a scaled
    maximum of exactly 1 (not < 1) when the maximum is a power of ten.
    """
    peak = np.abs(values).max()
    exponent = 0
    while peak / 10 ** exponent >= 1:
        exponent += 1
    return exponent


# --- Smoothing by bin means ---------------------------------------------------
# Sort the values, cut them into consecutive bins of `_bin_depth` values, and
# replace every value by the mean of its bin. The number of bins is derived
# from the data; reshape raises ValueError if age.size is not a multiple of
# the bin depth.
_bin_depth = 3
_bin_means = np.sort(age).reshape(-1, _bin_depth).mean(axis=1)
# Shape (_bin_depth, number_of_bins): each row repeats the full list of bin
# means, so the transpose has one row per bin.
bin_smooth = np.ones((_bin_depth, _bin_means.size)) * _bin_means
# Bare expression: echoed only in an interactive session.
bin_smooth.T

# --- Three normalisations of a single age value --------------------------------
_query_age = 35
_new_min, _new_max = 0.0, 1.0

# Min-max normalisation onto [_new_min, _new_max].
minmax = (_query_age - age.min()) / (age.max() - age.min()) * (_new_max - _new_min) + _new_min
# z-score; ndarray.std() defaults to the population standard deviation (ddof=0).
z_score = (_query_age - age.mean()) / age.std()
# Decimal scaling: divide by the smallest power of ten that brings every
# absolute value below 1. np.float64 keeps the result a NumPy scalar.
decimal_scaling = _query_age / np.float64(10) ** decimal_scaling_exponent(age)
print("minmax:",minmax,"z_score:",z_score,"decimal_scaling:",decimal_scaling)