#coding=utf-8
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Notebook-style walkthrough of basic pandas operations on ./team.csv.
# Bare expressions (e.g. ``df``) only display a value in an interactive
# session; when run as a plain script they are evaluated and discarded.

# --- Load and inspect ------------------------------------------------------
# The code below relies on the columns 'name', 'team' and 'Q1'..'Q4'.
df = pd.read_csv("./team.csv")
df
df.describe()
df = df.set_index('name')  # use the 'name' column as the row labels
df.head()
df.sample(4)  # four randomly chosen rows
df.index
df.columns

# --- Select rows and columns -----------------------------------------------
df[['team', 'Q1']]
df.loc[:, ['team', 'Q1']]
df[df.index == 'Liver']
df.iloc[0:3]  # first three rows, by position
df.loc['Austin7':'Ben', 'Q1':'Q4']  # label slices include both endpoints
df[df['Q1'] > 90]
df[(df['Q1'] > 90) & (df['team'] == 'C')]

# --- Sort and aggregate ----------------------------------------------------
# ascending flags pair up with the sort keys: True = ascending, False = descending.
df.sort_values(['Q1', 'team'], ascending=[True, False])
df.groupby('team').sum()
df.groupby('team').agg({
    'Q1': 'sum',
    'Q2': 'count',
    'Q3': 'mean',
    'Q4': 'max',
})

# --- Derived columns -------------------------------------------------------
# skipna=False makes a missing quarter yield NaN for the row total, which is
# what the built-in sum() over the row produced; 'avg' below divides by a
# fixed 4, so silently skipping a missing quarter would understate it.
df['total'] = df.loc[:, 'Q1':'Q4'].sum(axis=1, skipna=False)
df['total']
df['avg'] = df['total'] / 4
df['avg']

# --- Descriptive statistics ------------------------------------------------
# 'team' holds strings (it is compared with 'C' above). pandas >= 2.0 no
# longer drops non-numeric columns from these reductions automatically and
# raises TypeError instead, so select the numeric columns explicitly.
# At this point that selection also contains 'total' and 'avg'.
numeric = df.select_dtypes(include='number')
numeric.mean(axis=1)  # mean of each row
numeric.corr()  # pairwise correlation between columns
df.count()  # number of non-null values per column
df.max()  # maximum of each column
df.min()  # minimum of each column
numeric.median()  # median of each column
numeric.std()  # standard deviation of each column
numeric.var()  # variance of each column
df.mode()  # most frequent value(s) of each column

# --- Plots -----------------------------------------------------------------
df['Q1'].plot()
df.loc['Ben', 'Q1':'Q4'].plot()
df.loc['Ben', 'Q1':'Q4'].plot.bar()
df.groupby('team').sum().T.plot()  # transposed: one line per team
df.groupby('team')['Q1'].count().plot.pie()  # share of rows per team
# Six consecutive days starting 2013-01-01 index a 6x4 frame of standard
# normal samples whose columns are labelled with the integers 1..4.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list(range(1, 5)),
)

# Keep the first four days and append an (initially all-NaN) column '5',
# then fill that column for the first two days only.
df1 = df.reindex(index=dates[:4], columns=[*df.columns, '5'])
df1.loc[dates[0]:dates[1], '5'] = 1
df1
df1.dropna(how='any')  # rows that still contain a NaN are dropped in the result

# Per-row spread (max - min) of the full frame; assigning it to df1 aligns
# on the index, so only the four days present in df1 are taken.
minus = df.max(axis=1) - df.min(axis=1)
df1['minus'] = minus
df1

# Per-column spread of the full frame.
df.max() - df.min()
# Two small frames that share a duplicated 'key' value; joining them on that
# key pairs every left row with every right row that has the same key.
left = pd.DataFrame({'key': ['foo'] * 2, 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo'] * 2, 'rval': [4, 5]})
left.merge(right, on='key')
# One timestamp per second for 100 seconds, starting 2012-01-01 00:00:00.
# The lowercase alias 's' is used because the uppercase 'S' spelling is
# deprecated as of pandas 2.2; older releases accept the lowercase form too.
rng = pd.date_range('20120101', periods=100, freq='s')
# One random integer in [0, 500) per timestamp.
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts
ts.tz_localize('UTC')  # result is a UTC-aware copy; ``ts`` itself is not modified
ts.to_period()  # result is indexed by periods instead of timestamps
# Draw the per-second series as a line chart (the default plot kind).
ts.plot.line()