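# pandas basics walkthrough: Series/DataFrame construction, CSV loading,
# and summary statistics on the iris dataset (notebook cells flattened into a script).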
import pandas as pd
import numpy as np
# Load the iris CSV from the workspace into a DataFrame, then echo it
# (a bare expression is only rendered when run as a notebook cell).
df_csv = pd.read_csv(filepath_or_buffer='/work/iris.csv')
df_csv
# Build a Series whose values (int, str, dict) and index labels (str, int)
# are of mixed types; the dtype is set to object explicitly.
series_values = [100, 'a', {'dic1': 5}]
series_index = pd.Index(['id1', 20, 'third'], name="my_idx")
s = pd.Series(
    data=series_values,
    index=series_index,
    dtype='object',
    name='my_name',
)
# Notebook-style inspection: the Series itself, then its values array,
# its shape tuple, and its index object.
s
s.values
s.shape
s.index
# Way 1: build the frame from a row-oriented nested list, passing the row
# and column labels explicitly.
data = [
    [1, 'a', 1.2],
    [2, 'b', 2.2],
    [3, 'c', 3.2],
]
row_labels = ['row_%d' % i for i in range(3)]
df = pd.DataFrame(data=data, index=row_labels, columns=['col_0', 'col_1', 'col_2'])
df
# Way 2: rebuild a frame with the same labels and values from a
# column-oriented dict; the dict keys become the column names.
columns_by_name = {
    'col_0': [1, 2, 3],
    'col_1': list('abc'),
    'col_2': [1.2, 2.2, 3.2],
}
df = pd.DataFrame(data=columns_by_name, index=row_labels)
df
# Read the iris CSV again and preview it in several ways.
df_csv = pd.read_csv(filepath_or_buffer='/work/iris.csv')
df_csv
df_csv.head(n=5)  # first five rows
df_csv.tail(n=5)  # last five rows
df_csv.tail(n=3)  # last three rows
# info() prints the index range, per-column non-null counts and dtypes,
# and the memory usage.
df_csv.info()
# Output of df_csv.info() as captured in the notebook:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 150 entries, 0 to 149
# Data columns (total 5 columns):
#  #   Column        Non-Null Count  Dtype
# ---  ------        --------------  -----
#  0   Sepal.Length  150 non-null    float64
#  1   Sepal.Width   150 non-null    float64
#  2   Petal.Length  150 non-null    float64
#  3   Petal.Width   150 non-null    float64
#  4   Species       150 non-null    object
# dtypes: float64(4), object(1)
# memory usage: 6.0+ KB
# Summary statistics for the iris data. This step sits between
# df_csv.info() and the df_csv-derived df_demo, so it describes the loaded
# dataset rather than the three-row toy frame `df`.
df_csv.describe()
# Restrict the iris frame to the two sepal measurement columns.
sepal_cols = ['Sepal.Length', 'Sepal.Width']
df_demo = df_csv[sepal_cols]

# Per-column aggregations.
df_demo.mean()
df_demo.max()
df_demo.quantile(q=0.75)
df_demo.count()
df_demo.idxmax()
# Per-row mean across the two columns, previewing the first rows only.
df_demo.mean(axis='columns').head()

# Distinct values of a single column: the values, how many, and their counts.
sepal_length = df_demo['Sepal.Length']
sepal_length.unique()
sepal_length.nunique()
sepal_length.value_counts()

# Duplicate handling on the (Sepal.Length, Sepal.Width) pair: keep the first
# occurrence (default), keep the last, or drop every duplicated row.
df_demo.drop_duplicates(subset=sepal_cols)
df_demo.drop_duplicates(subset=sepal_cols, keep='last')
df_demo.drop_duplicates(subset=sepal_cols, keep=False).head()
# With no subset given, all columns are compared.
df_demo.drop_duplicates()
df_demo.duplicated().head()

# Replace the species names with integer codes (returns a new Series;
# df_csv itself is not modified).
species_codes = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
df_csv['Species'].replace(to_replace=species_codes)