import pandas as pd
# Load the UCI Iris data set. header=None tells read_csv not to take column
# names from the file, so they are supplied explicitly.
iris_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris = pd.read_csv(iris_url, header=None, names=iris_columns)

type(iris)     # class of the object returned by read_csv
iris.head(10)  # first ten rows
iris.tail(10)  # last ten rows
# Notebook output: this chart is empty.
# The chart was probably not set up properly in the notebook.
# Quick structural overview of the iris DataFrame.
iris.columns         # column labels
iris['class']        # select a single column by name
type(iris['class'])  # type of the object returned by single-column selection
iris.dtypes          # dtype of each column
iris.shape           # (number of rows, number of columns)
# info is a method: without the parentheses the expression only evaluates to
# the bound method object and the summary is never produced.
iris.info()
import pandas as pd
# Build a small example frame whose dates start out as 'YYYYMMDD' strings.
values = {
    'dates': ['20210305', '20210316', '20210328'],
    'status': ['Opened', 'Opened', 'Closed'],
}
demo = pd.DataFrame(data=values)
demo

# Parse the date strings into datetime values and replace the column.
parsed_dates = pd.to_datetime(demo['dates'], format='%Y%m%d')
demo['dates'] = parsed_dates
demo

# Save the frame to the working directory.
demo.to_csv('demo_df.csv')
# Row labels and column labels of the iris frame.
print(iris.index)
print(iris.columns)
iris.head()

# Plain [] indexing: pick the column(s) first, then the rows.
sepal_width = iris['sepal_width']
sepal_width[4]                            # one value from the column
sepal_width[:4]                           # slice, end excluded
iris[['petal_width', 'sepal_width']][:4]  # two columns, same row slice
sepal_width[range(4)]                     # rows given as an explicit range
# .loc selects by label; a label slice includes its end point.
iris.loc[4, 'sepal_width']
iris.loc[0:4, 'sepal_width']  # inclusive
iris.loc[range(4), ['petal_width', 'sepal_width']]

# .iloc selects by integer position; a position slice excludes its end point.
iris.iloc[4, 1]
iris.iloc[:4, 1]
iris.iloc[range(4), [3, 1]]

# .loc also accepts a callable that receives the frame and returns the selector.
iris.loc[:, lambda frame: ['petal_length', 'sepal_length']]
iris.loc[lambda frame: frame['sepal_width'] > 3.5, :]

# Print the documentation of read_csv.
help(pd.read_csv)
import pandas as pd
# Reading a CSV file hosted on GitHub with pandas; see
# https://stackoverflow.com/questions/55240330/how-to-read-csv-file-from-github-using-pandas
url = 'https://raw.githubusercontent.com/pairote-sat/SCMA248/main/demo_df'

# First read: default options, no index column specified.
df1 = pd.read_csv(url)
print(df1.head())
df1.columns

## Local alternative: uncomment when the CSV data set is stored on disk.
# df1 = pd.read_csv('/Users/Kaemyuijang/SCMA248/demo_df.csv')
# print(df1.head())
# df1.columns

# Second read: use the first column of the file as the row index.
df1 = pd.read_csv(url, index_col=0)
df1.head()
## Local alternative: uncomment when the CSV data set is stored on disk.
# df2 = pd.read_csv('/Users/Kaemyuijang/SCMA248/demo_df.csv', index_col = 0, parse_dates = ['dates'])
# print(df2.head())
# df2.dtypes

# Read again, asking read_csv to parse the 'dates' column while loading.
df2 = pd.read_csv(url, index_col=0, parse_dates=['dates'])
print(df2)
df2.dtypes  # inspect which dtype 'dates' ended up with

# Convert 'dates' explicitly, reading the values as day-month-year.
# NOTE(review): presumably the remote file stores dates as DDMMYYYY -- confirm
# against the data; the format string is not checked anywhere else here.
dates_ddmmyyyy = pd.to_datetime(df2['dates'], format='%d%m%Y')
df2['dates'] = dates_ddmmyyyy
print(df2)
df2.dtypes
# Load the property data, using the first column of the file as the row index.
url = 'https://raw.githubusercontent.com/pairote-sat/SCMA248/main/property_data.csv'
df = pd.read_csv(url, index_col=0)
df

# Look at the index object itself.
rowindex = df.index
type(rowindex)

# Rebuild the row labels as plain ints by going through a Python list.
# The label at position 4 is overwritten with 10105 before the conversion
# (presumably it is missing in the file -- TODO confirm).
labels = df.index.tolist()
labels[4] = 10105.0
rowindex = list(map(int, labels))
df.index = rowindex
print(df.loc[:, 'ST_NUM'])
# Reload the data and repair the row labels through a NumPy array.
df = pd.read_csv(url, index_col=0)
df
import numpy as np
# Ask for an explicit copy: to_numpy() may return a view of the Index's own
# buffer, and writing into that would either change the (nominally immutable)
# Index in place or fail if the array is read-only.
rowindex = df.index.to_numpy(copy=True)
rowindex[4] = 10105.0  # overwrite the label at position 4
df.index = rowindex.astype(int)
print(df.loc[:, 'ST_NUM'])
# Boolean flags marking which ST_NUM entries pandas treats as missing.
df['ST_NUM'].isna()
df['NUM_BEDROOMS']

# Extra strings that read_csv should also treat as missing values.
missing_values = ["na", "--"]
df = pd.read_csv(url, index_col=0, na_values=missing_values)
df
df['OWN_OCCUPIED']
# Reload the data with default missing-value handling and repair the row labels.
df = pd.read_csv(url, index_col=0)
df
import numpy as np
# Ask for an explicit copy: to_numpy() may return a view of the Index's own
# buffer, and writing into that would either change the (nominally immutable)
# Index in place or fail if the array is read-only.
rowindex = df.index.to_numpy(copy=True)
rowindex[4] = 10105.0  # overwrite the label at position 4
df.index = rowindex.astype(int)
df
# Detecting numbers
def _parses_as_int(value):
    """Return True when ``int(value)`` succeeds, False when it raises ValueError."""
    try:
        int(value)
    except ValueError:
        return False
    return True


# Entries of OWN_OCCUPIED that parse as integers are replaced with NaN.
# The rows are flagged first and blanked out in a single assignment, which
# uses the frame's real row labels instead of a counter assumed to start at
# 10101 (a mismatch would make .loc append new rows) and avoids writing to
# the column while iterating over it.
numeric_rows = df['OWN_OCCUPIED'].map(_parses_as_int).astype(bool)
df.loc[numeric_rows, 'OWN_OCCUPIED'] = np.nan
df['OWN_OCCUPIED']