import pandas as pd
import numpy as np
# Notebook-style walkthrough of pandas Series.  Bare expressions (e.g. a lone
# `s`) only display a value in an interactive session; run as a script they
# are evaluated and discarded.
pd.__version__
# Constructor signature: pd.Series(data, index, dtype)
# Empty Series: no data and no index.
s = pd.Series()
s
# Series from a NumPy array; a default 0..n-1 integer index is assigned.
data = np.array([12, 45, 32, 89])
s = pd.Series(data)
s
# Same data with explicit integer labels instead of the default index.
s = pd.Series(data, index=[101, 102, 103, 104])
s
# Series from a dict: the keys become the index labels.
data = {"a": 1, "b": 2, "c": 3}
s = pd.Series(data)
s
# dict plus explicit index: values are looked up by label.  None of
# 'f', 'g', 'h' is a key of `data`, so every entry is NaN.
data = {"a": 1, "b": 2, "c": 3}
s = pd.Series(data, index=['f', 'g', 'h'])
s
# A scalar is repeated once per index label.
data = 99
s = pd.Series(data, index=[101, 102, 103, 104])
s
# Ten values with the default integer index, used for the access examples.
data = np.array([12, 45, 32, 89, 7, -10, 21, -0.5, -7, 30])
s = pd.Series(data)
s
# Single element with label 4 (the fifth value on the default index).
s[4]
s[0:3] # first three elements; same as s[:3]
s[7:]
# Mixed case: 'f', 'g', 'h' are missing from `data` (NaN); 'a', 'b', 'c' are found.
data = {"a": 1, "b": 2, "c": 3}
s = pd.Series(data, index=['f', 'g', 'h', 'a', 'b', 'c'])
# Label slices include both endpoints: 'f', 'g' and 'h'.
s['f':'h']
# Assign by label.
s['f'] = 88.5
s
# Element-wise comparison yields a boolean Series ...
s > 5
# ... which can be used as a mask to filter values.
s[s > 2]
# Combining masks with Python's `or` raises ValueError (the truth value of a
# Series is ambiguous).  Use the element-wise operators with parentheses
# instead, e.g. s[(s < 2) | (s == 3)].
# s[s < 2 or s == 3]
# Basic attributes: shape tuple, number of dimensions, element count,
# index labels and the underlying values array.
s.shape
s.ndim
s.size
s.index
s.values
# `in` tests membership among the index labels, not the values.
'a' in s
# drop returns a new Series; `s` itself is unchanged here ...
s.drop('a')
s
# ... unless inplace=True is passed, which removes the label from `s`.
s.drop('a', inplace=True)
s
# Arithmetic is applied element-wise.
s + 2
s - 2
s * 2
np.power(s, 2)
# Constructor signature: pd.DataFrame(data, index, columns, dtype, copy)
# Empty data frame: no rows and no columns.
df = pd.DataFrame()
df
# Flat list -> a single column with the default integer label 0.
data = [7, 8, 44, 56, 23, 98]
df = pd.DataFrame(data)
df
# List of lists -> one row per inner list, default integer column labels.
data = [["dog", 'bhaw bhaw'], ['cat', 'meow'], ['cow', 'moo']]
df = pd.DataFrame(data)
df
# Same rows with explicit column names.
df = pd.DataFrame(data, columns=["animal_name", 'animal_sound'])
df
# Dict of equal-length lists -> keys become column names.
data = {'name': ['ram', 'rahul', 'bhwana'], "age": [25, 20, 18]}
df = pd.DataFrame(data)
df
# Same data with custom row labels.
df = pd.DataFrame(data, index=['a', 'b', 'c'])
df
# Dict of Series -> rows are the union of the Series indexes; a cell is NaN
# where a Series has no value for that label.
# NOTE(review): the two indexes are spelled differently ('test1' vs 'test-1'),
# so no labels are shared and no row has both marks - confirm this is intended.
data = {'karan_marks': pd.Series([50, 77, 65], index=['test1', 'test2', 'test3']),
"arjun_marks": pd.Series([67, 89, 32, 98, 50], index=['test-1', 'ptest-1', 'test-2', 'ptest-2', 'test-3'])}
df = pd.DataFrame(data)
df
# List of dicts -> one row per dict; columns are the union of all keys and
# keys missing from a dict become NaN in that row.
data = [{"a": 1, "b": 2}, {"c": 3, "d": 4}, {"e": 5, "f": 6}]
df = pd.DataFrame(data)
df
# Same idea with explicit row labels.
data = [{"a": 1, "b": 2, "c": 90}, {"c": 3, "d": 4}, {"e": 5, "f": 6}]
df = pd.DataFrame(data, index=['x', 'y', 'z'])
df
# Two lists used as two rows with seven named columns.
data = [10, 20, 30, 40, 50, 60, 70]
data1 = [100, 200, 300, 400, 500, 600, 700]
df = pd.DataFrame([data, data1], columns=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
df
# Single brackets with one label return that column as a Series.
df['b']
# Add a column from a list; its length must match the two rows.
df['h'] = [56, 78]
df
# Add a column from a Series: values are aligned on the index, so the third
# value (label 2) has no matching row and is not stored.
df['m'] = pd.Series([56, 78, 89])
df
# A shorter Series leaves NaN in rows it has no label for (row 1 here).
df['n'] = pd.Series([56])
df
# Remove a column in place.
del df['n']
df
# df.pop(<column>) is an alternative that removes the column and returns it.
# df.pop()
# A list of labels returns a DataFrame, even for a single column.
df[['c']]
# Several columns, in the order requested.
df[['e', 'a', 'f']]
# Small fruit table used for derived columns and row selection.
data = {'apple': [102, 500, 250, 75], "grapes": [50, 150, 90, 45]}
df = pd.DataFrame(data)
df
# One-column DataFrame (list of labels), not a Series.
df[['grapes']]
# New column computed element-wise from an existing one.
df['bananas'] = df['grapes'] + 50
df
# axis=1 drops a column (rather than a row); inplace=True modifies df itself.
df.drop('bananas', axis=1, inplace=True)
df
# Several more columns derived from 'grapes'.
df['pear'] = df['grapes'] + 100
df['mangos'] = df['grapes'] + 500
df['jackfruit'] = df['grapes'] + 150
df['strawberry'] = df['grapes'] + 200
df['oranges'] = df['grapes'] + 70
df
# loc & iloc
# loc -> label-based selection: the row whose index label is 3, returned as a
# Series.
df.loc[3]
# iloc -> integer-position-based selection: passing a list ([3]) returns a
# one-row DataFrame for the row at position 3.  With the default integer index
# used here, label 3 and position 3 are the same row.
df.iloc[[3]]
# Build a small marks table, grow it one row at a time, then overwrite and
# remove rows.  Bare expressions only display a value in a notebook session.
student_marks = {
    "name": ['rahul', 'rohit', 'venkat', 'sahil', 'ankita'],
    "OS": [25, 75.5, 96, 0, 100],
    "DBMS": [20, 80, 60, 50, 99],
}
df1 = pd.DataFrame(student_marks)
df1
# Append one row given as a dict.  DataFrame.append was removed in pandas 2.0,
# so wrap the dict in a one-row frame and concatenate; ignore_index=True
# renumbers the rows 0..n-1.
data2 = {"name": "robby", "OS": 60.5, "DBMS": 55.5}
df1 = pd.concat([df1, pd.DataFrame([data2])], ignore_index=True)
df1
# A dict of scalars needs an explicit index to become a one-row frame.
data3 = {"name": "venom", "OS": 80.5, "DBMS": 75}
df3 = pd.DataFrame(data3, index=[0])
df3
# Append a second row the same way, from an existing frame.
df1 = pd.concat([df1, df3], ignore_index=True)
df1
# One-row frames: first by position, then by label.
df1.iloc[[0]]
df1.loc[[4]]
# Assigning a list to an existing label overwrites that whole row.
df1.loc[4] = ['steve', 67, 23]
df1
# del, pop, drop
# Show the row labelled 6, then remove it in place.
df1.loc[[6]]
df1.drop(6, inplace=True)
df1
# Inspection of df1 (and a look back at df), both built earlier in the file.
# Each bare expression only displays its value in a notebook session.
# NOTE(review): `lis` is not used anywhere in the visible code.
lis = [1, 2, 3, 4, 5, 6, 7]
df1
df
df1
# First five rows, last five rows, then only the first two.
df1.head()
df1.tail()
df1.head(2)
# Structure: per-column dtypes, emptiness flag, number of dimensions,
# (rows, columns) tuple and total cell count.
df1.dtypes
df1.empty
df1.ndim
df1.shape
df1.size
# Underlying data as an array, and the column labels.
df1.values
df1.keys()
# Summary statistics.
df1.describe()
# Transpose: rows become columns and vice versa.
df1.T
# Row at integer position 1, returned as a Series.
df1.iloc[1]
# Column-wise sum over the whole frame.
# NOTE(review): df1 also holds the text column 'name'; how sum() treats it may
# depend on the pandas version - confirm.
df1.sum()
# Sum and mean of a single column.
df1['OS'].sum()
df1['OS'].mean()
# Two frames of random values with identical columns, stacked into one frame
# and then iterated.  Bare expressions only display a value in a notebook.
data1 = np.random.rand(10, 3)
data2 = np.random.rand(7, 3)
df1 = pd.DataFrame(data1, columns=['col1', 'col2', 'col3'])
df2 = pd.DataFrame(data2, columns=['col1', 'col2', 'col3'])
df1
df2
# Stack df2 below df1; ignore_index=True renumbers the 17 rows 0..16.
df3 = pd.concat([df1, df2], ignore_index=True)
df3
# Iterating a column yields its values one by one.
for i in df3.col3:
    print(i)
# iterrows yields (index label, row as Series) pairs.
for key, value in df3.iterrows():
    print(key, value)
# Sorting by index and by values.  The frame below has a deliberately
# unordered index (label 9 appears twice).
unsorted_df = pd.DataFrame(np.random.randn(10, 3), index=[5, 7, 3, 9, 2, 1, 9, 11, 21, 4],
columns=['col1', 'col2', 'col3'])
unsorted_df
# Each call returns a new sorted frame; unsorted_df itself is not modified.
# Rows by index label, ascending (default) and then descending.
unsorted_df.sort_index()
unsorted_df.sort_index(ascending=False)
# axis=1 sorts the columns by their labels instead of the rows.
unsorted_df.sort_index(axis=1)
df = pd.DataFrame({"apples": [50, 95, 105, 75], "grapes": [110, 45, 99, 0]})
df
# Rows ordered by the values of one column.
df.sort_values(by='apples')
# Several keys: later columns break ties in earlier ones.
df.sort_values(by=['apples', 'grapes'])
# NOTE(review): pandas documents `kind` as applying only when sorting on a
# single column or label - confirm it has any effect with two keys.
df.sort_values(by=['apples', 'grapes'], kind='mergesort')
# Row removal and column statistics on df3 (built earlier in the file).
# drop returns a new frame without the rows labelled 8 and 16; df3 itself is
# left unchanged because the result is not assigned and inplace is not set.
df3.drop([8, 16])
# Attribute access and bracket access return the same column.
df3.col2
df3['col2']
# Covariance and correlation between two columns.
df3['col2'].cov(df3['col3'])
df3['col2'].corr(df3['col3'])
# Pairwise correlation matrix of all columns.  corr is a method and must be
# called; a bare `df3.corr` would only reference the bound method.
df3.corr()
# Missing-data helpers demonstrated on a small random frame.
df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'b', 'd'],
columns=['col1', 'col2', 'col3'])
df
# reindex conforms the rows to the given labels, in that order.
# NOTE(review): every requested label already exists in df, so this only
# reorders the rows and introduces no NaN; the checks below will therefore
# report no missing values.  Reindexing with labels that are absent from df
# would be needed to produce missing values - confirm the intent.
df = df.reindex(['a', 'b', 'c', 'd', 'e'])
df
# Boolean frames marking missing / present cells.
df.isnull()
df.notnull()
# Number of missing cells per column.
df.isnull().sum()
# Number of non-missing cells per column.
df.count()
# Returns a new frame with missing cells replaced by 0; df is not modified.
df.fillna(0)