Name: Saurabh Dinesh Chaudhari || Div: TEA || Roll No: TEA27

Compute estimators of the main statistical measures (mean, variance, standard deviation, covariance, correlation, and standard error) for an example dataset, and display the distribution of the samples graphically.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Fix the seed so the two random samples are reproducible between runs.
np.random.seed(5)

# Two samples of 10 integers each: x drawn from [10, 70), y from [20, 40).
x = np.random.randint(10, 70, 10)
y = np.random.randint(20, 40, 10)

# Sort each sample in place (ascending) so the line plots rise monotonically.
# NOTE(review): sorting x and y independently discards the original pairing of
# observations, so the covariance/correlation computed from them describes the
# sorted sequences rather than paired draws - confirm this is intended.
x.sort()
y.sort()

from array import array

# NOTE(review): this assignment rebinds the name `array`, shadowing the class
# imported on the line above. The values look like a pasted copy of the sorted
# `x` sample - confirm, and consider a distinct variable name.
array = [18, 19, 24, 26, 45, 46, 48, 49, 57, 64]

# NOTE(review): rebinds `array` to a plain list; the values look like a pasted
# copy of the sorted `y` sample - confirm.
array = [27, 27, 32, 32, 33, 35, 36, 36, 36, 37]

def calc_mean(dataset):
    """Return the arithmetic mean of ``dataset``.

    Def     : Mean is the arithmetic average of the observations.
    Formula : (sum of observations) / (number of observations)

    Args:
        dataset: One-dimensional array-like of numbers (NumPy array, list or
            tuple).

    Returns:
        The mean as a NumPy scalar. An empty ``dataset`` produces ``nan``
        (NumPy emits a ``RuntimeWarning`` for the 0/0 division).
    """
    # Coerce so plain Python sequences work too, not only ndarrays.
    values = np.asarray(dataset)
    return values.sum() / len(values)

def calc_variance(dataset, mean, ddof=0):
    """Return the variance of ``dataset`` around the supplied ``mean``.

    Def     : Variance is the degree of variation/spread in the dataset.
    Formula : sum((X - X_mean)^2) / (n - ddof)

    Args:
        dataset: One-dimensional array-like of numbers.
        mean: The mean of ``dataset`` (for example from ``calc_mean``).
        ddof: Delta degrees of freedom subtracted from ``n`` in the divisor.
            The default ``0`` gives the population variance (the original
            behaviour); ``1`` gives the unbiased sample estimator.

    Returns:
        The variance as a NumPy scalar.
    """
    values = np.asarray(dataset)
    squared_diff = np.square(values - mean)
    return squared_diff.sum() / (len(values) - ddof)

def calc_SD(variance):
    """Return the standard deviation for a given ``variance``.

    Def     : Standard deviation is the amount of deviation of the points
              around the mean, expressed in the units of the dataset itself.
    Formula : sqrt(variance)

    Args:
        variance: A non-negative variance value (for example from
            ``calc_variance``). A negative value yields ``nan``.

    Returns:
        The square root of ``variance`` as computed by ``np.sqrt``.
    """
    return np.sqrt(variance)

def calc_covariance(dataset1, dataset2, ddof=0):
    """Return the covariance between two equally sized datasets.

    Def     : Covariance measures the joint variability (relationship trend)
              between two sets of data.
    Formula : sum((X - X_mean) * (Y - Y_mean)) / (n - ddof)

    Args:
        dataset1: One-dimensional array-like of numbers.
        dataset2: One-dimensional array-like of numbers with the same shape
            as ``dataset1``.
        ddof: Delta degrees of freedom subtracted from ``n`` in the divisor.
            The default ``0`` gives the population covariance (the original
            behaviour); ``1`` gives the unbiased sample estimator.

    Returns:
        The covariance as a NumPy scalar.

    Raises:
        ValueError: If the two datasets do not have the same shape.
    """
    first = np.asarray(dataset1)
    second = np.asarray(dataset2)
    # Reject mismatched inputs explicitly: a length-1 dataset would otherwise
    # broadcast silently and produce a meaningless result.
    if first.shape != second.shape:
        raise ValueError(
            "dataset1 and dataset2 must have the same shape, got "
            f"{first.shape} and {second.shape}"
        )
    deviation1 = first - np.sum(first) / len(first)
    deviation2 = second - np.sum(second) / len(second)
    return np.sum(deviation1 * deviation2) / (len(first) - ddof)

def calc_correlation(dataset1, dataset2):
    """Return the Pearson correlation coefficient of two datasets.

    Def     : Correlation is the covariance of the two datasets normalised by
              the spread of each one.
    Formula : sum((X - X_mean) * (Y - Y_mean))
              / sqrt(sum((X - X_mean)^2) * sum((Y - Y_mean)^2))

    Args:
        dataset1: One-dimensional array-like of numbers.
        dataset2: One-dimensional array-like of numbers with the same shape
            as ``dataset1``.

    Returns:
        The coefficient as a NumPy scalar. If either dataset is constant the
        denominator is zero and the result is ``nan`` (NumPy emits a
        ``RuntimeWarning``).

    Raises:
        ValueError: If the two datasets do not have the same shape.
    """
    first = np.asarray(dataset1)
    second = np.asarray(dataset2)
    # Reject mismatched inputs explicitly: a length-1 dataset would otherwise
    # broadcast silently and produce a meaningless result.
    if first.shape != second.shape:
        raise ValueError(
            "dataset1 and dataset2 must have the same shape, got "
            f"{first.shape} and {second.shape}"
        )
    deviation1 = first - np.sum(first) / len(first)
    deviation2 = second - np.sum(second) / len(second)
    numerator = np.sum(deviation1 * deviation2)
    denominator = np.sqrt(
        np.sum(np.square(deviation1)) * np.sum(np.square(deviation2))
    )
    return numerator / denominator

def calc_SE(dataset, sd):
    """Return the standard error of the mean for ``dataset``.

    Def     : The standard error measures the accuracy with which a sample
              distribution represents a population, using the standard
              deviation.
    Formula : standard_deviation / sqrt(n)

    Args:
        dataset: The sample; only its length ``n`` is used.
        sd: Standard deviation of the sample (for example from ``calc_SD``).

    Returns:
        ``sd`` divided by the square root of ``len(dataset)``.
    """
    return sd / np.sqrt(len(dataset))

# Evaluate every estimator on the generated samples. The variance, standard
# deviation and standard error are computed for x only; the covariance and
# correlation relate x to y.
mean = calc_mean(x)
mean2 = calc_mean(y)
variance = calc_variance(x, mean)
S_D = calc_SD(variance)
covariance = calc_covariance(x, y)
correlation = calc_correlation(x, y)
S_E = calc_SE(x, S_D)

# Printed order: mean(x), mean(y), variance(x), SD(x), cov(x, y),
# corr(x, y), SE(x).
print(mean, mean2, variance, S_D, covariance, correlation, S_E)

Dataset

# Plot both samples against their index so their ranges can be compared.
# A fresh figure keeps this plot from being drawn over another one when the
# file is run as a plain script rather than cell by cell.
plt.figure()
plt.plot(x, "mo:", label="dataset 1")
plt.plot(y, "go:", label="dataset 2")
plt.legend(loc="upper left")
plt.show()

Mean

# Dataset 1 with its mean drawn as a horizontal reference line.
# A fresh figure keeps this plot separate when the file is run as a script.
plt.figure()
plt.plot(x, "mo:", label="Dataset 1")
plt.axhline(mean, color='b', marker='o', linestyle=':', label="Mean")
plt.legend(loc="upper left")
plt.show()

# Dataset 2 with its mean drawn as a horizontal reference line.
# A fresh figure keeps this plot separate when the file is run as a script.
plt.figure()
plt.plot(y, "ro:", label="Dataset 2")
plt.axhline(mean2, color='g', marker='o', linestyle=':', label="Mean")
plt.legend(loc="upper left")
plt.show()

Correlation

# NOTE(review): np.correlate computes the sliding cross-correlation of the two
# sequences ("same" mode), which is a different quantity from the Pearson
# coefficient stored in `correlation` - confirm which one should be plotted.
corr = np.correlate(x, y, "same")

# A fresh figure keeps this plot separate when the file is run as a script.
plt.figure()
plt.plot(corr, "go:", label="Correlation")
# The label is only shown when a legend is drawn.
plt.legend(loc="upper left")
plt.show()

Variance

# Dataset 1 with its mean and variance drawn as horizontal reference lines.
# A fresh figure keeps this plot separate when the file is run as a script.
plt.figure()
plt.plot(x, "mo:", label="Dataset 1")
plt.axhline(mean, color='g', marker='o', linestyle=':', label="Mean")
plt.axhline(variance, color='b', marker='o', linestyle=':', label="Variance")
plt.legend(loc="upper left")
plt.show()

Covariance

# Both datasets with their covariance drawn as a horizontal reference line.
# A fresh figure keeps this plot separate when the file is run as a script.
plt.figure()
plt.plot(y, "ro:", label="Dataset 2")
plt.plot(x, "go:", label="Dataset 1")
plt.axhline(covariance, color='b', marker='o', linestyle=':', label="covariance")
# The labels are only shown when a legend is drawn.
plt.legend(loc="upper left")
plt.show()