# Exploratory script: sample prices from cars.csv and plot normal distributions.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
# Load cars.csv, then keep a 50-row sample drawn with replacement; the sample
# replaces the full table under the name `df`.
df = pd.read_csv('cars.csv')
print(df)
df = df.sample(n=50, replace=True)
print(df)

# Summary of the sampled data. Each result is printed explicitly because a
# bare expression is evaluated and then discarded when the file runs as a
# script. The mean is computed once (it was previously evaluated twice).
print(df["price_usd"].mean())
print(df["price_usd"].std())
print(df.dtypes)
print(df.describe())
print(df.head())
# Draw 1000 values from a normal distribution with the hard-coded parameters
# below and overlay the theoretical density curve on the histogram.
mu, sigma = 6350, 627.27
s = np.random.normal(mu, sigma, 1000)
# `density=True` normalises the histogram so it is comparable with the pdf.
# It replaces the `normed` keyword, which Matplotlib removed in 3.1.
count, bins, ignored = plt.hist(s, 20, density=True)
# Normal probability density evaluated at the histogram bin edges.
normal_pdf = (
    1 / (sigma * np.sqrt(2 * np.pi))
    * np.exp(-(bins - mu) ** 2 / (2 * sigma ** 2))
)
plt.plot(bins, normal_pdf, linewidth=3, color='y')
plt.show()
# Plot the density of a frozen scipy normal distribution (loc=5, scale=10)
# on a square figure, using the explicit Figure/Axes interface.
mu = 5
std = 10
snd = stats.norm(loc=mu, scale=std)

# 1000 evenly spaced evaluation points; the visible range is narrowed below.
x = np.linspace(-100, 100, num=1000)

label_size = '15'
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.plot(x, snd.pdf(x))
ax.set_xlim(-60, 60)
ax.set_title('Normal Distribution (Mean = 5, STD = 10)', fontsize=label_size)
ax.set_xlabel('Values of Random Variable X', fontsize=label_size)
ax.set_ylabel('Probability', fontsize=label_size)
plt.show()
# Draw a 50-value normal sample, report how far its estimates fall from the
# generating parameters, and overlay the theoretical density on its histogram.
mu, sigma = 6350, 627.2  # mean and standard deviation
s = np.random.normal(mu, sigma, 50)

# Absolute error of the sample mean and of the sample standard deviation
# (ddof=1). Both vary from run to run, so they are printed rather than left
# as bare expressions whose values would be discarded.
print(abs(mu - np.mean(s)))
print(abs(sigma - np.std(s, ddof=1)))

# The stray `0.0` / `0.1` literal statements and the repeated pyplot import
# were removed: the literals had no effect and pyplot is already imported at
# the top of the file.
count, bins, ignored = plt.hist(s, 30, density=True)
# Normal probability density evaluated at the histogram bin edges.
normal_pdf = (
    1 / (sigma * np.sqrt(2 * np.pi))
    * np.exp(-(bins - mu) ** 2 / (2 * sigma ** 2))
)
plt.plot(bins, normal_pdf, linewidth=2, color='r')
plt.show()