import pandas as pd
import numpy as np
import os
% matplotlib inline
UsageError: Line magic function `%` not found.
# Read the weather data from the CSV file into a DataFrame.
df = pd.read_csv('weather.csv')
# Dimensions of the data as (number of rows, number of columns).
df.shape
# Print the index range, the dtype and non-null count of each column,
# and the memory usage of the DataFrame.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8784 entries, 0 to 8783
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   MONTH     8784 non-null   int64
 1   DAY       8784 non-null   int64
 2   TIME      8784 non-null   int64
 3   TEMP      8784 non-null   float64
 4   PRESSURE  8784 non-null   int64
dtypes: float64(1), int64(4)
memory usage: 343.2 KB
# Inspect the first 50 rows to see what the data actually looks like.
df.head(50)
# Inspect the last 50 rows of the DataFrame.
df.tail(50)
# High-level overview of the important summary statistics for each column.
df.describe()
# Go further than describe(): request one specific statistic, here the
# maximum of every column.
df.max()
# The same statistic methods work on a single column (a Series) as well as
# on a whole DataFrame; here, the minimum of the PRESSURE column.
df['PRESSURE'].min()
# Pandas documentation: https://pandas.pydata.org/docs/reference/index.html
# (Kept as a comment: a bare URL in a code cell is a SyntaxError.)
# Most frequently occurring value(s) in the TEMP column.
df["TEMP"].mode()
# Simple plot of the TEMP column against the row index using the default
# plot kind.
df["TEMP"].plot()
# Histogram of TEMP using the default number of bins.
df["TEMP"].plot.hist()
# Same histogram with 100 bins for a finer-grained view of the distribution.
df["TEMP"].plot.hist(bins=100)