NumPy
NumPy data types
import numpy as np
# np.array() infers the element type from its input: five Python ints
# produce an integer dtype.
arr = np.array((1, 2, 3, 4, 5))
# Show which dtype was inferred.
print(arr.dtype)
change the data type of an array
import numpy as np
# Request the element type up front instead of letting NumPy infer it:
# the integers are stored as 64-bit floats.
arr2 = np.array([1, 2, 3, 4, 5], dtype=np.float64)
# Show the dtype first, then the values.
print(arr2.dtype, arr2, sep="\n")
# Start from an array whose dtype NumPy infers as integer.
arr3 = np.array([1, 2, 3, 4, 5])
# astype() returns a converted array instead of changing the original in
# place, so the result is assigned back to the same name.
arr3 = arr3.astype("float64")
# Show the converted values followed by the new dtype.
print(arr3, arr3.dtype, sep="\n")
# Integers 0..5; only the first one is zero.
arr4 = np.array([0, 1, 2, 3, 4, 5])
# Casting to boolean maps 0 to False and every non-zero value to True.
arr4 = arr4.astype(bool)
# Show the converted values followed by the new dtype.
print(arr4, arr4.dtype, sep="\n")
print("note that the only false value is that one corresponding to 0")
# Integers 0..5 that will be converted to byte strings.
arr5 = np.array([0, 1, 2, 3, 4, 5])
# np.bytes_ is NumPy's fixed-width byte-string type. The older alias
# np.string_ was removed in NumPy 2.0, so np.bytes_ is used here; it is
# available in NumPy 1.x as well.
arr5 = arr5.astype(np.bytes_)
# Each element is now a bytes value such as b'0'; the dtype is reported
# as "S" followed by the fixed width in bytes.
print(arr5)
print(arr5.dtype)
# Every element is the text form of a small integer.
arr6 = np.array(["0", "1", "2", "3", "4", "5"])
# astype() parses each string as a number; int8 is a signed 8-bit integer.
arr6 = arr6.astype("int8")
# Show the parsed values followed by the new dtype.
print(arr6, arr6.dtype, sep="\n")
Numpy data type & definition
Operators & indexing
Mathematical operators in numpy
# A plain Python list; the ** operator is not defined for lists.
x = [1, 2, 3, 4, 5]
print(x)
import numpy as np
# Wrapping the list in np.array() gives an ndarray, on which arithmetic
# operators such as ** are applied to every element.
x = np.array(x)
# Show the new type, then each element squared.
print(type(x), x ** 2, sep="\n")
# An ndarray holds a single element type. Given an int, a str and a bool,
# NumPy converts all three to strings so that they share one type.
a = np.array([1, "is", True])
# The container type, then the type of its first element.
print(type(a), type(a[0]), sep="\n")
print("note that everything will be turned into a string")
# The third element started as a bool but is stored as a string too.
print(type(a[2]))
Performable operations with Numpy Arrays
# The same three values, once as a list and once as an array.
firstlist = [1, 2, 3]
numpylist = np.array(firstlist)
# "+" concatenates lists (first line printed) but adds arrays element by
# element (second line printed).
print(firstlist + firstlist, numpylist + numpylist, sep="\n")
Numpy Indexing
# Build a 1-D array holding the values 1..12.
crazylist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crazyarray = np.array(crazylist)
# First element: indices start at 0.
print(crazyarray[0])
# 2nd through 5th elements. The stop index of a slice is exclusive, so
# [1:5] covers indices 1, 2, 3 and 4.
second_to_fifth = crazyarray[1:5]
print(second_to_fifth)
# From the second element to the end: omitting the stop runs to the last
# element.
print(crazyarray[1:])
# From the beginning through the sixth element: omitting the start begins
# at index 0, and the exclusive stop 6 ends at index 5.
first_six = crazyarray[:6]
print(first_six)
# The values 1..12 as a list and as an array.
crazylist = list(range(1, 13))
crazyarray = np.array(crazylist)
# Comparing an array with a scalar tests every element and yields a
# boolean array of the same length.
conditionals = crazyarray < 3
print(conditionals)
Subset with conditions
import numpy as np
# The values 1..12 as an array.
crazylist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crazyarray = np.array(crazylist)
# Boolean mask: True wherever the element is less than 6. It is stored in
# a name because a bare comparison on its own line has no effect in a
# script.
below_six = crazyarray < 6
# Indexing with the mask keeps only the elements where it is True.
print(crazyarray[below_six])
# Several conditions are combined element-wise with "&". Each comparison
# needs its own parentheses because "&" binds tighter than "<" and ">".
between_three_and_seven = (crazyarray > 3) & (crazyarray < 7)
print(crazyarray[between_three_and_seven])
# The values 1..12 as a list and as an array.
crazylist = list(range(1, 13))
crazyarray = np.array(crazylist)
# A slice is start:stop:step. With start and stop omitted and a step of
# 3, every third element is taken beginning at index 0.
print(crazyarray[0::3])
2D NumPy arrays (arrays with rows and columns)
creating a 2D array
# A 2-D array is built from a list of equally long rows.
my_array = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
print(my_array)
# .shape is a tuple giving the number of rows, then the number of columns.
print(
    "this array has the following amount of rows and columns respectively:",
    my_array.shape,
    sep="\n",
)
import numpy as np
# Two plain Python lists of five values each (despite the np_ prefix they
# are not arrays yet).
np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
# Passing both lists together makes each list one row of a 2-D array.
np_bigarray = np.array((np_height, np_weight))
print(np_bigarray)
# Two rows (one per list) and five columns (one per value).
print("rows and columns respectively are:", np_bigarray.shape, sep="\n")
how to subset in a 2D array
# Two rows of five values each.
np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array((np_height, np_weight))
# [row, column] picks one element in a single indexing step; here the
# element in the first row and first column.
b = np_bigarray[0, 0]
# [row][column] reaches the same element in two steps: select the row
# first, then index into that row.
alsob = np_bigarray[0][0]
print(b, alsob, sep="\n")
row-col subsetting
# Two rows of five values each.
np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array([np_height, np_weight])
# Index [1, 2] is the second row and third column (indices start at 0).
element = np_bigarray[1, 2]
print(element)
# Floating-point values are compared with a tolerance rather than "==",
# which can fail for values that went through arithmetic.
if np.isclose(element, 63.6):
    print("Good job")
else:
    print("F bro")
full row subsetting
# Two rows of five values each.
np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array((np_height, np_weight))
# Row index 0 with every column: the complete first row.
print(np_bigarray[0, :])
full column subsetting
# Two rows of five values each.
np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array((np_height, np_weight))
# ":" in the row position means "every row", so [:, 0] is the whole first
# column.
print(np_bigarray[:, 0])
# Third and fourth columns. The slice 2:4 starts at column index 2 (the
# third column) and stops before column index 4, so it ends with column
# index 3 (the fourth column).
print(np_bigarray[:, 2:4])
Numpy statistical functions
# Six rows with two columns each.
array = np.array(
    [
        [1.64, 71.78],
        [1.37, 63.35],
        [1.6, 55.09],
        [2.04, 74.85],
        [2.04, 68.72],
        [2.01, 73.57],
    ]
)
# Pull the two columns out once so every statistic below reads the same data.
first_col = array[:, 0]
second_col = array[:, 1]
# Arithmetic mean of the first column.
print("mean:", np.mean(first_col), sep="\n")
# Median (middle value) of the first column.
print("median:", np.median(first_col), sep="\n")
# corrcoef() returns a 2x2 correlation matrix; the off-diagonal entries
# are the correlation between the first and second column.
print("correlation:", np.corrcoef(first_col, second_col), sep="\n")
# Standard deviation of the first column.
print("stdev:", np.std(first_col), sep="\n")
Copy an array (BE CAREFUL: a slice is a view of the original array, not a copy)
# The integers 0..10.
my_array = np.arange(11)
print(my_array)
# Slicing does not copy: sub_array is a view onto the first six elements
# of my_array and shares their storage.
sub_array = my_array[:6]
print(sub_array)
# Writing through the view ...
sub_array.fill(0)
print(sub_array)
# ... therefore also zeroes the first six elements of my_array.
print(my_array)
# copy() creates an array with its own, independent storage.
my_array_copy = my_array.copy()
print("before transformation:")
print("my_array", my_array)
print("my_array_copy", my_array_copy)
# Overwriting the original now leaves the copy untouched.
my_array.fill(0)
print("after transformation:")
print("my_array", my_array)
print("my_array_copy", my_array_copy)
Numpy Useful Methods
Generate data with a normal distribution
import numpy as np
# Draw 5000 samples from a normal distribution with mean 1.75 and standard
# deviation 0.20, standing in for the heights of 5000 people.
height = np.random.normal(loc=1.75, scale=0.20, size=5000)
print(height)
Generate a random number between 0 and 1
# np.random.rand() with no arguments returns one random float drawn
# uniformly from the interval [0, 1).
import numpy as np
print(np.random.rand())
# With one argument n it returns a 1-D array of n such numbers.
print(np.random.rand(5))
# With two arguments m, n it returns a 2-D array with m rows and n
# columns; rand(5, 7) therefore has 5 rows and 7 columns.
print(np.random.rand(5, 7))
Generate a random number between a and b
import numpy as np
# One random integer between a = 1 and b = 20. The lower bound is
# inclusive and the upper bound is exclusive, so the result lies in 1..19.
print(np.random.randint(low=1, high=20))
# An array of random integers from the same range; the size tuple (5, 5)
# gives the dimensions of the array (5 rows, 5 columns).
print(np.random.randint(low=1, high=20, size=(5, 5)))
Concatenate arrays
import numpy as np
# A 2x2 array and a 1-D array of length 2.
a = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])
# concatenate() needs both inputs to have the same number of dimensions,
# so b gets a new leading axis and becomes a single row of shape (1, 2).
b = b[np.newaxis, :]
# Joining along axis 0 appends b underneath a as a third row.
c = np.concatenate([a, b], axis=0)
print(a, b, c, sep="\n")
merge two arrays into a 2D array
import numpy as np
# 5000 simulated heights: normal distribution, mean 1.75, sd 0.20.
height = np.random.normal(loc=1.75, scale=0.20, size=5000)
# 5000 simulated weights: normal distribution, mean 60.32, sd 15.
weight = np.random.normal(loc=60.32, scale=15, size=5000)
# column_stack() places each 1-D array in its own column, giving one row
# per person with the height first and the weight second.
np_city = np.column_stack([height, weight])
print(np_city)
Generate an array given an interval
# np.arange(start, stop) counts from start up to, but not including, stop;
# a stop of 101 therefore yields 0..100.
import numpy as np
a = np.arange(101)
print(a)
# A third argument sets the step: 0..100 in steps of 2.
b = np.arange(0, 101, step=2)
print(b)
Create useful arrays
import numpy as np
# A matrix filled with zeros; the tuple is (rows, columns) = (10, 10).
zerosmatrix = np.zeros(shape=(10, 10))
print(zerosmatrix)
# A matrix filled with ones, again with 10 rows and 10 columns.
onesmatrix = np.ones(shape=(10, 10))
print(onesmatrix)
# Evenly spaced values: 6 numbers from 0 to 11, both ends included. The
# result is a 1-D array.
eqdistmat = np.linspace(0, 11, num=6)
print(eqdistmat)
# The identity matrix: ones on the diagonal and zeros elsewhere, 3x3 here.
identitymatrix = np.eye(3)
print(identitymatrix)
Know the array dimensions
# The .shape attribute reports the size of an array along each of its
# dimensions.
import numpy as np
# A 1-D array: its shape is the one-element tuple (5,). It has a single
# axis of length 5 rather than separate rows and columns.
firstarray = np.array((1, 2, 3, 4, 5))
print(firstarray.shape)
# A 2-D array built from three rows of four values each.
secondarray = np.array(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ]
)
# The shape is (3, 4): 3 rows and 4 columns.
print(secondarray.shape)
Reshape an array
import numpy as np
# Reshaping changes the dimensions of an array while keeping its elements.
# The starting point has 3 rows and 2 columns.
originalarray = np.array(
    [
        [1, 2],
        [3, 4],
        [5, 6],
    ]
)
print(originalarray)
# 1 row and 6 columns, using the array's own reshape() method.
firstmodarray = originalarray.reshape((1, 6))
print(firstmodarray)
# 2 rows and 3 columns with order "C": elements are read and written row
# by row, the layout used by the C language.
secmodarray = np.reshape(originalarray, (2, 3), order="C")
print(secmodarray)
# 2 rows and 3 columns with order "F": elements are read and written
# column by column, the layout used by Fortran.
thirdmodarray = np.reshape(originalarray, (2, 3), order="F")
print(thirdmodarray)
# 2 rows and 3 columns with order "A": Fortran-like order if the input is
# Fortran-contiguous in memory, C-like order otherwise. The name
# thirdmodarray is reused, so the "F" result above is replaced.
thirdmodarray = np.reshape(originalarray, (2, 3), order="A")
print(thirdmodarray)
Numpy main functions
.max() .min() .argmax() & .argmin()
import numpy as np
# Ten random integers; the lower bound 1 is inclusive and the upper bound
# 20 is exclusive.
arr = np.random.randint(low=1, high=20, size=10)
print("array:", arr, sep="\n")
# The same ten values arranged as a matrix with 2 rows and 5 columns.
matrixx = arr.reshape((2, 5))
print("matrix:", matrixx, sep="\n")
# Without an axis argument, max() returns the largest of all elements.
print("array maximum element:", arr.max(), sep="\n")
# With an axis, max() reduces along that dimension: axis 0 gives one
# maximum per column, axis 1 gives one maximum per row.
print(".max() column level:", matrixx.max(axis=0), sep="\n")
print(".max() row level:", matrixx.max(axis=1), sep="\n")
# argmax() returns the index of the largest value along the given
# dimension instead of the value itself.
print(".argmax() column level:", matrixx.argmax(axis=0), sep="\n")
print(".argmax() row level:", matrixx.argmax(axis=1), sep="\n")
.ptp()
import numpy as np
# Ten random integers; the lower bound 1 is inclusive and the upper bound
# 20 is exclusive.
arr = np.random.randint(1, 20, 10)
print("array:")
print(arr)
# The same ten values arranged as a matrix with 2 rows and 5 columns.
matrixx = arr.reshape(2, 5)
print("matrix:")
print(matrixx)
# ptp ("peak to peak") is the distance between the largest and the
# smallest value: max - min. The function np.ptp() is used because the
# ndarray.ptp() method was removed in NumPy 2.0; the function is available
# in NumPy 1.x as well.
print("given the array", arr, ", the .ptp() method returns:" )
print(np.ptp(arr))
print("which equals", arr.max(), "-", arr.min())
# For a matrix the range can be computed along one dimension.
# axis=0: one range per column.
print(".ptp() in column level:")
print(np.ptp(matrixx, axis=0))
# axis=1: one range per row.
print(".ptp() in row level:")
print(np.ptp(matrixx, axis=1))
np.percentile()
import numpy as np
# Ten random integers; the lower bound 1 is inclusive and the upper bound
# 20 is exclusive.
arr = np.random.randint(low=1, high=20, size=10)
print("array:", arr, sep="\n")
# The same ten values arranged as a matrix with 2 rows and 5 columns.
matrixx = arr.reshape((2, 5))
print("matrix:", matrixx, sep="\n")
# Percentile 0 is the minimum, percentile 50 the median and percentile 100
# the maximum of the array.
for q in (0, 50, 100):
    print(np.percentile(arr, q))