Numpy

NumPy data types

import numpy as np

# Build an array from a list of Python ints and show the dtype NumPy inferred.
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)

change the data type of an array

import numpy as np

# The dtype can be requested explicitly when the array is created.
arr2 = np.array([1, 2, 3, 4, 5], dtype="float64")
print(arr2.dtype)
print(arr2)

import numpy as np

# Create the array (integer dtype is inferred from the input list).
arr3 = np.array([1, 2, 3, 4, 5])

# astype() returns a new array converted to the requested dtype.
arr3 = arr3.astype(np.float64)

# Print the converted array and its dtype.
print(arr3)
print(arr3.dtype)

import numpy as np

# Create the array.
arr4 = np.array([0, 1, 2, 3, 4, 5])

# Convert to booleans: 0 maps to False, every non-zero value maps to True.
arr4 = arr4.astype(np.bool_)

# Print the converted array and its dtype.
print(arr4)
print(arr4.dtype)
print("note that the only false value is that one corresponding to 0")

import numpy as np

# Create the array.
arr5 = np.array([0, 1, 2, 3, 4, 5])

# Convert to byte strings. np.bytes_ is used because the old alias
# np.string_ was removed in NumPy 2.0; np.bytes_ works on 1.x and 2.x.
arr5 = arr5.astype(np.bytes_)

# Print the converted array and its dtype.
print(arr5)
print(arr5.dtype)

import numpy as np

# Create an array of numeric strings.
arr6 = np.array(["0", "1", "2", "3", "4", "5"])

# Parse the strings into 8-bit integers.
arr6 = arr6.astype(np.int8)

# Print the converted array and its dtype.
print(arr6)
print(arr6.dtype)

Numpy data type & definition

Operators & indexing

Mathematical operators in numpy

import numpy as np

# Start from a plain Python list.
x = [1, 2, 3, 4, 5]
print(x)

# Convert the list into an array.
x = np.array(x)

# On an array, ** is applied element-wise (a plain list would raise TypeError).
print(type(x))
print(x**2)

import numpy as np

# An array holds a single dtype, so mixed inputs are coerced to a common
# type. Here the int and the bool are both converted to strings.
a = np.array([1, "is", True])
print(type(a))
print(type(a[0]))  # first element
print("note that everything will be turned into a string")
print(type(a[2]))  # third element

Performable operations with Numpy Arrays

import numpy as np

firstlist = [1, 2, 3]
numpylist = np.array(firstlist)

# list + list concatenates; array + array adds element by element.
print(firstlist + firstlist)
print(numpylist + numpylist)

Numpy Indexing

import numpy as np

# Create a 1-D array.
crazylist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crazyarray = np.array(crazylist)

# First element (indices start at 0).
print(crazyarray[0])

# Slice from the 2nd to the 6th element: index 1 up to, but excluding, index 6.
print(crazyarray[1:6])

# Slice from the 2nd element to the last.
print(crazyarray[1:])

# Slice from the beginning through the 7th element (index 7 is excluded).
print(crazyarray[:7])

import numpy as np

crazylist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crazyarray = np.array(crazylist)

# Comparing an array with a scalar yields a boolean array, one entry per element.
conditionals = crazyarray < 3
print(conditionals)

Subset with conditions

import numpy as np

crazylist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crazyarray = np.array(crazylist)

# Goal: print all the numbers less than 6.
# The condition is `crazyarray < 6`; indexing the array with that boolean
# mask keeps only the elements where the condition is True.
print(crazyarray[crazyarray < 6])

# Multiple conditions: combine the masks with & and parenthesise each
# comparison, because & binds tighter than < and >.
print(crazyarray[(crazyarray > 3) & (crazyarray < 7)])

import numpy as np

crazylist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crazyarray = np.array(crazylist)

# A step of 3 takes every third element, starting from index 0.
print(crazyarray[::3])

2D Numpy arrays (just arrays with rows and columns)

creating a 2D array

import numpy as np

# A list of equal-length lists becomes a 2-D array (one inner list per row).
my_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(my_array)

# .shape is a (rows, columns) tuple.
print("this array has the following amount of rows and columns respectively:")
print(my_array.shape)

import numpy as np

np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]

# Stack the two lists as rows of a 2-D array: row 0 = heights, row 1 = weights.
np_bigarray = np.array([np_height, np_weight])
print(np_bigarray)
print("rows and columns respectively are:")
print(np_bigarray.shape)

how to subset in a 2D array

import numpy as np

np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array([np_height, np_weight])

# Element in the first row and first column, using [row, col] indexing.
b = np_bigarray[0, 0]

# Same element via chained indexing: select row 0, then item 0 of that row.
alsob = np_bigarray[0][0]

print(b)
print(alsob)

row-col subsetting

import numpy as np

np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array([np_height, np_weight])

# Index [1, 2] is the element in row 2, column 3 (indices are zero-based).
element = np_bigarray[1, 2]
print(element)

# Exact comparison is safe here: the value was stored from the same literal.
if element == 63.6:
    print("Good job")
else:
    print("F bro")

full row subsetting

import numpy as np

np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array([np_height, np_weight])

# A single index on a 2-D array selects a whole row; this is the first row.
print(np_bigarray[0])

full column subsetting

import numpy as np

np_height = [1.73, 1.68, 1.71, 1.89, 1.79]
np_weight = [65.4, 59.2, 63.6, 88.4, 68.7]
np_bigarray = np.array([np_height, np_weight])

# First column: ":" selects every row, 0 selects column index 0.
print(np_bigarray[:, 0])

# Third and fourth columns.
# 2:4 means "from column index 2 (the 3rd column) up to column index 3
# (the 4th column)"; the stop index 4 is not included.
print(np_bigarray[:, 2:4])

Numpy statistical functions

import numpy as np

# Six observations with two columns each.
array = np.array(
    [
        [1.64, 71.78],
        [1.37, 63.35],
        [1.6, 55.09],
        [2.04, 74.85],
        [2.04, 68.72],
        [2.01, 73.57],
    ]
)

# Mean of the first column.
print("mean:")
print(np.mean(array[:, 0]))

# Median of the first column.
print("median:")
print(np.median(array[:, 0]))

# Correlation between the first and second columns. np.corrcoef returns the
# 2x2 correlation matrix; the off-diagonal entries are the coefficient.
print("correlation:")
print(np.corrcoef(array[:, 0], array[:, 1]))

# Standard deviation of the first column. np.std defaults to ddof=0, i.e.
# the population standard deviation.
print("stdev:")
print(np.std(array[:, 0]))

Copy an array (BE CAREFUL)

import numpy as np

my_array = np.arange(0, 11)
print(my_array)

# Slicing does NOT copy: sub_array is a view onto the same memory as my_array.
sub_array = my_array[0:6]
print(sub_array)

# Writing through the view...
sub_array[:] = 0
print(sub_array)

# ...also changes the original array.
print(my_array)

# .copy() creates an independent array with its own data.
my_array_copy = my_array.copy()
print("before transformation:")
print("my_array", my_array)
print("my_array_copy", my_array_copy)

# Transform my_array in place; the copy is unaffected.
my_array[:] = 0
print("after transformation:")
print("my_array", my_array)
print("my_array_copy", my_array_copy)

Numpy Useful Methods

Generate data with a normal distribution

import numpy as np

# Draw 5000 heights from a normal distribution with mean 1.75 and
# standard deviation 0.20.
height = np.random.normal(1.75, 0.20, 5000)
print(height)

Generate a random number between 0 and 1

import numpy as np

# np.random.rand() with no arguments returns one random float in [0, 1).
print(np.random.rand())

# With one argument n it returns a 1-D array of n random floats in [0, 1).
print(np.random.rand(5))

# With two arguments m, n it returns an array of random floats in [0, 1)
# with m rows and n columns (here 5 rows x 7 columns).
print(np.random.rand(5, 7))

Generate a random number between a and b

import numpy as np

# Random integer with a = 1 (inclusive) and b = 20 (exclusive), so the
# result is one of 1..19.
print(np.random.randint(1, 20))

# Random ARRAY drawn from the same range; the tuple (5, 5) gives the
# dimensions of the array.
print(np.random.randint(1, 20, (5, 5)))

Concatenate arrays

import numpy as np

a = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])

# b is 1-D with shape (2,); add a leading axis so it becomes a (1, 2) row
# that can be stacked under the (2, 2) array.
b = np.expand_dims(b, axis=0)

# Concatenate along axis 0, i.e. append b as a new row.
c = np.concatenate((a, b), axis=0)

print(a)
print(b)
print(c)

merge two arrays into a 2D array

import numpy as np

# 5000 heights with mean 1.75 and standard deviation 0.20.
height = np.random.normal(1.75, 0.20, 5000)

# 5000 weights with mean 60.32 and standard deviation 15.
weight = np.random.normal(60.32, 15, 5000)

# Put the two 1-D arrays side by side as the columns of one 2-D array.
np_city = np.column_stack((height, weight))
print(np_city)

Generate an array given an interval

import numpy as np

# Array from 0 to 100; the stop value 101 is excluded.
a = np.arange(0, 101)
print(a)

# Array from 0 to 100 in steps of 2.
b = np.arange(0, 101, 2)
print(b)

Create useful arrays

import numpy as np

# Matrix of zeros; the argument is the shape: 10 rows and 10 columns.
zerosmatrix = np.zeros((10, 10))
print(zerosmatrix)

# Matrix of ones with the same shape.
onesmatrix = np.ones((10, 10))
print(onesmatrix)

# Evenly spaced values: 6 numbers from 0 to 11, both endpoints included.
# The result is a 1-D array.
eqdistmat = np.linspace(0, 11, 6)
print(eqdistmat)

# Identity matrix; np.eye(3) gives a 3x3 matrix.
identitymatrix = np.eye(3)
print(identitymatrix)

Know the array dimensions

import numpy as np

# The .shape attribute gives the dimensions of an array as a tuple.

# 1-D array: the shape is (5,), a single axis of length 5. It has no
# separate row/column axes.
firstarray = np.array([1, 2, 3, 4, 5])
print(firstarray.shape)

# 2-D array: the shape is (3, 4), i.e. 3 rows and 4 columns.
secondarray = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(secondarray.shape)

Reshape an array

import numpy as np

# Start from a 3x2 array and change its dimensions.
originalarray = np.array([[1, 2], [3, 4], [5, 6]])
print(originalarray)

# Reshape to 1 row and 6 columns.
firstmodarray = originalarray.reshape(1, 6)
print(firstmodarray)

# Reshape to 2 rows and 3 columns.
# order="C" reads and writes the elements in row-major (C-like) order.
secmodarray = np.reshape(originalarray, (2, 3), order="C")
print(secmodarray)

# Reshape to 2 rows and 3 columns.
# order="F" reads and writes the elements in column-major (Fortran-like) order.
thirdmodarray = np.reshape(originalarray, (2, 3), order="F")
print(thirdmodarray)

# Reshape to 2 rows and 3 columns.
# order="A" uses Fortran-like order only if the array is Fortran-contiguous
# in memory, and C-like order otherwise. The name is deliberately rebound,
# as in the original cell.
thirdmodarray = np.reshape(originalarray, (2, 3), order="A")
print(thirdmodarray)

Numpy main functions

.max() .min() .argmax() & .argmin()

import numpy as np

# Array of 10 random integers drawn from 1..19.
arr = np.random.randint(1, 20, 10)
print("array:")
print(arr)

# The same values arranged as a 2x5 matrix.
matrixx = arr.reshape(2, 5)
print("matrix:")
print(matrixx)

# .max() with no axis returns the largest element of the whole array.
print("array maximum element:")
print(arr.max())

# .max() with an axis returns the largest element along that dimension:
# axis=0 gives one maximum per column, axis=1 one maximum per row.
print(".max() column level:")
print(matrixx.max(axis=0))
print(".max() row level:")
print(matrixx.max(axis=1))

# .argmax() returns the index of the largest element along the given axis.
print(".argmax() column level:")
print(matrixx.argmax(axis=0))
print(".argmax() row level:")
print(matrixx.argmax(axis=1))

.ptp()

import numpy as np

# Array of 10 random integers drawn from 1..19.
arr = np.random.randint(1, 20, 10)
print("array:")
print(arr)

# The same values arranged as a 2x5 matrix.
matrixx = arr.reshape(2, 5)
print("matrix:")
print(matrixx)

# ptp is the peak-to-peak distance, i.e. max value - min value.
# The function np.ptp is used because the ndarray.ptp() method was removed
# in NumPy 2.0; np.ptp works on both 1.x and 2.x.
print("given the array", arr, ", the .ptp() method returns:")
print(np.ptp(arr))
print("which equals", arr.max(), "-", arr.min())

# On a matrix, pass an axis.
# Column level: one range per column.
print(".ptp() in column level:")
print(np.ptp(matrixx, axis=0))

# Row level: one range per row.
print(".ptp() in row level:")
print(np.ptp(matrixx, axis=1))

.percentile()

import numpy as np

# Array of 10 random integers drawn from 1..19.
arr = np.random.randint(1, 20, 10)
print("array:")
print(arr)

# The same values arranged as a 2x5 matrix.
matrixx = arr.reshape(2, 5)
print("matrix:")
print(matrixx)

# Percentile 0 is the minimum of the array.
print(np.percentile(arr, 0))

# Percentile 50 is the median.
print(np.percentile(arr, 50))

# Percentile 100 is the maximum.
print(np.percentile(arr, 100))

Numpy