NumPy

NumPy

NumPy is a fundamental library for numerical computing in Python.

Additional resources:

import numpy as np

Let’s compare regular Python lists and NumPy arrays.

# A list is created with [.., ..]
myvals = [1.0, 2.0, 1.5]
myvals

[1.0, 2.0, 1.5]

type(myvals)

list

myvals_np = np.array([1.2, 3.0, 4.0])

myvals_np

array([1.2, 3. , 4. ])

type(myvals_np)

numpy.ndarray

myvals_np.dtype

dtype('float64')

myvals_np.sum()

np.float64(8.2)

x = np.array([1.0,1.5, 2.0, 5.3]) 
x

array([1. , 1.5, 2. , 5.3])

x[1]

np.float64(1.5)

x[-1]

np.float64(5.3)

x[1] = 2.0 # modify the second value in the array
x

array([1. , 2. , 2. , 5.3])

x[:2]

array([1., 2.])

Inline exercise

Python is a general-purpose language that was not designed with numerical computing in mind.

However, NumPy is designed for numerical computing!

[1.2, 4.5] + [2.3, 4.3] # is this the result you expected??

[1.2, 4.5, 2.3, 4.3]

np.array([1.2, 4.5]) + np.array([2.3, 4.3]) 

array([3.5, 8.8])

np.array([1.2, 4.5]) * np.array([2.3, 4.3]) 

array([ 2.76, 19.35])

Note for MATLAB users: in NumPy, arithmetic operators such as * work element-wise.

np.array([1.2, 4.5]) @ np.array([2.3, 4.3]) # in case you actually wanted to do a dot product

np.float64(22.11)

x = np.arange(5, 100, 5)
x

array([ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
       90, 95])

x.dtype # Integers!

dtype('int64')

x + 1 # add 1!

array([ 6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86,
       91, 96])

x = x + 3.0 # add a float to some integers, can we do that?
x

array([ 8., 13., 18., 23., 28., 33., 38., 43., 48., 53., 58., 63., 68.,
       73., 78., 83., 88., 93., 98.])

x.dtype # but now it became floats!

dtype('float64')

xr = np.random.random(10)
xr

array([0.86383357, 0.70528419, 0.76139918, 0.21792628, 0.58735376,
       0.01032422, 0.07407453, 0.36980574, 0.11765277, 0.33692533])

xr.mean()

np.float64(0.4044579548799757)

xr.std()

np.float64(0.2914940161095131)

xr.max()

np.float64(0.8638335650759967)

xr - xr.mean()

array([ 0.45937561,  0.30082624,  0.35694122, -0.18653168,  0.1828958 ,
       -0.39413374, -0.33038343, -0.03465222, -0.28680519, -0.06753263])

xn = np.random.normal(loc=5.0, scale=2.0, size=100)
xn[30] = 99.0

mu = xn.mean()
sigma = xn.std()

xn[xn < mu - 3*sigma]

array([], dtype=float64)

xn[xn > mu + 3*sigma]

array([99.])

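NumPy can represent missing values in floating-point arrays with the special value np.nan ("not a number"). Ordinary reductions such as mean() then return nan, while NaN-aware variants such as np.nanmean() skip the missing entries.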

y = np.random.random(10)
y

array([0.45423869, 0.08890736, 0.00753467, 0.14937786, 0.86493399,
       0.49444554, 0.21946295, 0.58396526, 0.20913139, 0.5617303 ])

y[5:] = np.nan
y

array([0.45423869, 0.08890736, 0.00753467, 0.14937786, 0.86493399,
              nan,        nan,        nan,        nan,        nan])

y.mean()

np.float64(nan)

np.nanmean(y)

np.float64(0.3129985122418747)

y * np.pi

array([1.42703293, 0.2793107 , 0.02367085, 0.46928439, 2.71727026,
              nan,        nan,        nan,        nan,        nan])

z = np.random.normal(loc=0.0, scale=3.0, size=10)

z_sorted = np.sort(z)
z_sorted

array([-2.17847791, -1.14279691, -1.01844545, -0.74729164,  2.12605903,
        2.38050046,  2.38775808,  3.04072875,  3.48243689,  4.15594908])

z<0.0

array([ True,  True, False,  True, False, False, False, False,  True,
       False])

z_sorted<0.0

array([ True,  True,  True,  True, False, False, False, False, False,
       False])

z_sorted[z_sorted<0.0]

array([-2.17847791, -1.14279691, -1.01844545, -0.74729164])

z_sorted[z_sorted<0.0] = 0.0

z_sorted

array([0.        , 0.        , 0.        , 0.        , 2.12605903,
       2.38050046, 2.38775808, 3.04072875, 3.48243689, 4.15594908])

np.where(z<0.0)

(array([0, 1, 3, 8]),)

xn = np.random.normal(loc=5.0, scale=2.0, size=100)

xn[30] = 99.0 # outlier

median = np.median(xn)
sigma = xn.std()

sigma # sample std affected by outlier

np.float64(9.536367799052742)

xn[xn > median + 3*sigma] # but 1 abnormally high value

array([99.])

xn[xn > median + 3*sigma] = np.nan

np.nanstd(xn) # much closer to the true std==2.0

np.float64(1.8457891591124758)

X = np.array([
              [0.0, 1.0, 2.0],
              [3.0, 4.0, 5.0]
])
    

array([[0., 1., 2.],
       [3., 4., 5.]])

X.shape

(2, 3)

nrows = X.shape[0]
nrows

ncols = X.shape[1]
ncols

X[0,0]

np.float64(0.0)

X[1,1]

np.float64(4.0)

X[-1,-1]

np.float64(5.0)

X[0,:]

array([0., 1., 2.])

X[0]

array([0., 1., 2.])

X.mean()

np.float64(2.5)

colmeans = X.mean(axis=0)
colmeans

array([1.5, 2.5, 3.5])

colmeans.shape

(3,)

rowmeans = X.mean(axis=1)
rowmeans

array([1., 4.])

X - colmeans

array([[-1.5, -1.5, -1.5],
       [ 1.5,  1.5,  1.5]])

# X - rowmeans    # executing this will fail: shapes (2, 3) and (2,) cannot be broadcast together

NumPy broadcasting (detailed explanation of how arrays can be used in expressions)

R = rowmeans[:, np.newaxis] # add a new dimension to create a 2D array
R

array([[1.],
       [4.]])

np.expand_dims(rowmeans, 1) # same result

array([[1.],
       [4.]])

X.shape

(2, 3)

R.shape

(2, 1)

X - R

array([[-1.,  0.,  1.],
       [-1.,  0.,  1.]])

x = X.flatten()

array([0., 1., 2., 3., 4., 5.])

x.reshape(2,3)

array([[0., 1., 2.],
       [3., 4., 5.]])