12: Numpy - Mathematics#
import numpy as np
1. basic (most useful) methods of the ndarray class#
# Small 2x3 integer matrix used to demonstrate the reduction methods of ndarray.
A = np.array([[1, 2, 3], [4, 5, 6]])
print(A.sum()) # sum of all elements of the array
print(A.sum(axis=0)) # sum along axis 0 (down the rows): one total per column
print(A.sum(axis=1)) # sum along axis 1 (across the columns): one total per row
print(A.cumsum(axis=0)) # cumulative sum along axis 0 (running total down each column)
print(A.prod()) # product of all elements
print(A.cumprod()) # cumulative product over the flattened array (no axis given)
print(A.min()) # minimum of the array
print(A.max()) # maximum of the array
print(A.mean()) # arithmetic mean of all elements
print(A.std()) # standard deviation of all elements (population form, ddof=0 by default)
print(A.var()) # variance of all elements (population form, ddof=0 by default)
21
[5 7 9]
[ 6 15]
[[1 2 3]
[5 7 9]]
720
[ 1 2 6 24 120 720]
1
6
3.5
1.707825127659933
2.9166666666666665
A very important method : the argsort() method
A = np.random.randint(0, 10, [5, 5]) # 5x5 array of random integers from 0 to 9 (upper bound excluded)
print(A)
[[1 1 8 6 5]
[0 9 2 9 5]
[7 4 7 9 3]
[8 9 0 6 0]
[7 8 0 3 8]]
print(A.argsort()) # indices that would sort each row of A (argsort works along the last axis by default)
[[0 1 4 3 2]
[0 2 4 1 3]
[4 1 0 2 3]
[2 4 3 0 1]
[2 3 0 1 4]]
print(A[:,0].argsort()) # indices that would sort column 0 of A in ascending order
[1 0 2 4 3]
A = A[A[:,0].argsort(), :] # reorder the rows of A so that the values in column 0 are ascending
A
array([[0, 9, 2, 9, 5],
[1, 1, 8, 6, 5],
[7, 4, 7, 9, 3],
[7, 8, 0, 3, 8],
[8, 9, 0, 6, 0]])
2. Numpy Statistics#
Pearson correlation :
B = np.random.randn(3, 3) # 3x3 array of samples from the standard normal distribution
# Pearson correlation matrix of B: by default each row of B is treated as a
# variable and each column as an observation
print(np.corrcoef(B))
[[ 1. -0.97790106 0.05478707]
[-0.97790106 1. 0.15517786]
[ 0.05478707 0.15517786 1. ]]
# return the correlation matrix between columns 0 and 1 of B
print(np.corrcoef(B[:,0], B[:, 1]))
[[1. 0.58412611]
[0.58412611 1. ]]
np.unique() :
np.random.seed(0) # fix the seed so the same random array is generated on every run
A = np.random.randint(0, 10, [5,5]) # 5x5 array of random integers from 0 to 9
A
array([[5, 0, 3, 3, 7],
[9, 3, 5, 2, 4],
[7, 6, 8, 8, 1],
[6, 7, 7, 8, 1],
[5, 9, 8, 9, 4]])
np.unique(A) # sorted array of the distinct values found in A
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# Count how many times each distinct value occurs in A, then list the values
# from the least frequent to the most frequent.
values, counts = np.unique(A, return_counts=True)
# Compute the ordering once so that values and counts are permuted identically.
order = counts.argsort()
for i, j in zip(values[order], counts[order]):
    print(f'value {i} appears {j}')
value 0 appears 1
value 2 appears 1
value 1 appears 2
value 4 appears 2
value 6 appears 2
value 3 appears 3
value 5 appears 3
value 9 appears 3
value 7 appears 4
value 8 appears 4
Statistical calculations in the presence of missing data (NaN)
# Build a 5x5 matrix of standard-normal samples and overwrite one entry with
# NaN to show how NumPy's NaN-aware statistics behave.
A = np.random.randn(5, 5)
A[0, 2] = np.nan # insert a NaN in the matrix A
print('ratio NaN/size:', (np.isnan(A).sum()/A.size)) # proportion of NaN entries in A
print('average without NaN:', np.nanmean(A)) # mean of A, ignoring the NaN entries
ratio NaN/size: 0.04
average without NaN: 0.06036087829219453
3. Linear Algebra#
A = np.ones((2,3)) # 2x3 matrix filled with ones
B = np.ones((3,3)) # 3x3 matrix filled with ones
print(A.T) # transpose of A, shape 3x2 (T is an attribute of ndarray, not a method call)
[[1. 1.]
[1. 1.]
[1. 1.]]
print(A.dot(B)) # matrix product A.B: (2x3) times (3x3) gives a 2x3 result
[[3. 3. 3.]
[3. 3. 3.]]
A = np.random.randint(0, 10, [3, 3]) # random 3x3 integer matrix
print('det=', np.linalg.det(A)) # determinant of A (computed in floating point, hence small rounding errors)
# inverse of A; np.linalg.inv raises LinAlgError when A is singular,
# which can happen with a randomly drawn matrix
print('inv A:\n', np.linalg.inv(A))
det= 23.999999999999993
inv A:
[[-0.5 0.875 -0.25 ]
[ 0.5 -1.125 0.75 ]
[-0. 0.33333333 -0.33333333]]
# val holds the eigenvalues of A; column vec[:, i] is the eigenvector paired with val[i]
val, vec = np.linalg.eig(A)
print('eigenvalue:\n', val) # eigenvalues
print('eigenvector:\n', vec) # eigenvectors, one per column
eigenvalue:
[13.58872344 -3. -0.58872344]
eigenvector:
[[-6.70561268e-01 -8.32050294e-01 6.04408531e-01]
[-5.73889829e-01 2.12013389e-16 -7.73940635e-01]
[-4.70104297e-01 5.54700196e-01 1.88960901e-01]]
4. exercise and solution#
Standardize each column of the following matrix, i.e. subtract the column mean and divide by the column standard deviation:
\[D = \frac{A - \operatorname{mean}(A_{column})}{\operatorname{std}(A_{column})}\]
np.random.seed(0) # fixed seed so the exercise matrix is reproducible
A = np.random.randint(0, 100, [10, 5]) # 10x5 matrix of random integers from 0 to 99
A
array([[44, 47, 64, 67, 67],
[ 9, 83, 21, 36, 87],
[70, 88, 88, 12, 58],
[65, 39, 87, 46, 88],
[81, 37, 25, 77, 72],
[ 9, 20, 80, 69, 79],
[47, 64, 82, 99, 88],
[49, 29, 19, 19, 14],
[39, 32, 65, 9, 57],
[32, 31, 74, 23, 35]])
Solution#
Show code cell content
# Subtract each column's mean and divide by each column's standard deviation;
# broadcasting applies the per-column statistics to every row of A.
D = (A - A.mean(axis=0)) / A.std(axis=0)
D
[-2.22044605e-17 -4.44089210e-17 0.00000000e+00 -1.22124533e-16
-4.44089210e-17]
[1. 1. 1. 1. 1.]
Show code cell content
print(D.mean(axis=0)) # column means are all 0, up to floating-point rounding
print(D.std(axis=0)) # column standard deviations are all 1
[-2.22044605e-17 -4.44089210e-17 0.00000000e+00 -1.22124533e-16
-4.44089210e-17]
[1. 1. 1. 1. 1.]