Open In Colab

12: Numpy - Mathematics#

import numpy as np

1. basic (most useful) methods of the ndarray class#

# Small 2x3 integer matrix used to demonstrate the ndarray reduction methods.
A = np.array([[1, 2, 3],
              [4, 5, 6]])

# Sums: without an axis every element is reduced to a single number.
sums = (
    A.sum(),           # all elements added together
    A.sum(axis=0),     # collapse the rows: one sum per column
    A.sum(axis=1),     # collapse the columns: one sum per row
    A.cumsum(axis=0),  # running sum going down each column
)
print(*sums, sep="\n")

# Products: without an axis the array is treated as one flat sequence.
products = (
    A.prod(),     # product of all elements
    A.cumprod(),  # running product over the flattened array
)
print(*products, sep="\n")

# Extremes of the whole array.
extremes = (
    A.min(),  # smallest element
    A.max(),  # largest element
)
print(*extremes, sep="\n")

# Descriptive statistics of the whole array.
statistics = (
    A.mean(),  # average
    A.std(),   # standard deviation (square root of the variance)
    A.var(),   # variance
)
print(*statistics, sep="\n")
21
[5 7 9]
[ 6 15]
[[1 2 3]
 [5 7 9]]
720
[  1   2   6  24 120 720]
1
6
3.5
1.707825127659933
2.9166666666666665

A very important method : the argsort() method

# Draw a 5x5 matrix of random integers from the half-open range [0, 10).
A = np.random.randint(low=0, high=10, size=(5, 5))
print(A)
[[1 1 8 6 5]
 [0 9 2 9 5]
 [7 4 7 9 3]
 [8 9 0 6 0]
 [7 8 0 3 8]]
print(A.argsort()) # for each row, the column indices that would put that row in ascending order
[[0 1 4 3 2]
 [0 2 4 1 3]
 [4 1 0 2 3]
 [2 4 3 0 1]
 [2 3 0 1 4]]
print(A[:,0].argsort()) # row indices that would put column 0 of A in ascending order
[1 0 2 4 3]
A = A[A[:,0].argsort(), :] # reorders the rows of the array so that column 0 is ascending (each row stays intact)
A
array([[0, 9, 2, 9, 5],
       [1, 1, 8, 6, 5],
       [7, 4, 7, 9, 3],
       [7, 8, 0, 3, 8],
       [8, 9, 0, 6, 0]])

2. Numpy Statistics#

Pearson correlation :

# 3x3 sample drawn from the standard normal distribution.
B = np.random.randn(3, 3)

# np.corrcoef treats every row of B as one variable, so this is the 3x3
# matrix of Pearson coefficients between the rows of B.
row_correlations = np.corrcoef(B)
print(row_correlations)
[[ 1.         -0.97790106  0.05478707]
 [-0.97790106  1.          0.15517786]
 [ 0.05478707  0.15517786  1.        ]]
# return the 2x2 correlation matrix between columns 0 and 1 of B
print(np.corrcoef(B[:,0], B[:, 1]))
[[1.         0.58412611]
 [0.58412611 1.        ]]

np.unique() :

# Fix the seed so the same 5x5 matrix of integers in [0, 10) is drawn on every run.
np.random.seed(0)
A = np.random.randint(low=0, high=10, size=(5, 5))
A
array([[5, 0, 3, 3, 7],
       [9, 3, 5, 2, 4],
       [7, 6, 8, 8, 1],
       [6, 7, 7, 8, 1],
       [5, 9, 8, 9, 4]])
np.unique(A) # sorted array of the distinct values found in A
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# Distinct values of A together with the number of times each one occurs.
values, counts = np.unique(A, return_counts=True)

# Compute the ordering once (least frequent first) and reuse it for both
# arrays. A stable sort keeps values with equal counts in ascending order
# instead of leaving the tie order to the sorting algorithm.
order = counts.argsort(kind='stable')
for i, j in zip(values[order], counts[order]):
    print(f'value {i} appears {j}')
value 0 appears 1
value 2 appears 1
value 1 appears 2
value 4 appears 2
value 6 appears 2
value 3 appears 3
value 5 appears 3
value 9 appears 3
value 7 appears 4
value 8 appears 4

Statistical calculations in the presence of missing data (NaN)

A = np.random.randn(5, 5) # 5x5 sample drawn from the standard normal distribution
A[0, 2] = np.nan # overwrite one entry with NaN to simulate a missing value
 
print('ratio NaN/size:', (np.isnan(A).sum()/A.size)) # calculate the proportion of NaN in A
 
print('average without NaN:', np.nanmean(A)) # computes the average of A ignoring the NaN
ratio NaN/size: 0.04
average without NaN: 0.06036087829219453

3. Linear Algebra#

# Two all-ones matrices with shapes (2, 3) and (3, 3).
A = np.ones(shape=(2, 3))
B = np.ones(shape=(3, 3))

# .T is an ndarray attribute (no call needed) that gives the transpose of A.
print(A.T)
[[1. 1.]
 [1. 1.]
 [1. 1.]]
print(A @ B) # matrix product A.B (for 2-D arrays, @ is the preferred spelling of A.dot(B))
[[3. 3. 3.]
 [3. 3. 3.]]
# Random 3x3 matrix of integers in [0, 10). No seed is set here, so the
# matrix (and everything printed below) changes from run to run.
A = np.random.randint(0, 10, [3, 3])

print('det=', np.linalg.det(A)) # compute the determinant of A
try:
    print('inv A:\n', np.linalg.inv(A)) # calculate the inverse of A
except np.linalg.LinAlgError:
    # A random integer matrix can be singular; np.linalg.inv raises
    # LinAlgError when it detects that, so report it instead of aborting.
    print('inv A:\n', 'A is singular, so it has no inverse')
det= 23.999999999999993
inv A:
 [[-0.5         0.875      -0.25      ]
 [ 0.5        -1.125       0.75      ]
 [-0.          0.33333333 -0.33333333]]
val, vec = np.linalg.eig(A) # val[i] is the eigenvalue paired with the column vec[:, i]
print('eigenvalue:\n', val) # eigenvalues of A
print('eigenvector:\n', vec) # eigenvectors of A, one per column
eigenvalue:
 [13.58872344 -3.         -0.58872344]
eigenvector:
 [[-6.70561268e-01 -8.32050294e-01  6.04408531e-01]
 [-5.73889829e-01  2.12013389e-16 -7.73940635e-01]
 [-4.70104297e-01  5.54700196e-01  1.88960901e-01]]

4. exercise and solution#

Standardize each column of the following matrix (so that every column has mean 0 and standard deviation 1), i.e. perform the following calculation:

\[A = \frac{A - mean(A_{column})}{std(A_{column})}\]
# Fix the seed so the exercise always starts from the same 10x5 matrix of
# integers in [0, 100).
np.random.seed(0)
A = np.random.randint(low=0, high=100, size=(10, 5))
A
array([[44, 47, 64, 67, 67],
       [ 9, 83, 21, 36, 87],
       [70, 88, 88, 12, 58],
       [65, 39, 87, 46, 88],
       [81, 37, 25, 77, 72],
       [ 9, 20, 80, 69, 79],
       [47, 64, 82, 99, 88],
       [49, 29, 19, 19, 14],
       [39, 32, 65,  9, 57],
       [32, 31, 74, 23, 35]])

Solution#

Hide code cell content
# Per-column statistics: axis=0 reduces over the rows, leaving one value per
# column. Broadcasting then applies them to every row of A.
column_means = A.mean(axis=0)
column_stds = A.std(axis=0)
D = (A - column_means) / column_stds
D
[-2.22044605e-17 -4.44089210e-17  0.00000000e+00 -1.22124533e-16
 -4.44089210e-17]
[1. 1. 1. 1. 1.]
Hide code cell content
print(D.mean(axis=0)) # the column means are all 0, up to floating-point round-off
print(D.std(axis=0)) # the column standard deviations are all 1
[-2.22044605e-17 -4.44089210e-17  0.00000000e+00 -1.22124533e-16
 -4.44089210e-17]
[1. 1. 1. 1. 1.]