NumPy refresher

In [1]:
import numpy as np
np.__version__
Out[1]:
'1.15.4'

Array from list

In [2]:
my_list = [1,2,3]
# np.array converts the Python list into a 1-D NumPy array
arr = np.array(my_list)
arr
Out[2]:
array([1, 2, 3])

Generating sequence

In [3]:
np.arange(0,10,1) # start=0, stop=10 (excluded from the result), step=1
Out[3]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [4]:
np.linspace(0,10,10) # 10 evenly spaced points from 0 to 10, both endpoints included (spacing is 10/9, not 1)
Out[4]:
array([ 0.        ,  1.11111111,  2.22222222,  3.33333333,  4.44444444,
        5.55555556,  6.66666667,  7.77777778,  8.88888889, 10.        ])

Array creation

In [5]:
np.zeros((3,5)) # 3x5 array filled with float zeros; np.ones((3,5)) builds the same shape filled with ones
Out[5]:
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
In [6]:
np.random.normal(1,0.01,(3,5)) # mean=1, standard deviation=0.01, output shape=(3,5)
Out[6]:
array([[0.99430157, 1.01091334, 0.97278936, 1.00210614, 1.01060466],
       [1.00633889, 0.99569814, 0.99238449, 0.99574993, 1.01166892],
       [1.00581868, 1.01449977, 1.00839394, 1.00682863, 1.00023794]])
In [7]:
np.random.randint(0,10,(3,5)) # random ints from 0 (included) to 10 (excluded), output shape=(3,5)
Out[7]:
array([[1, 3, 1, 1, 1],
       [6, 0, 8, 8, 3],
       [5, 4, 9, 4, 1]])

Reshaping array

In [8]:
# Reshape: the same 15 values shown in three different layouts
arr = np.random.randint(low=0, high=10, size=(3, 5))
layouts = (
    arr,                    # original 3x5 layout
    arr.reshape(-1),        # -1 alone flattens to 1-D (15 elements)
    arr.reshape((5, -1)),   # 5 rows; -1 lets NumPy infer the 3 columns
)
for layout in layouts:
    print(layout)
[[8 9 1 2 2]
 [3 1 4 3 3]
 [3 6 8 3 2]]
[8 9 1 2 2 3 1 4 3 3 3 6 8 3 2]
[[8 9 1]
 [2 2 3]
 [1 4 3]
 [3 3 6]
 [8 3 2]]

Using random seed

In [9]:
# Seeding makes the generated sequence of random numbers reproducible:
# each pass re-seeds with the same value, so both passes print the same two arrays.
for run in range(2):
    np.random.seed(101)
    for draw in range(2):
        print(np.random.randint(0, 100, 10))
[95 11 81 70 63 87 75  9 77 40]
[ 4 63 40 60 92 64  5 12 93 40]
[95 11 81 70 63 87 75  9 77 40]
[ 4 63 40 60 92 64  5 12 93 40]

Array operations

In [10]:
# Fixed seed so the 10 values below are the same on every run
np.random.seed(101)
arr = np.random.randint(low=0, high=100, size=10)
arr
Out[10]:
array([95, 11, 81, 70, 63, 87, 75,  9, 77, 40])
In [11]:
arr.max() # largest value in arr
Out[11]:
95
In [12]:
arr.min() # smallest value in arr
Out[12]:
9
In [13]:
arr.mean() # arithmetic mean of all values in arr
Out[13]:
60.8
In [14]:
# Index (position) of the largest value, not the value itself
arr.argmax()
Out[14]:
0
In [15]:
# Index (position) of the smallest value, not the value itself
arr.argmin()
Out[15]:
7

Slicing

In [32]:
# Slicing examples use a 10x10 matrix holding 0..99 in row-major order,
# so the element at [row, col] equals 10*row + col.
mat = np.arange(100).reshape((10, 10))
mat
Out[32]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])
In [17]:
mat[0,:] # first row: row index 0, every column
Out[17]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [10]:
mat[[0,2]] # first and third row, selected with a list of row indices (0 and 2)
Out[10]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])
In [18]:
mat[:,0] # first column: every row, column index 0 (returned as a 1-D array)
Out[18]:
array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
In [11]:
mat[:,[0,2]] # first and third column (column indices 0 and 2) of every row
Out[11]:
array([[ 0,  2],
       [10, 12],
       [20, 22],
       [30, 32],
       [40, 42],
       [50, 52],
       [60, 62],
       [70, 72],
       [80, 82],
       [90, 92]])
In [19]:
mat[:2,:3] # first 2 rows (index 0-1) and first 3 columns (index 0-2)
Out[19]:
array([[ 0,  1,  2],
       [10, 11, 12]])
In [20]:
mat[2:4,5:-2] # rows at index 2-3 (3rd and 4th rows); columns from index 5 up to, but excluding, the second-to-last column (index 5-7)
Out[20]:
array([[25, 26, 27],
       [35, 36, 37]])
In [21]:
mat[:5:2,:5:2] # every 2nd element of the first 5 rows and first 5 columns (indices 0, 2, 4)
Out[21]:
array([[ 0,  2,  4],
       [20, 22, 24],
       [40, 42, 44]])
In [22]:
mat[::2,::2] # every 2nd row and every 2nd column of the whole matrix
Out[22]:
array([[ 0,  2,  4,  6,  8],
       [20, 22, 24, 26, 28],
       [40, 42, 44, 46, 48],
       [60, 62, 64, 66, 68],
       [80, 82, 84, 86, 88]])
In [23]:
mat[2::2,3::2] # every 2nd row starting at row index 2, every 2nd column starting at column index 3
Out[23]:
array([[23, 25, 27, 29],
       [43, 45, 47, 49],
       [63, 65, 67, 69],
       [83, 85, 87, 89]])
In [24]:
mat[::-1,::-1] # reverse both the row order and the column order
Out[24]:
array([[99, 98, 97, 96, 95, 94, 93, 92, 91, 90],
       [89, 88, 87, 86, 85, 84, 83, 82, 81, 80],
       [79, 78, 77, 76, 75, 74, 73, 72, 71, 70],
       [69, 68, 67, 66, 65, 64, 63, 62, 61, 60],
       [59, 58, 57, 56, 55, 54, 53, 52, 51, 50],
       [49, 48, 47, 46, 45, 44, 43, 42, 41, 40],
       [39, 38, 37, 36, 35, 34, 33, 32, 31, 30],
       [29, 28, 27, 26, 25, 24, 23, 22, 21, 20],
       [19, 18, 17, 16, 15, 14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5,  4,  3,  2,  1,  0]])
In [25]:
mat[3::-1,3::-1] # rows and columns from index 3 down to 0 (the first 4 of each), in reversed order
Out[25]:
array([[33, 32, 31, 30],
       [23, 22, 21, 20],
       [13, 12, 11, 10],
       [ 3,  2,  1,  0]])

Masking

In [23]:
# Masking: comparing an array with a scalar yields a boolean array of the same shape

# True where the element is strictly greater than 50 (50 itself gives False)
my_filter = mat > 50
my_filter
Out[23]:
array([[False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])
In [24]:
# Select all values greater than 50; indexing with a boolean mask returns them as a flat 1-D array
mat[my_filter] # equivalent to mat[mat>50]
Out[24]:
array([51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

Apply operations to elements that satisfy a certain condition

In [39]:
# Multiply only the masked elements (those > 50 when my_filter was built) by -10,
# writing the result back into mat in place.
# Note: re-running this cell multiplies the same elements by -10 again.
mat[my_filter] = mat[my_filter]*-10
mat
Out[39]:
array([[   0,    1,    2,    3,    4,    5,    6,    7,    8,    9],
       [  10,   11,   12,   13,   14,   15,   16,   17,   18,   19],
       [  20,   21,   22,   23,   24,   25,   26,   27,   28,   29],
       [  30,   31,   32,   33,   34,   35,   36,   37,   38,   39],
       [  40,   41,   42,   43,   44,   45,   46,   47,   48,   49],
       [  50, -510, -520, -530, -540, -550, -560, -570, -580, -590],
       [-600, -610, -620, -630, -640, -650, -660, -670, -680, -690],
       [-700, -710, -720, -730, -740, -750, -760, -770, -780, -790],
       [-800, -810, -820, -830, -840, -850, -860, -870, -880, -890],
       [-900, -910, -920, -930, -940, -950, -960, -970, -980, -990]])

np.where can also be used (returns a new array without modifying original)

In [43]:
# Rebuild the 0..99 matrix, then replace every even value with -1.
mat = np.arange(100).reshape((10, 10))
is_even = mat % 2 == 0
# np.where(condition, value_if_true, value_if_false) builds a new array; mat is not changed
np.where(is_even, -1, mat)
Out[43]:
array([[-1,  1, -1,  3, -1,  5, -1,  7, -1,  9],
       [-1, 11, -1, 13, -1, 15, -1, 17, -1, 19],
       [-1, 21, -1, 23, -1, 25, -1, 27, -1, 29],
       [-1, 31, -1, 33, -1, 35, -1, 37, -1, 39],
       [-1, 41, -1, 43, -1, 45, -1, 47, -1, 49],
       [-1, 51, -1, 53, -1, 55, -1, 57, -1, 59],
       [-1, 61, -1, 63, -1, 65, -1, 67, -1, 69],
       [-1, 71, -1, 73, -1, 75, -1, 77, -1, 79],
       [-1, 81, -1, 83, -1, 85, -1, 87, -1, 89],
       [-1, 91, -1, 93, -1, 95, -1, 97, -1, 99]])

Get indices of masked elements

In [47]:
np.where(mat%2==0) # with only a condition, returns a tuple (row indices, column indices) of the matching elements
Out[47]:
(array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4,
        4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8,
        8, 9, 9, 9, 9, 9]),
 array([0, 2, 4, 6, 8, 0, 2, 4, 6, 8, 0, 2, 4, 6, 8, 0, 2, 4, 6, 8, 0, 2,
        4, 6, 8, 0, 2, 4, 6, 8, 0, 2, 4, 6, 8, 0, 2, 4, 6, 8, 0, 2, 4, 6,
        8, 0, 2, 4, 6, 8]))

Getting the elements from those indices

In [55]:
# Define idx in this cell so that it runs on a fresh kernel (Restart & Run All):
# np.where with only a condition returns (row indices, column indices) of the even elements.
idx = np.where(mat%2==0)
# Indexing with that tuple returns the matching elements as a flat 1-D array
mat[idx]
Out[55]:
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
       68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])

Getting the indices zipped into (row, column) pairs

In [48]:
# idx is a tuple of two arrays: row indices and column indices of the even elements
idx = np.where(mat%2==0)
# Pair them up element by element into (row, column) coordinates
list(zip(idx[0],idx[1]))
Out[48]:
[(0, 0),
 (0, 2),
 (0, 4),
 (0, 6),
 (0, 8),
 (1, 0),
 (1, 2),
 (1, 4),
 (1, 6),
 (1, 8),
 (2, 0),
 (2, 2),
 (2, 4),
 (2, 6),
 (2, 8),
 (3, 0),
 (3, 2),
 (3, 4),
 (3, 6),
 (3, 8),
 (4, 0),
 (4, 2),
 (4, 4),
 (4, 6),
 (4, 8),
 (5, 0),
 (5, 2),
 (5, 4),
 (5, 6),
 (5, 8),
 (6, 0),
 (6, 2),
 (6, 4),
 (6, 6),
 (6, 8),
 (7, 0),
 (7, 2),
 (7, 4),
 (7, 6),
 (7, 8),
 (8, 0),
 (8, 2),
 (8, 4),
 (8, 6),
 (8, 8),
 (9, 0),
 (9, 2),
 (9, 4),
 (9, 6),
 (9, 8)]

Applying function to specific indices (or rows/columns)

In [67]:
# Get the indices (row indices, column indices) of the even elements with np.where
idx = np.where(mat%2==0)

def func(X):
    """Return X plus 1.5 times its own reversal.

    Element i of the result is ``X[i] + 1.5 * X[-1 - i]``. The input is not
    modified; a new array is returned.
    """
    mirrored = X[::-1]
    return X + mirrored * 1.5
        
# func returns floats; writing them back into the integer array mat converts them to integers
# (nothing is lost here: every selected value is even, so 1.5 * value is a whole number)
mat[idx] = func(mat[idx])
print(mat)
# Reset the matrix to its original 0..99 values after the in-place change above
mat = np.arange(0,100).reshape(10,10)
[[147   1 146   3 145   5 144   7 143   9]
 [142  11 141  13 140  15 139  17 138  19]
 [137  21 136  23 135  25 134  27 133  29]
 [132  31 131  33 130  35 129  37 128  39]
 [127  41 126  43 125  45 124  47 123  49]
 [122  51 121  53 120  55 119  57 118  59]
 [117  61 116  63 115  65 114  67 113  69]
 [112  71 111  73 110  75 109  77 108  79]
 [107  81 106  83 105  85 104  87 103  89]
 [102  91 101  93 100  95  99  97  98  99]]

Apply function along axis

In [77]:
# 5x3 matrix holding 0..14, filled row by row
a = np.arange(15).reshape((5, 3))
a
Out[77]:
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

Normalizing all columns

In [78]:
# axis=0: the lambda receives each column of a as a 1-D array
# Min-max scaling: (x - min) / (max - min) maps the smallest value of a column to 0 and the largest to 1.
# Caveat: a column whose values are all equal makes the denominator 0.
a = np.apply_along_axis(lambda x: (x - x.min())/ (x.max()-x.min()) ,axis=0,arr=a)
a
Out[78]:
array([[0.  , 0.  , 0.  ],
       [0.25, 0.25, 0.25],
       [0.5 , 0.5 , 0.5 ],
       [0.75, 0.75, 0.75],
       [1.  , 1.  , 1.  ]])

Normalizing all rows

In [79]:
# axis=1: the scaling function is applied to each row of a
def _min_max_scale(values):
    """Rescale a 1-D array linearly so its minimum maps to 0 and its maximum to 1."""
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)

a = np.arange(15).reshape((5, 3))
a = np.apply_along_axis(_min_max_scale, axis=1, arr=a)
a
Out[79]:
array([[0. , 0.5, 1. ],
       [0. , 0.5, 1. ],
       [0. , 0.5, 1. ],
       [0. , 0.5, 1. ],
       [0. , 0.5, 1. ]])

Broadcasting

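When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (rightmost) dimensions and works its way left. The arrays do not need to have the same number of dimensions: missing leading dimensions are treated as having size 1. Two dimensions are compatible when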

1) they are equal, or
2) one of them is 1

If these conditions are not met, a `ValueError: operands could not be broadcast together` exception is thrown, indicating that the arrays have incompatible shapes.

For example:

    Image  (3d array): 256 x 256 x 3
    Scale  (1d array):             3
    Result (3d array): 256 x 256 x 3

    A      (2d array):  5 x 4
    B      (1d array):      1
    Result (2d array):  5 x 4

    A      (3d array):  15 x 3 x 5
    B      (3d array):  15 x 1 x 5
    Result (3d array):  15 x 3 x 5
In [28]:
# 5x4 array of float ones, used as the left operand in the broadcasting examples
arr = np.ones(shape=(5, 4))
arr
Out[28]:
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])
In [29]:
arr + 1 # (5,4) + scalar (0-d, shape ()): the scalar is added to every element
Out[29]:
array([[2., 2., 2., 2.],
       [2., 2., 2., 2.],
       [2., 2., 2., 2.],
       [2., 2., 2., 2.],
       [2., 2., 2., 2.]])
In [30]:
arr + [1,2,3,4] # (5,4) + (4,): the 4 values are added to every row
Out[30]:
array([[2., 3., 4., 5.],
       [2., 3., 4., 5.],
       [2., 3., 4., 5.],
       [2., 3., 4., 5.],
       [2., 3., 4., 5.]])
In [31]:
arr + np.array([1,2,3,4,5]).reshape((5,1)) # (5,4) + (5,1): the i-th value is added to every element of row i
Out[31]:
array([[2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.]])

Concatenation

In [32]:
# 50 samples x 3 integer features, each in 0..100 (the upper bound 101 is excluded)
features = np.random.randint(low=0, high=101, size=(50, 3))
# One integer label in 0..4 per sample, kept 2-D (50x1) so it can be joined column-wise
labels = np.random.randint(low=0, high=5, size=(50, 1))
# axis=1 joins along columns: the labels become the 4th column
data = np.concatenate([features, labels], axis=1)
data
Out[32]:
array([[  4,  63,  40,   3],
       [ 60,  92,  64,   3],
       [  5,  12,  93,   1],
       [ 40,  49,  83,   4],
       [  8,  29,  59,   4],
       [ 34,  44,  72,   4],
       [ 19,  10,  76,   2],
       [ 95,  87,   0,   3],
       [ 73,   8,  62,   4],
       [ 36,  83,  99,   1],
       [ 28,  63,   7,   4],
       [ 10,  52,  56,   0],
       [ 38,  73,  52,   4],
       [ 18,  71,  15,   2],
       [ 44,   0,  12,   2],
       [ 17,  75,  79,   0],
       [ 97,  93,  24,   2],
       [ 36,  63,  19,   3],
       [ 35,  30,  10,   2],
       [ 60,  20,  27,   1],
       [  8,  86,  26,   2],
       [ 87,  46,  47,   4],
       [ 54,  86,   9,   2],
       [ 45,   2,  18,   0],
       [ 58,  92,  11,   1],
       [ 10,  94,  35,   1],
       [ 28,   3,  83,   1],
       [ 84,  47,  14,   1],
       [ 69,  60,  69,   2],
       [ 51,   6,  88,   0],
       [ 71,  68,  23,   2],
       [ 35,  79,  98,   2],
       [ 67,  82,  57,   0],
       [ 77,  46,   3,   1],
       [ 46,  29,  86,   4],
       [ 21,  21,  81,   2],
       [ 23,  94, 100,   1],
       [ 71,  20,  27,   4],
       [ 75,   5,  49,   0],
       [ 86,  89,  63,   4],
       [ 82,  77,   3,   1],
       [ 56,  14,  49,   1],
       [ 87,  52,  13,   3],
       [ 47,  49,  24,   0],
       [ 20,  64,  52,   0],
       [ 60,  47,  29,   0],
       [ 60,  53,  11,   4],
       [ 40,  91,  45,   0],
       [ 97,  24,  36,   0],
       [ 38,   9,  52,   4]])

Sampling

In [3]:
# 1000 evenly spaced points from 0 to 10 (both endpoints included)
x = np.linspace(0, 10, num=1000)
x.shape
Out[3]:
(1000,)

Sample 10 random indices

In [4]:
sample_size=10
# Draw sample_size integer positions from 0 (included) to len(x) (excluded).
# Each position is drawn independently, so the same index can appear more than once.
rand_ind = np.random.randint(0,len(x),size=sample_size)
rand_ind
Out[4]:
array([ 89, 313, 739, 130, 391, 235, 101, 126, 777, 511])

Grab the samples corresponding to those indices

In [5]:
x[rand_ind] # indexing with an integer array returns the element of x at each listed position
Out[5]:
array([0.89089089, 3.13313313, 7.3973974 , 1.3013013 , 3.91391391,
       2.35235235, 1.01101101, 1.26126126, 7.77777778, 5.11511512])