In [2]:
import numpy as np 

In [21]:
import matplotlib.pyplot as plt

# Create n-dimensional arrays from lists

### Note: Every element of an array has the same type (mixed inputs are converted to a common type if possible)

In [22]:
data1 = [6, 7.5, 8, 0, 1]    # plain Python list mixing ints and one float

In [4]:
arr1=np.array(data1)    # 1-d array; the ints are upcast to float64 alongside 7.5

In [5]:
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [6]:
# Materialise each range so this really is a list of lists on Python 3 as well
# (there, range() yields a lazy range object rather than a list).
data2 = [list(range(1, 5)), list(range(5, 9))]    # list of lists: two rows of four ints

In [7]:
arr2 = np.array(data2)    # 2-d array

In [8]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [9]:
arr2.tolist() # convert array back to list

[[1, 2, 3, 4], [5, 6, 7, 8]]

# Examining arrays

In [10]:
arr1.dtype      # float64

dtype('float64')

In [11]:
arr2.dtype      # integer dtype; width is platform-dependent (int64 on many systems, int32 in the output below)

dtype('int32')

In [12]:
arr2.ndim       # 2

2

In [13]:
arr2.shape      # (2, 4) - axis 0 is rows, axis 1 is columns

(2L, 4L)

In [14]:
arr2.size       # 8 - total number of elements

8

In [15]:
len(arr2)       # 2 - size of first dimension (aka axis)

2

# Create special arrays

In [16]:
np.zeros(10)                    # 1-d array of ten zeros (float by default)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [17]:
np.zeros((3, 6))                # shape given as a tuple: 3 rows x 6 columns

array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [18]:
np.ones(10)                     # 1-d array of ten ones (float by default)

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [19]:
np.linspace(0, 1, 5)            # 0 to 1 (inclusive) with 5 points

array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

In [20]:
np.logspace(0, 3, 4)            # 10^0 to 10^3 (inclusive) with 4 points

array([    1.,    10.,   100.,  1000.])

## arange is like range, except it returns an array (not a list)

In [23]:
int_array = np.arange(5)    # integers 0 through 4

In [24]:
float_array = int_array.astype(float)    # astype returns a new array converted to float; int_array is unchanged

# Slicing

In [25]:
arr1[0]         # 0th element (slices like a list)

6.0

In [26]:
arr2[0]         # row 0: returns 1d array ([1, 2, 3, 4])

array([1, 2, 3, 4])

In [27]:
arr2[0, 3]      # row 0, column 3: returns 4

4

In [28]:
arr2[0][3]      # same element via chained indexing: select row 0, then index 3 of that row

4

In [29]:
arr2[:, 0]      # all rows, column 0: returns 1d array ([1, 5])

array([1, 5])

In [30]:
arr2[:, 0:1]    # all rows, column 0: returns 2d array ([[1], [5]])

array([[1],
       [5]])

# Views and Copies

In [31]:
arr = np.arange(10)

In [32]:
arr[5:8]                    # returns [5, 6, 7]

array([5, 6, 7])

In [33]:
arr[5:8] = 12               # all three values are overwritten (would give error on a list)

In [34]:
arr_view = arr[5:8]         # creates a "view" on arr, not a copy

In [35]:
arr_view[:] = 13            # modifies arr_view AND arr

In [36]:
arr_copy = arr[5:8].copy()  # makes a copy instead

In [37]:
arr_copy[:] = 14            # only modifies arr_copy

# Using Boolean arrays

In [38]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob'])

In [39]:
names == 'Bob'                          # returns a boolean array

array([ True, False, False,  True], dtype=bool)

In [40]:
names[names != 'Bob']                   # logical selection

array(['Joe', 'Will'], 
      dtype='|S4')

In [41]:
(names == 'Bob') | (names == 'Will')    # element-wise OR via | (use & for AND); keywords "and/or" don't work with boolean arrays

array([ True, False,  True,  True], dtype=bool)

In [42]:
names[names != 'Bob'] = 'Joe'           # assign based on a logical selection

In [43]:
np.unique(names)                        # sorted unique values (set-like)

array(['Bob', 'Joe'], 
      dtype='|S4')

# Vectorized Operations

In [44]:
nums = np.arange(5)

In [45]:
nums*10                             # multiply each element by 10

array([ 0, 10, 20, 30, 40])

In [46]:
nums = np.sqrt(nums)                # square root of each element; rebinds nums, so re-running this cell applies sqrt again

In [47]:
np.ceil(nums)                       # also floor, rint (round to nearest int)

array([ 0.,  1.,  2.,  2.,  2.])

In [48]:
np.isnan(nums)                      # checks for NaN

array([False, False, False, False, False], dtype=bool)

In [49]:
nums + np.arange(5)                 # add element-wise

array([ 0.        ,  2.        ,  3.41421356,  4.73205081,  6.        ])

In [50]:
np.maximum(nums, np.array([1, -2, 3, -4, 5]))  # compare element-wise

array([ 1.        ,  1.        ,  3.        ,  1.73205081,  5.        ])

# Math and Stats

In [51]:
rnd = np.random.randn(4, 2) # 4x2 array of standard-normal draws; no seed is set in the cells shown above, so values change on each run

In [52]:
rnd.mean()                  # mean over all elements

-0.33012773249843086

In [53]:
rnd.std()                   # standard deviation over all elements (population form, ddof=0, by default)

1.132773960308415

In [54]:
rnd.argmin()                # index of the minimum element, counted in the flattened array

3

In [55]:
rnd.sum()                   # sum over all elements

-2.6410218599874469

In [56]:
rnd.sum(axis=0)             # sum of columns

array([ 0.6777035 , -3.31872536])

In [57]:
rnd.sum(axis=1)             # sum of rows

array([-1.42301047, -2.77545347,  0.02906591,  1.52837617])

# Use numpy data with matplotlib to create scatter plots

In [58]:
N = 50    # number of points to draw

# Independent uniform [0, 1) draws for the positions and the colour values.
x = np.random.rand(N)
y = np.random.rand(N)
colors = np.random.rand(N)
# Marker sizes between 30 and 100. Note that scatter's `s` is a marker
# *area* in points**2, not a radius.
area = 30 + 70 * np.random.rand(N)

# Explicit figure/axes interface so the plot carries its own labels and title.
fig, ax = plt.subplots()
ax.scatter(x, y, s=area, c=colors)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Random points with random marker size and colour")
plt.show()

# Conditional Logic

In [59]:
np.where(rnd > 0, 2, -2)    # args: condition, value if True, value if False

array([[-2, -2],
       [-2, -2],
       [-2,  2],
       [ 2, -2]])

In [60]:
np.where(rnd > 0, 2, rnd)   # any of the 3 arguments can be an array

array([[-0.78388288, -0.63912758],
       [-0.46253964, -2.31291383],
       [-0.13222228,  2.        ],
       [ 2.        , -0.52797214]])

# Methods for Boolean arrays

In [61]:
(rnd > 0).sum()             # counts number of positive values

2

In [62]:
(rnd > 0).any()             # checks if any value is True

True

In [63]:
(rnd > 0).all()             # checks if all values are True

False

# Reshape, Transpose, Flatten

In [64]:
nums = np.arange(32).reshape(8, 4) # creates 8x4 array

In [65]:
nums.T                       # transpose

array([[ 0,  4,  8, 12, 16, 20, 24, 28],
       [ 1,  5,  9, 13, 17, 21, 25, 29],
       [ 2,  6, 10, 14, 18, 22, 26, 30],
       [ 3,  7, 11, 15, 19, 23, 27, 31]])

In [66]:
nums.flatten()               # collapse to 1-d in row-major order (returns a copy)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

# Random Numbers

In [67]:
np.random.seed(12234)       # seed NumPy's global RNG so the draws below are repeatable

In [68]:
np.random.rand(2, 3)      # uniform draws from [0, 1), in the given shape

array([[ 0.00630595,  0.20303476,  0.76478993],
       [ 0.55513384,  0.74358546,  0.93777808]])

In [69]:
np.random.randn(10)         # random normals (mean 0, sd 1)

array([ -2.79962074e-01,   1.31281104e+00,  -9.27155784e-01,
        -4.01302169e-01,  -2.31085929e+00,  -2.08460156e+00,
         4.59241643e-01,   1.62191344e+00,   1.94515120e-01,
        -2.08631547e-03])

In [70]:
np.random.randint(0, 2, 10) # 10 integers from [0, 2); the upper bound is exclusive, so each is 0 or 1

array([0, 0, 0, 1, 1, 0, 1, 1, 1, 1])