### Strides

In [1]:
import numpy as np
#### Print out Strides ####
x = np.arange(8, dtype = np.int8)
print("x = ", x)
print("x.strides = ", x.strides)
print(str(x.data))

x =  [0 1 2 3 4 5 6 7]
x.strides =  (1,)
<memory at 0x0000000004E79408>


In [2]:
#### Change the array shape ####
x.shape = 2, 4
print("x = ", x)
print("x.strides = ", x.strides)
print(str(x.data))

x =  [[0 1 2 3]
 [4 5 6 7]]
x.strides =  (4, 1)
<memory at 0x0000000004E727E0>


In [3]:
#### Change the array shape again ####
x.shape = 1,4,2
print("x = ", x)
print("x.strides = ", x.strides)
print(str(x.data))

x =  [[[0 1]
  [2 3]
  [4 5]
  [6 7]]]
x.strides =  (8, 2, 1)
<memory at 0x000000000423F5E8>


In [4]:
#### Create Two Arrays and Compare Shapes ####
x = np.ones((10000,))
y = np.ones((10000 * 100, ))[::100]
print("x.shape: ", x.shape, "; y.shape", y.shape)
print("x == y: ", x == y)

x.shape:  (10000,) ; y.shape (10000,)
x == y:  [ True  True  True ...,  True  True  True]


In [5]:
#### Print Memory Layout of x ####
x.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [6]:
#### Print Memory Layout of y ####
y.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [7]:
#### Print out the Strides ####
print("x.strides: ", x.strides, "y.strides: ", y.strides)

x.strides:  (8,) y.strides:  (800,)


In [8]:
#### Compare Performance between Arrays with Different Strides ####
%timeit x.sum()
%timeit y.sum()

The slowest run took 7.65 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 6.88 µs per loop
10000 loops, best of 3: 27.7 µs per loop


### Structure Array

In [9]:
#### Initialize a Structure Array ####
x = np.empty((2,), dtype = ('i4  ,f4,a10'))
x[:] = [(1,0.5, 'NumPy'), (10,-0.5, 'Essential')]
x

array([(1, 0.5, b'NumPy'), (10, -0.5, b'Essential')], 
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', 'S10')])

In [10]:
#### Retrieve the Values from Structure Array ####
print(x[0])
print(x['f2'])

(1, 0.5, b'NumPy')
[b'NumPy' b'Essential']


In [11]:
#### How Change View Affect Original Structure Array ####
y = x['f0']  
print("y is {}".format(y))
y[:] = y * 10
print("y times 10: {}".format(y))
y[:] = y + 0.5
print("y add 0.5: {}".format(y))
print("the original x change to {}".format(x))

y is [ 1 10]
y times 10: [ 10 100]
y add 0.5: [ 10 100]
the original x change to [(10, 0.5, b'NumPy') (100, -0.5, b'Essential')]


In [12]:
#### Structure Contains Multi-Dimension Array ####
z = np.ones((2,), dtype = ('3i4, (2,3)f4'))
z

array([([1, 1, 1], [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]),
       ([1, 1, 1], [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]])], 
      dtype=[('f0', '<i4', (3,)), ('f1', '<f4', (2, 3))])

In [13]:
#### Specify Field Name ####
print("Original field names are: {}".format(x.dtype.names))
x.dtype.names = ('id', 'value', 'note')
print("After alter field names, x: {}".format(x))

Original field names are: ('f0', 'f1', 'f2')
After alter field names, x: [(10, 0.5, b'NumPy') (100, -0.5, b'Essential')]


In [14]:
#### Two Ways to Define dtype in Structure Array ####
list_ex = np.zeros((2,), dtype = [('id', 'i4'), ('value', 'f4', (2,))])
print(list_ex, list_ex.dtype)
dict_ex = np.zeros((2,), dtype = {'names':['id',  'value'  ], 'formats':['i4', '2f4']})
print(dict_ex, dict_ex.dtype)                    

[(0, [0.0, 0.0]) (0, [0.0, 0.0])] [('id', '<i4'), ('value', '<f4', (2,))]
[(0, [0.0, 0.0]) (0, [0.0, 0.0])] [('id', '<i4'), ('value', '<f4', (2,))]


In [15]:
#### Access Multiple Fields ####
x[['id', 'note']]

array([(10, b'NumPy'), (100, b'Essential')], 
      dtype=[('id', '<i4'), ('note', 'S10')])

### Dates and time in NumPy

In [16]:
#### dype with numpy.datetime64 ####
x = np.datetime64('2015-04-01')
y = np.datetime64('2015-04')
print(x.dtype, y.dtype)

datetime64[D] datetime64[M]


In [17]:
#### Change the Time Unit ####
y = np.datetime64('2015-04', 'D')
print(y, y.dtype)

2015-04-01 datetime64[D]


In [18]:
#### np.arange with datetime64 ####
x = np.arange('2015-01', '2015-04', dtype = 'datetime64[M]')
x

array(['2015-01', '2015-02', '2015-03'], dtype='datetime64[M]')

In [19]:
#### Inpropriate Unit Conversion, Should Fail ####
y = np.datetime64('2015-04-01', 's') 

TypeError: Cannot parse "2015-04-01" as unit 's' using casting rule 'same_kind'

In [20]:
#### Arithmetic Operations ####
y = np.datetime64('2015-01-01')
x - y

array([ 0, 31, 59], dtype='timedelta64[D]')

In [21]:
#### datetime64 an timedelta64 ####
print(np.datetime64('2015') + np.timedelta64(12, 'M'))
print(np.timedelta64(1, 'W') / np.timedelta64(1, 'D'))

2016-01
7.0


In [22]:
#### numpy.datetime64 to Python Datetime List ####
x.tolist()
#### Use List Conprehension ####
[element.item() for element in x]

[datetime.date(2015, 1, 1),
 datetime.date(2015, 2, 1),
 datetime.date(2015, 3, 1)]

### File I/O

In [23]:
#### Initialize a Structure Array ####
id = np.arange(1000)
value = np.random.random(1000)
day = np.random.random_integers(0, 365, 1000) * np.timedelta64(1,'D')
date = np.datetime64('2014-01-01') + day
rec_array = np.core.records.fromarrays([id, value, date], names='id, value, date', formats='i4, f4, a10')
rec_array[:5]

rec.array([(0, 0.5922898650169373, b'2014-06-20'),
 (1, 0.9857735633850098, b'2014-03-30'),
 (2, 0.6873448491096497, b'2014-12-15'),
 (3, 0.19049392640590668, b'2014-02-05'),
 (4, 0.24652297794818878, b'2014-12-01')], 
          dtype=[('id', '<i4'), ('value', '<f4'), ('date', '|S10')])

In [24]:
#### Save it to Disc ####
np.savetxt('./record.csv', rec_array, fmt='%i,%.4f,%s')

In [25]:
#### Read from File ####
read_array = np.genfromtxt('./record.csv', dtype='i4,f4,a10',  delimiter=',', skip_header=0)
read_array[:5]

array([(0, 0.5922999978065491, b"b'2014-06-"),
       (1, 0.98580002784729, b"b'2014-03-"),
       (2, 0.6873000264167786, b"b'2014-12-"),
       (3, 0.19050000607967377, b"b'2014-02-"),
       (4, 0.24650000035762787, b"b'2014-12-")], 
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', 'S10')])

In [26]:
#### Apply Mask ####
from numpy.lib.recfunctions import append_fields 
read_array.dtype.names = ('id', 'value', 'date')
mask = read_array['value'] >= 0.75
read_array = append_fields (read_array, 'mask', data=mask, dtypes='i1')
read_array[:5]

masked_array(data = [(0, 0.5922999978065491, b"b'2014-06-", 0)
 (1, 0.98580002784729, b"b'2014-03-", 1)
 (2, 0.6873000264167786, b"b'2014-12-", 0)
 (3, 0.19050000607967377, b"b'2014-02-", 0)
 (4, 0.24650000035762787, b"b'2014-12-", 0)],
             mask = [(False, False, False, False) (False, False, False, False)
 (False, False, False, False) (False, False, False, False)
 (False, False, False, False)],
       fill_value = (999999, 1.0000000200408773e+20, b'N/A', 63),
            dtype = [('id', '<i4'), ('value', '<f4'), ('date', 'S10'), ('mask', 'i1')])