In [1]:
import numpy as np

We have several categories of data on a number of people (name, age, weight)<br>
We want to save this in a Python program

In [2]:
# Three parallel lists: position i in each list describes the same person.
name, age, weight = (
    ["Alice", "Bob", "Cathy", "Doug"],
    [25, 45, 37, 19],
    [55.0, 85.5, 68.0, 61.5],
)

In [11]:
# Structured array: a compound dtype gives every record named, typed fields.
# np.zeros allocates 4 zero-initialised records (one per person).
data = np.zeros(
    shape=4,
    dtype=dict(
        names=("name", "age", "weight"),
        formats=("U10", "i4", "f8"),
    ),
)
print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


U10 -> Unicode string of maximum length 10<br>
i4 -> 4-byte integer<br>
f8 -> 8-byte float

In [12]:
# Populate each field (column) of the structured array from its parallel list.
data["name"], data["age"], data["weight"] = name, age, weight
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


In [14]:
# Get all names: indexing by field name returns that field for every record
data["name"]

array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

In [15]:
# Get the first row of data: an integer index returns one whole record
data[0]

('Alice', 25, 55.)

In [16]:
# Get the name from the last row: index the record first, then the field
data[-1]["name"]

'Doug'

In [19]:
# Filtering
# Get names where the age is under 30
# First brackets -> boolean mask selects the records with age < 30
# Second brackets -> pulls the "name" field out of those records
data[data["age"] < 30]["name"]

array(['Alice', 'Doug'], dtype='<U10')

# CREATING STRUCTURED ARRAYS

In [20]:
# The dictionary method: parallel "names" and "formats" sequences,
# with each format given as a string type code
np.dtype({"names":("name", "age", "weight"), "formats":("U10", "i4", "f8")})

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

Numerical types can be specified using Python types or NumPy dtypes instead of string codes:

In [21]:
# Formats given as Python / NumPy type objects rather than string codes.
# Note the result is not the same dtype as the string-code version: per the
# output below, `int` became '<i8' and np.float32 is a 4-byte float ('<f4').
np.dtype({"names":("name", "age", "weight"), "formats":((np.str_, 10), int, np.float32)})

dtype([('name', '<U10'), ('age', '<i8'), ('weight', '<f4')])

A compound type can also be specified as a list of tuples:

In [22]:
# List-of-tuples form: one (field_name, format) pair per field.
# NOTE: "S10" is a 10-byte byte string, unlike the Unicode "U10" used earlier.
np.dtype([("name", "S10"), ("age", "i4"), ("weight", "f8")])

dtype([('name', 'S10'), ('age', '<i4'), ('weight', '<f8')])

If the names of the fields are not important, we can specify the types alone in a comma-separated string:

In [23]:
# Formats only: the output below shows the fields auto-named f0, f1, f2
np.dtype("S10, i4, f8")

dtype([('f0', 'S10'), ('f1', '<i4'), ('f2', '<f8')])

# MORE ADVANCED COMPOUND TYPES

Create a data type with "mat" component consisting of a 3x3 floating-point matrix:

In [24]:
# Each record holds an 8-byte integer "id" plus a 3x3 float64 sub-array "mat";
# the optional third tuple element is the sub-array shape.
tp = np.dtype([
    ("id", "i8"),
    ("mat", "f8", (3, 3)),
])
X = np.zeros(shape=1, dtype=tp)

print(X[0])         # the single record: (id, mat)
print(X["mat"][0])  # just the 3x3 matrix stored in that record

(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [25]:
# Print the whole one-element array (a single record)
print(X)

[(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])]


NumPy "dtype" directly maps onto a C structure definition, so the buffer containing the array content can be accessed directly within an appropriately written C program.

# RECORDARRAYS: STRUCTURED ARRAYS WITH A TWIST

"np.recarray" class<br>
fields can be accessed as attributes rather than as dictionary keys

In [26]:
# Dictionary-style field access on the plain structured array
data["age"]

array([25, 45, 37, 19], dtype=int32)

In [27]:
# If we view our data as a record array instead,
# we can access the field as an attribute, with slightly fewer keystrokes

data_rec = data.view(np.recarray)
data_rec.age

array([25, 45, 37, 19], dtype=int32)

In [28]:
# The downside is extra overhead when accessing fields on the record array,
# even with the dictionary-style syntax (compare the timings below)
%timeit data["age"]
%timeit data_rec["age"]
%timeit data_rec.age

294 ns ± 30.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
6.08 µs ± 53.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
9.25 µs ± 264 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
