# Numpy

In [1]:
import numpy as np

## how to use


### help function
help(np.FUNC_NAME)

In [3]:
help(np.genfromtxt)

Help on function genfromtxt in module numpy:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=" !#$%&'()*+,-./:;<=>?@[\\]^{|}~", replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')
    Load data from a text file, with missing values handled as specified.
    
    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.
    
    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings in Python 3

### read online document
see [here](https://numpy.org/doc/1.17/user/quickstart.html)

## build a numpy data

### build from file 

In [2]:
# Read the comma-separated file, keeping every field as text.
file_data = np.genfromtxt(
    'uname.txt',
    delimiter=',',
    dtype=str,
)

In [4]:
type(file_data)

numpy.ndarray

In [6]:
file_data.dtype

dtype('<U6')

### build from list

one dimension

In [70]:
help(np.array)

Help on built-in function array in module numpy:

array(...)
    array(object, dtype=None, copy=True, order='K', subok=False, ndmin=0)
    
    Create an array.
    
    Parameters
    ----------
    object : array_like
        An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    dtype : data-type, optional
        The desired data-type for the array.  If not given, then the type will
        be determined as the minimum type required to hold the objects in the
        sequence.  This argument can only be used to 'upcast' the array.  For
        downcasting, use the .astype(t) method.
    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy will
        only be made if __array__ returns a copy, if obj is a nested sequence,
        or if a copy is needed to satisfy any of the other requirements
        (`dtype`, `order`, etc.).
    order : {'K', 'A', 'C', 'F'}

In [19]:
# One-dimensional array built from a flat Python list.
values = [1, 3, 5, 7]
data = np.array(values)
print(data)
type(data)

[1 3 5 7]


numpy.ndarray

two dimensions

In [29]:
# Two-dimensional array: each inner list becomes one row.
data = np.array([
    [1, 3],
    [5, 7],
])

In [30]:
data

array([[1, 3],
       [5, 7]])

see the info of data

In [31]:
data.shape

(2, 2)

All elements of a NumPy array must have the same data type.

numpy array中的元素的类型只能有一个，如果不同，则都会转化成为最兼容的一个。

## data info

In [99]:
# Rebuild the 1-D example so this section does not depend on leftover kernel
# state: the outputs recorded in this section (shape (4,), ndim 1, size 4)
# describe a four-element vector, not the 2x2 matrix from the previous section.
data = np.array([1, 3, 5, 7])
# Element data type
data.dtype

dtype('int64')

In [102]:
data.dtype.name

'int64'

In [98]:
# Shape: the length of the array along each dimension
data.shape

(4,)

In [97]:
# Number of dimensions (axes)
data.ndim

1

In [103]:
# Total number of elements
data.size

4

## how to get element from numpy array
使用索引，index（行号、列号等）  
语法：data[a, b, c, ...]

### get an element

In [41]:
# 2x2 example matrix used by the indexing cells in this section.
data = np.array([[1, 3], [5, 7]])
print(data)
# data[row, col]: row 1, column 0.
# NOTE(review): the printed label says `a[1, 0]` although the variable is `data`.
print(f'a[1, 0] = {data[1, 0]}')

[[1 3]
 [5 7]]
a[1, 0] = 5


### get a row

In [39]:
# Row at index 1: fix the row index and take every column.
print(data[1, :])

[5 7]


### get a column

In [40]:
# Column at index 0: take every row and fix the column index.
print(data[:, 0])

[1 5]


## operator

运算符作用于数组时，会对其中每个元素逐一进行同样的运算，结果是形状相同的新数组。

In [53]:
data = np.array([[1, 6], [5, 7]])
data

array([[1, 6],
       [5, 7]])

In [52]:
data > 5

array([[False,  True],
       [False,  True]])

In [54]:
data + 1

array([[2, 7],
       [6, 8]])

### filter

In [55]:
# Boolean-mask indexing: keep only the elements of data greater than 5
data[data > 5]

array([6, 7])

In [56]:
# Select the rows whose second column (index 1) equals 6
data[data[:, 1] == 6, :]

array([[1, 6]])

In [62]:
# Elements greater than 2 AND less than 7; each comparison needs its own
# parentheses because & binds tighter than > and <
data[(data > 2) & (data < 7)]

array([6, 5])

In [63]:
# Elements less than 2 OR greater than 6
data[(data < 2) | (data > 6)]

array([1, 7])

## convert data type

In [137]:
# Integer matrix and a float32 version of it; print both dtypes in that order.
data = np.array([[1, 6], [5, 7]])
data1 = data.astype(np.float32)
for arr in (data, data1):
    print(arr.dtype)

int64
float32


## calculate on axis

In [76]:
print(data)

[[1 6]
 [5 7]]


In [78]:
# Column sums: axis=0 adds down the rows, giving one total per column
data.sum(axis = 0)

array([ 6, 13])

In [73]:
# Row sums: axis=1 adds across the columns, giving one total per row
data.sum(axis = 1)

array([ 7, 12])

In [79]:
# Three-dimensional array of shape (2, 2, 2): two 2x2 blocks.
data = np.array([
    [[1, 3], [5, 7]],
    [[9, 11], [13, 15]],
])

In [80]:
print(data)

[[[ 1  3]
  [ 5  7]]

 [[ 9 11]
  [13 15]]]


In [87]:
# axis=0 collapses the outermost dimension: the two 2x2 blocks are added element-wise.
data.sum(axis = 0)

array([[10, 14],
       [18, 22]])

In [88]:
# axis=2 collapses the innermost dimension: each innermost pair is summed.
data.sum(axis = 2)

array([[ 4, 12],
       [20, 28]])

这样理解，axis = 0 表示从最外层操作，逐步增大则向内层操作。比如上面3维的，axis = 0 操作的是最外面的方括号，axis = 1 操作的是中间一层的对应元素，axis = 2 操作最内层的数据

## functions

### reshape
改变数据的形状（元素总数保持不变）  
`reshape(a, b)` 或 `reshape(a, -1)`，其中 -1 表示该维度的长度由元素总数自动推算

In [140]:
data = np.array([1, 2, 3, 4])

In [93]:
# View the 4 elements of `data` as a 2x2 matrix (filled row by row, per the output).
data2 = data.reshape(2, 2)
data2

array([[1, 2],
       [3, 4]])

In [141]:
# -1 lets NumPy infer that dimension from the total element count (here 4 / 2 = 2).
data.reshape(2, -1)

array([[1, 2],
       [3, 4]])

In [95]:
data2.shape

(2, 2)

In [96]:
data2.ndim

2

### ravel
延展成为1维

In [138]:
data2.ravel()

array([1, 2, 3, 4])

### zeros
np.zeros(SHAPE)  
SHAPE可以为list或tuple等等

In [109]:
np.zeros([3, 4])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [110]:
np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

### ones

In [111]:
# Request 16-bit integers explicitly through dtype (the output confirms dtype=int16).
np.ones([3, 4], dtype = np.int16)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int16)

### arange
`np.arange([start, ]stop[, step])`：生成从 start 开始、步长为 step、不包含 stop 的序列

In [115]:
# arange(15) yields the integers 0..14; printed first as a flat vector,
# then displayed reshaped into 3 rows of 5.
print(np.arange(15))
np.arange(15).reshape(3, 5)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [116]:
# start=10, stop=20 (excluded), step=2
np.arange(10, 20, 2)

array([10, 12, 14, 16, 18])

### random
np.random是一个包，其下还有很多种函数，如  
np.random.random(SHAPE)


In [117]:
# Random floats with the requested shape (2 rows, 3 columns).
# NOTE(review): no seed is set, so re-running will not reproduce the output shown.
np.random.random([2, 3])

array([[0.00141913, 0.98715195, 0.70946064],
       [0.3207335 , 0.68297509, 0.77486921]])

### linspace
线性分割：在 [start, stop] 区间内等间隔地取 num 个点（包含两个端点）  
`np.linspace(start, stop, num)`

In [121]:
np.linspace(0, 2 * np.pi, 11)

array([0.        , 0.62831853, 1.25663706, 1.88495559, 2.51327412,
       3.14159265, 3.76991118, 4.39822972, 5.02654825, 5.65486678,
       6.28318531])

In [122]:
# np.sin is applied element-wise. Entries such as 1.22e-16 in the output are
# floating-point round-off where the exact value would be 0.
np.sin(np.linspace(0, 2 * np.pi, 11))

array([ 0.00000000e+00,  5.87785252e-01,  9.51056516e-01,  9.51056516e-01,
        5.87785252e-01,  1.22464680e-16, -5.87785252e-01, -9.51056516e-01,
       -9.51056516e-01, -5.87785252e-01, -2.44929360e-16])

### math operator

In [131]:
# Two 2x2 integer matrices used by the arithmetic examples in this section.
a = np.array([3, 6, 1, 4]).reshape(2, 2)
b = np.arange(4).reshape(2, 2)
for matrix in (a, b):
    print(matrix)

[[3 6]
 [1 4]]
[[0 1]
 [2 3]]


In [132]:
# Arithmetic operators act element by element.
print(a - b)   # element-wise difference
print(a ** 2)  # element-wise square

[[ 3  5]
 [-1  1]]
[[ 9 36]
 [ 1 16]]


矩阵乘法

In [133]:
# Element-wise product (multiplies matching positions)
print(a * b) 
# Matrix product; the method form and the function form print the same result
print(a.dot(b))
print(np.dot(a, b))

[[ 0  6]
 [ 2 12]]
[[12 21]
 [ 8 13]]
[[12 21]
 [ 8 13]]


In [134]:
# Natural exponential (e**x) of each element
print(np.exp(a))

[[ 20.08553692 403.42879349]
 [  2.71828183  54.59815003]]


In [135]:
# Square root of each element
print(np.sqrt(a))

[[1.73205081 2.44948974]
 [1.         2.        ]]


In [136]:
# Square of each element
print(np.square(a))

[[ 9 36]
 [ 1 16]]


### stack
拼接

In [143]:
# Two 2-row matrices to stack; -1 lets NumPy infer the column count.
a = np.array([3, 6, 1, 4]).reshape(2, -1)
b = np.arange(4).reshape(2, -1)
for matrix in (a, b):
    print(matrix)

[[3 6]
 [1 4]]
[[0 1]
 [2 3]]


In [146]:
# hstack: join the arrays horizontally, side by side (columns are appended).
np.hstack((a, b))

array([[3, 6, 0, 1],
       [1, 4, 2, 3]])

In [147]:
# vstack: join the arrays vertically, one on top of the other (rows are appended).
np.vstack([a, b])

array([[3, 6],
       [1, 4],
       [0, 1],
       [2, 3]])

### split

In [156]:
# 24 consecutive integers laid out as 2 rows of 12.
a = np.arange(24).reshape(2, 12)
print(a)

[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]


In [157]:
# Split into 3 equal parts along the columns
np.hsplit(a, 3)

[array([[ 0,  1,  2,  3],
        [12, 13, 14, 15]]), array([[ 4,  5,  6,  7],
        [16, 17, 18, 19]]), array([[ 8,  9, 10, 11],
        [20, 21, 22, 23]])]

In [167]:
# Split at the given column indices: here a single cut before column 3
np.hsplit(a, (3,))

[array([[ 0,  1,  2],
        [12, 13, 14]]), array([[ 3,  4,  5,  6,  7,  8,  9, 10, 11],
        [15, 16, 17, 18, 19, 20, 21, 22, 23]])]

In [152]:
# 24 consecutive integers laid out as 6 rows of 4.
b = np.arange(24).reshape(6, 4)
print(b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [155]:
# Split into 3 equal parts along the rows
np.vsplit(b, 3)

[array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]), array([[16, 17, 18, 19],
        [20, 21, 22, 23]])]

### copy data

In [170]:
# Plain assignment copies nothing: both names are bound to one array object.
a = b = np.arange(4)

In [171]:
# Identical ids confirm that a and b refer to the same object.
print(id(a))
print(id(b))

140589028928128
140589028928128


using view()  
creates a new array object that looks at the same data.   
只新建了数组对象（引用不同），底层数据仍与原数组共享

In [172]:
# view() returns a new array object over the same underlying data.
c = a.view()
print(c)
# The two ids differ: c is a distinct object from a.
print(id(a))
print(id(c))

[0 1 2 3]
140589028928128
140589028928368


In [174]:
# Writing through the view is visible in a, because the data is shared.
c[2] = 88
print(a)

[ 0  1 88  3]


using copy()  
makes a complete copy of the array and its data.  
完全复制了数据：得到新的数组对象和独立的数据，修改副本不会影响原数组

In [179]:
# Start again from a fresh array; copy() duplicates the data as well as the object.
a = np.arange(4)
d = a.copy()

In [180]:
print(d)
print(id(a))
print(id(d))

[0 1 2 3]
140589028730960
140589028730800


In [181]:
# Modifying the copy leaves a untouched.
d[2] = 88
print(a)

[0 1 2 3]


### arg

In [184]:
# 3x4 matrix of random floats.
# NOTE(review): unseeded, so the values shown will differ on every run.
a = np.random.random((3, 4))
print(a)

[[0.34072692 0.47163078 0.66349133 0.52282792]
 [0.16150948 0.29945252 0.18677508 0.09691227]
 [0.60727063 0.60997801 0.17988585 0.25905712]]


In [190]:
# argmax returns the index of the maximum; with axis=0 that is the row index
# of the largest value in each column.
max_index = a.argmax(axis = 0)
print(max_index)

[2 2 0 0]


In [192]:
# Pair each column index with its arg-max row index to pull out the column maxima.
a_max = a[max_index, range(a.shape[1])]
print(a_max)

[0.60727063 0.60997801 0.66349133 0.52282792]


In [193]:
# argmin works the same way and returns the index of the minimum

### tile
数据扩展：`np.tile(a, [m, n])` 将 a 沿行方向重复 m 次、沿列方向重复 n 次

In [200]:
a = np.arange(0, 40, 10)
a

array([ 0, 10, 20, 30])

In [199]:
np.tile(a, [2, 3])

array([[ 0, 10, 20, 30,  0, 10, 20, 30,  0, 10, 20, 30],
       [ 0, 10, 20, 30,  0, 10, 20, 30,  0, 10, 20, 30]])

### sort

In [207]:
# Unsorted 2x3 matrix for the sorting examples.
a = np.array([
    [4, 3, 5],
    [1, 2, 1],
])
print(a)

[[4 3 5]
 [1 2 1]]


In [208]:
# np.sort returns a sorted array; axis=0 sorts each column independently.
b = np.sort(a, axis = 0)
print(b)

[[1 2 1]
 [4 3 5]]


In [209]:
# ndarray.sort works in place: a itself is reordered, as the print shows.
a.sort(axis = 0)
print(a)

[[1 2 1]
 [4 3 5]]


In [211]:
# Recreate the unsorted matrix (the previous cell sorted it in place),
# then sort within each row (axis=1).
a = np.array([[4, 3, 5], [1 , 2, 1]])
np.sort(a, axis = 1)

array([[3, 4, 5],
       [1, 1, 2]])

### argsort
返回排序后各元素在原数组中的索引（`a[np.argsort(a)]` 即为排序结果）

In [212]:
a = np.array([3, 2, 5, 1])
print(a)

[3 2 5 1]


In [214]:
# argsort returns the indices that would put a in ascending order.
b = np.argsort(a)
print(b)

[3 1 0 2]


In [216]:
b = a.argsort()
print(b)

[3 1 0 2]


In [217]:
print(a)

[3 2 5 1]


In [218]:
# Indexing a with the argsort result yields the values in sorted order.
a[b]

array([1, 2, 3, 5])

### set operation
交集、差集、并集

In [1]:
import numpy as np

In [2]:
# Two integer arrays used as sets; 4 is the only value they share.
a, b = (
    np.array([1, 3, 4, 6, 7]),
    np.array([2, 4, 5, 8]),
)

差集

In [3]:
np.setdiff1d(a, b)

array([1, 3, 6, 7])

In [5]:
a

array([1, 3, 4, 6, 7])

异或（对称差集）：只属于 a 或只属于 b 的元素

In [4]:
np.setxor1d(a, b)

array([1, 2, 3, 5, 6, 7, 8])

交集

In [6]:
np.intersect1d(a, b)

array([4])

并集

In [7]:
np.union1d(a, b)

array([1, 2, 3, 4, 5, 6, 7, 8])

In [26]:
# Reverse the string with a negative-step slice over its full length.
a = 'abcd'
print(a[::-1])

dcba
