# 1 Estatísticas com arrays Numpy

https://numpy.org/doc/1.16/reference/arrays.ndarray.html#calculation

https://numpy.org/doc/1.16/reference/routines.statistics.html

https://numpy.org/doc/1.16/reference/routines.math.html

https://numpy.org/doc/1.16/reference/generated/numpy.column_stack.html

In [3]:
import numpy as np

In [6]:
# Load the three car attributes from plain-text files, one array per file.
# Years are parsed as integers; km and value use loadtxt's default float dtype.
anos = np.loadtxt("data/carros-anos.txt", dtype=int)
km = np.loadtxt("data/carros-km.txt")
valor = np.loadtxt("data/carros-valor.txt")

In [24]:
valor.reshape(-1,1).shape

(258, 1)

In [27]:
anos.shape

(258,)

In [28]:
# column_stack takes the 1-D arrays and stacks them as the columns of a single
# 2-D array; the column order here is (year, km, value).
dataset = np.column_stack((anos, km, valor))
dataset

array([[2.0030000e+03, 4.4410000e+04, 8.8078640e+04],
       [1.9910000e+03, 5.7120000e+03, 1.0616194e+05],
       [1.9900000e+03, 3.7123000e+04, 7.2832160e+04],
       [2.0190000e+03, 0.0000000e+00, 1.2454907e+05],
       [2.0060000e+03, 2.5757000e+04, 9.2612100e+04],
       [2.0120000e+03, 1.0728000e+04, 9.7497730e+04],
       [2.0190000e+03, 0.0000000e+00, 5.6445200e+04],
       [2.0090000e+03, 7.7599000e+04, 1.1231044e+05],
       [2.0100000e+03, 9.9197000e+04, 1.2071627e+05],
       [2.0110000e+03, 3.7978000e+04, 7.6566490e+04],
       [2.0020000e+03, 1.2859000e+04, 7.1647590e+04],
       [2.0070000e+03, 8.0520000e+03, 7.3919530e+04],
       [2.0010000e+03, 8.9773000e+04, 1.1273299e+05],
       [2.0190000e+03, 0.0000000e+00, 5.3183380e+04],
       [2.0090000e+03, 4.1457000e+04, 1.2748842e+05],
       [2.0160000e+03, 1.1560700e+05, 5.9910400e+04],
       [2.0120000e+03, 4.6449000e+04, 6.1118590e+04],
       [2.0190000e+03, 0.0000000e+00, 8.8552390e+04],
       [2.0160000e+03, 3.708

In [29]:
dataset.shape

(258, 3)

In [25]:
# Two small integer vectors used in the arithmetic examples:
# a holds the even numbers 2, 4, 6, 8 and b the odd numbers 1, 3, 5, 7.
a = np.arange(2, 9, 2)
b = np.arange(1, 8, 2)

In [26]:
# Vector operations: arithmetic with a scalar is applied to every element.

a + 2

array([ 4,  6,  8, 10])

In [13]:
a // 2

array([1, 2, 3, 4])

In [15]:
a ** 2

array([ 4, 16, 36, 64])

In [17]:
a // 3

array([0, 1, 2, 2])

In [20]:
# Two 2x2 float matrices: A is all ones, B is all tens.
A = np.ones(shape=(2, 2))
B = np.ones(shape=(2, 2)) * 10

In [21]:
A @ B

array([[20., 20.],
       [20., 20.]])

In [22]:
# Rebuild the operands and multiply them with np.dot; for 2-D inputs
# this is the matrix product.
A = np.ones((2, 2))
B = np.full((2, 2), 10.0)
C = np.dot(A, B)
C

array([[20., 20.],
       [20., 20.]])

## *np.mean()*

Retorna a média dos elementos do array ao longo do eixo especificado.

In [31]:
# With no axis given, the mean is taken over every element of the 2-D array,
# so years, km and values are all mixed into one number.
np.mean(dataset)

48489.14648578811

In [34]:
# axis=0 averages down the rows, giving one mean per column (year, km, value).
np.mean(dataset, axis=0)

array([ 2007.51162791, 44499.41472868, 98960.51310078])

In [37]:
# dataset columns are (year, km, value): index 1 is the mileage, index 2 the price.
print('Média de Km rodados: ' ,np.mean(dataset[:,1]))
# Label first, then the value. The previous format string placed the colon
# after the number, printing "Média de preços 98960.51: ".
print('Média de preços: %.2f' % np.mean(dataset[:, 2]))

Média de Km rodados:  44499.41472868217
Média de preços 98960.51: 


In [39]:
# axis=0 sums down the rows, giving one total per column.
np.sum(dataset, axis=0)

array([  517938.        , 11480849.        , 25531812.37999999])

In [40]:
np.sum(dataset[:,1])

11480849.0

## *np.std()*

Retorna o desvio padrão dos elementos do array ao longo do eixo especificado.

In [38]:
# Standard deviation of the km column (index 1), using the ndarray method form.
print('Desvio padrão de Km rodados: ', dataset[:, 1].std())

Desvio padrão de Km rodados:  39859.82699005149


In [41]:
# np.exp is applied element-wise to the integers 0..4.
u = np.arange(0, 5)
v = np.exp(u)
print(v)

[ 1.          2.71828183  7.3890561  20.08553692 54.59815003]


In [42]:
# np.sin is applied element-wise to u (values are interpreted as radians).
v = np.sin(u)
print(v)

[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025 ]


## *ndarray.sum()*

Retorna a soma dos elementos do array ao longo do eixo especificado.

# Multiplicação entre matrizes

O método `dot` retorna o produto matricial de dois arrays 2-D (para dois vetores 1-D, o resultado é o produto escalar): 
https://numpy.org/doc/stable/reference/generated/numpy.dot.html



In [43]:
# Matrix-vector product through the ndarray.dot method:
# A is [[0, 1], [2, 3]] and v is [10, 10].
v = np.full(2, 10)
A = np.arange(4).reshape((2, 2))
u = A.dot(v)
print(u)

[10 50]


In [44]:
v

array([10, 10])

In [45]:
A

array([[0, 1],
       [2, 3]])

In [46]:
# Matrix-matrix product with np.dot: a 2x2 matrix of ones times a 2x2 matrix of tens.
A = np.ones((2, 2), dtype=float)
B = A * 10
C = np.dot(A, B)
C

array([[20., 20.],
       [20., 20.]])

In [47]:
A @ B

array([[20., 20.],
       [20., 20.]])

In [48]:
A.transpose()

array([[1., 1.],
       [1., 1.]])

In [49]:
A.T

array([[1., 1.],
       [1., 1.]])

In [50]:
# Rebind B to a 1-D integer array and display it.
B = np.array([2,4,6,8,90])
B

array([ 2,  4,  6,  8, 90])

# Álgebra Linear

O NumPy possui um submódulo específico para a realização de operações típicas da álgebra linear chamado linalg. Com este módulo é possível resolver sistemas lineares, obter inversas de matrizes, calcular autovalores e autovetores, etc.

In [1]:
import numpy as np

In [2]:
# Solve the linear system A @ x = b for x.
A = np.array([[10, 20],
              [30, 40]])
b = np.array([5, 10])
x = np.linalg.solve(A, b)
x

array([0.  , 0.25])

In [6]:
# Solve for two right-hand sides in one call: b1 and b2 are 2x1 column
# vectors placed side by side, so each column of b is one system.
b1 = np.array([[5], [10]])
b2 = np.array([[10], [12]])
b = np.hstack((b1, b2))
print("b =\n", b)
x = np.linalg.solve(A, b)
print("x =\n", x)

b =
 [[ 5 10]
 [10 12]]
x =
 [[ 0.   -0.8 ]
 [ 0.25  0.9 ]]


In [7]:
def fun(A, b1, b2):
    """Solve ``A @ x = b1`` and ``A @ x = b2`` with two separate calls.

    This is the baseline for the timing comparison against a single
    ``np.linalg.solve`` call on the horizontally stacked right-hand sides.

    Parameters
    ----------
    A : array_like
        Coefficient matrix passed to ``np.linalg.solve``.
    b1, b2 : array_like
        Right-hand sides; each one is solved independently.

    Returns
    -------
    tuple
        ``(x1, x2)``, the solutions for ``b1`` and ``b2`` respectively.
    """
    x1 = np.linalg.solve(A, b1)
    x2 = np.linalg.solve(A, b2)
    # Previously both solutions were computed and then discarded (implicit None).
    return x1, x2
    
# Random 1000x1000 system with two column-vector right-hand sides,
# stacked side by side so both can also be solved in a single call.
A = np.random.rand(1000, 1000)
b1 = np.random.rand(1000, 1)
b2 = np.random.rand(1000, 1)
b = np.hstack((b1, b2))

%time fun(A,b1,b2)
%time np.linalg.solve(A,b)

CPU times: user 394 ms, sys: 161 ms, total: 555 ms
Wall time: 253 ms
CPU times: user 231 ms, sys: 62.7 ms, total: 293 ms
Wall time: 117 ms


array([[ 0.98061004, -1.21274999],
       [-0.35511667,  2.00682458],
       [ 0.90859078,  0.14452455],
       ...,
       [-0.51809183,  0.3760868 ],
       [ 0.10411977,  0.0456418 ],
       [ 0.64239666, -1.34986821]])

In [8]:
# Invert the 2x2 matrix [[10, 20], [30, 40]].
A = np.array([[10, 20], [30, 40]])
inv_A = np.linalg.inv(A)
inv_A

array([[-0.2 ,  0.1 ],
       [ 0.15, -0.05]])

# Broadcasting



In [9]:
# v has shape (1, 4); in the product it is stretched across the four rows of u.
u = np.ones(shape=(4, 4))
v = np.array([[10, 20, 30, 40]])
x = u * v
print('u =\n',u)
print('v =\n',v)
print('x =\n',x)


u =
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
v =
 [[10 20 30 40]]
x =
 [[10. 20. 30. 40.]
 [10. 20. 30. 40.]
 [10. 20. 30. 40.]
 [10. 20. 30. 40.]]


In [10]:
# Cross-correlation of [0, 1, 2] and [3, 4, 5]; with np.correlate's default
# mode and equal-length inputs the result has a single element.
arr1 = np.arange(3)
arr2 = np.arange(3, 6)
print('Correlation: \n', np.correlate(arr1,arr2))

Correlation: 
 [14]


In [11]:
# Build a single-row matrix from its string form, then pick the element at flat index 2.
# NOTE(review): NumPy's documentation no longer recommends np.matrix; consider
# a regular ndarray here.
take = np.matrix('[4,1,12,3,4,6,7]')
ls = take.take(2)
ls

matrix([[12]])

In [12]:
take

matrix([[ 4,  1, 12,  3,  4,  6,  7]])

In [14]:
# Trace = sum of the main diagonal (55 + 44 + 77).
n_array = np.array([
    [55, 25, 15],
    [30, 44, 2],
    [11, 45, 77],
])
trace = n_array.trace()
trace

176