# 成為資料分析師 | Python 與資料科學應用

> NumPy 101：隨堂練習參考解答

## 郭耀仁

In [1]:
import numpy as np

## 隨堂練習：創建一個九九乘法表的二維陣列

In [2]:
def create_99_array():
    """
    Build the 9x9 multiplication table as a two-dimensional integer array.

    The entry at row ``i`` and column ``j`` (zero-based) is
    ``(i + 1) * (j + 1)``.

    >>> create_99_array()
    array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9],
           [ 2,  4,  6,  8, 10, 12, 14, 16, 18],
           [ 3,  6,  9, 12, 15, 18, 21, 24, 27],
           [ 4,  8, 12, 16, 20, 24, 28, 32, 36],
           [ 5, 10, 15, 20, 25, 30, 35, 40, 45],
           [ 6, 12, 18, 24, 30, 36, 42, 48, 54],
           [ 7, 14, 21, 28, 35, 42, 49, 56, 63],
           [ 8, 16, 24, 32, 40, 48, 56, 64, 72],
           [ 9, 18, 27, 36, 45, 54, 63, 72, 81]])
    """
    factors = np.arange(1, 10, dtype=int)
    # A (9, 1) column multiplied by a (1, 9) row broadcasts to the (9, 9) table.
    return factors[:, np.newaxis] * factors[np.newaxis, :]

## 隨堂練習：寫作一個可以計算陣列[變異數](https://zh.wikipedia.org/zh-tw/%E6%96%B9%E5%B7%AE)的函式 `var(x)`

$$Var(x) = \frac{1}{N}\sum_{n=1}^{N}(x_n - \bar{x})^2$$

In [3]:
def var(x):
    """
    Return the population variance of the array ``x``.

    The squared deviations from the mean are summed over every element and
    divided by the element count ``x.size``; no degrees-of-freedom
    correction is applied.

    >>> var(np.arange(10))
    8.25
    >>> var(np.arange(100))
    833.25
    """
    deviations = x - x.mean()
    return (deviations**2).sum() / x.size

## 隨堂練習：寫作一個可以計算陣列[標準差](https://zh.wikipedia.org/wiki/%E6%A8%99%E6%BA%96%E5%B7%AE)的函式 `std(x, ddof=0)`

$$SD(x) = \sqrt{\frac{1}{N - ddof}\sum_{n=1}^{N}(x_n - \bar{x})^2}$$

In [4]:
def std(x, ddof=0):
    """
    Return the standard deviation of the array ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Input values. Every element is used; ``x.size`` is the count N.
    ddof : int, optional
        Delta degrees of freedom. The sum of squared deviations from the
        mean is divided by ``N - ddof``. It is not validated against N.

    Returns
    -------
    The square root of the sum of squared deviations divided by
    ``N - ddof``.

    Raises
    ------
    ValueError
        If ``x`` holds fewer than two elements (an empty array included).

    >>> std(np.arange(10))
    2.8722813232690143
    >>> std(np.arange(10), 1)
    3.0276503540974917
    >>> std(np.arange(1))
    Traceback (most recent call last):
        ...
    ValueError: The length of array must be larger than 1.
    """
    N = x.size
    # An empty array has no mean either, so reject it together with the
    # single-element case instead of letting nan propagate to the caller.
    if N <= 1:
        raise ValueError('The length of array must be larger than 1.')
    x_bar = x.mean()
    errors = x - x_bar
    squared_errors = errors**2
    sum_squared_errors = squared_errors.sum()
    sd = np.sqrt(sum_squared_errors / (N - ddof))
    return sd

## 隨堂練習：計算 $x$ 與 $y$ 的[共變異數](https://zh.wikipedia.org/wiki/%E5%8D%8F%E6%96%B9%E5%B7%AE)

$$cov(x, y) = \frac{1}{N}\sum_{n=1}^{N}(x_n-\bar{x})(y_n-\bar{y})$$

In [5]:
def cov(x, y):
    """
    Return the population covariance of the arrays ``x`` and ``y``.

    Both inputs are flattened and paired element by element; the products
    of their deviations from the respective means are summed and divided
    by the element count N.

    Parameters
    ----------
    x, y : numpy.ndarray
        Arrays holding the same number of elements.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not hold the same number of elements.

    >>> np.random.seed(123)
    >>> x = np.random.randint(0, 50, 10)
    >>> y = np.random.randint(0, 50, 10)
    >>> cov(x, y)
    -54.7
    """
    # Flatten first so the inputs are paired one-to-one; otherwise shapes
    # such as (10,) and (10, 1) would silently broadcast to a 10x10 product.
    x = x.ravel()
    y = y.ravel()
    N = x.size
    if y.size != N:
        raise ValueError('x and y must have the same number of elements.')
    x_bar = x.mean()
    y_bar = y.mean()
    errors = (x - x_bar) * (y - y_bar)
    sum_errors = errors.sum()
    return sum_errors / N

## 隨堂練習：計算 $x$ 與 $y$ 的[相關係數](https://zh.wikipedia.org/wiki/%E7%9B%B8%E5%85%B3) $r_{xy}$

$$r_{xy} = \frac{cov(x, y)}{\sqrt{cov(x, x)cov(y, y)}}$$

In [6]:
def corr(x, y):
    """
    Return the correlation coefficient of the arrays ``x`` and ``y``.

    Computed as ``cov(x, y) / sqrt(cov(x, x) * cov(y, y))`` with each
    covariance divided by the element count N. Both inputs are flattened
    and paired element by element. When either input is constant the
    denominator is zero, so the division does not yield a finite value.

    Parameters
    ----------
    x, y : numpy.ndarray
        Arrays holding the same number of elements.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not hold the same number of elements.

    >>> np.random.seed(123)
    >>> x = np.random.randint(0, 50, 10)
    >>> y = np.random.randint(0, 50, 10)
    >>> corr(x, y)
    -0.3409853364175933
    """
    # Flatten first so the inputs are paired one-to-one; otherwise a
    # length-1 or differently shaped operand would silently broadcast.
    x = x.ravel()
    y = y.ravel()
    N = x.size
    if y.size != N:
        raise ValueError('x and y must have the same number of elements.')
    x_bar = x.mean()
    y_bar = y.mean()
    cov_xy = ((x - x_bar) * (y - y_bar)).sum() / N
    cov_xx = ((x - x_bar)**2).sum() / N
    cov_yy = ((y - y_bar)**2).sum() / N
    return cov_xy / np.sqrt(cov_xx * cov_yy)

In [7]:
# %load ../test_cases/test_cases_02.py
import unittest

class TestNumpy(unittest.TestCase):
    """Unit tests for the NumPy exercise functions defined in this file."""

    @staticmethod
    def _seeded_pair():
        """Return the two seeded random integer arrays used by the cov/corr tests."""
        np.random.seed(123)
        first = np.random.randint(0, 50, 10)
        second = np.random.randint(0, 50, 10)
        return first, second

    def test_create_99_array(self):
        expected = np.array([
            [ 1,  2,  3,  4,  5,  6,  7,  8,  9],
            [ 2,  4,  6,  8, 10, 12, 14, 16, 18],
            [ 3,  6,  9, 12, 15, 18, 21, 24, 27],
            [ 4,  8, 12, 16, 20, 24, 28, 32, 36],
            [ 5, 10, 15, 20, 25, 30, 35, 40, 45],
            [ 6, 12, 18, 24, 30, 36, 42, 48, 54],
            [ 7, 14, 21, 28, 35, 42, 49, 56, 63],
            [ 8, 16, 24, 32, 40, 48, 56, 64, 72],
            [ 9, 18, 27, 36, 45, 54, 63, 72, 81],
        ])
        np.testing.assert_almost_equal(create_99_array(), expected)

    def test_var(self):
        cases = ((10, 8.25), (100, 833.25))
        for length, expected in cases:
            self.assertAlmostEqual(var(np.arange(length)), expected)

    def test_std(self):
        ten = np.arange(10)
        self.assertAlmostEqual(std(ten), 2.8722813232690143)
        self.assertAlmostEqual(std(ten, 1), 3.0276503540974917)
        # A single-element array must be rejected.
        with self.assertRaises(ValueError):
            std(np.arange(1))

    def test_cov(self):
        x, y = self._seeded_pair()
        self.assertAlmostEqual(cov(x, y), -54.7)

    def test_corr(self):
        x, y = self._seeded_pair()
        self.assertAlmostEqual(corr(x, y), -0.3409853364175933)

# Collect the test methods of TestNumpy into a single suite.
suite = unittest.TestLoader().loadTestsFromTestCase(TestNumpy)
# verbosity=2 makes the runner report each test by name (see the output below).
runner = unittest.TextTestRunner(verbosity=2)
# Run the suite and keep the result object returned by the runner.
test_results = runner.run(suite)

test_corr (__main__.TestNumpy) ... ok
test_cov (__main__.TestNumpy) ... ok
test_create_99_array (__main__.TestNumpy) ... ok
test_std (__main__.TestNumpy) ... ok
test_var (__main__.TestNumpy) ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.008s

OK
