In [2]:
#coding=utf-8
import sys
import os
import copy


sys.path.insert(0, "../")
from common import get_tb_info
from common import debug_line

%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


# <font color=blue size=72>一、第一部分</font>

## <font color=blue>1.1 cython的速度提升</font> 

In [16]:
%%cython
import numpy as np
cimport numpy as cnp

from scipy.stats import poisson
poi = poisson(10.0)
n = 100
pmf = poi.pmf(np.arange(n))

def shannon_entropy_cy(cnp.ndarray p_x):
    return - np.sum(p_x * np.log(p_x))

def shannon_entropy_py(p_x):
    return - np.sum(p_x * np.log(p_x))

In [18]:
%%timeit
poi.entropy()

885 µs ± 26.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [19]:
%%timeit
shannon_entropy_py(pmf)

5.28 µs ± 25.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [20]:
%%timeit
shannon_entropy_cy(pmf)

5.12 µs ± 152 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [22]:
print(poi.entropy(),"|", shannon_entropy_py(pmf), "|",shannon_entropy_cy(pmf))

2.5614099352749125 | 2.5614099352749125 | 2.5614099352749125


In [27]:
%%cython

cimport numpy as cnp
from libc.math cimport log as clog

cpdef shannon_entropy_v1(cnp.ndarray p_x):
    cdef double res = 0.0
    cdef int n = p_x.shape[0]
    cdef int i
    for i in range(n):
        res += p_x[i] * clog(p_x[i])
    return -res

In [28]:
%%timeit
shannon_entropy_v1(pmf)

32.4 µs ± 692 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [24]:
%%cython

cimport numpy as cnp
from libc.math cimport log as clog

cpdef shannon_entropy_v2(cnp.ndarray[double] p_x):
    cdef double res = 0.0
    cdef int n = p_x.shape[0]
    cdef int i
    for i in range(n):
        res += p_x[i] * clog(p_x[i])
    return -res

In [25]:
%%timeit
shannon_entropy_v2(pmf)

1.37 µs ± 21.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [49]:
%%cython

cimport cython
cimport numpy as cnp
from libc.math cimport log

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef shannon_entropy_v3(cnp.ndarray[double] p_x):
    cdef double res = 0.0
    cdef int n = p_x.shape[0]
    cdef int i
    for i in range(n):
        res += p_x[i] * log(p_x[i])
    return -res

In [50]:
%%timeit
shannon_entropy_v3(pmf)

1.33 µs ± 12.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [52]:
%%cython 

cimport cython
from libc.math cimport log

@cython.boundscheck(False)
@cython.wraparound(False)
def shannon_entropy_v4(double[::1] p_x):
    cdef double res = 0.0
    cdef int n = p_x.shape[0]
    cdef int i
    for i in range(n):
        res += p_x[i] * log(p_x[i])
    return -res

In [53]:
%%timeit
shannon_entropy_v4(pmf)

1.1 µs ± 24.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [54]:
print(poi.entropy())
print(shannon_entropy_v1(pmf))
print(shannon_entropy_v2(pmf))
print(shannon_entropy_v3(pmf))
print(shannon_entropy_v4(pmf))

2.5614099352749125
2.5614099352749107
2.5614099352749107
2.5614099352749107
2.5614099352749107


## <font color=blue>1.2 profiler，更详细的代码见笔记本linshi_env</font> 

In [64]:
from line_profiler import LineProfiler
import random

def do_stuff(numbers):
    s = sum(numbers)
    l = [numbers[i]/43 for i in range(len(numbers))]
    m = ['hello'+str(numbers[i]) for i in range(len(numbers))]
    return m    



numbers = [i for i in range(10000)]
lp = LineProfiler()
lp_wrapper = lp(do_stuff)
lp_wrapper(numbers)
lp.print_stats()

Timer unit: 1e-06 s

Total time: 0.007101 s
File: <ipython-input-64-4d80a9c9513d>
Function: do_stuff at line 4

Line #      Hits         Time  Per Hit   % Time  Line Contents
     4                                           def do_stuff(numbers):
     5         1         82.0     82.0      1.2      s = sum(numbers)
     6         1       1947.0   1947.0     27.4      l = [numbers[i]/43 for i in range(len(numbers))]
     7         1       5071.0   5071.0     71.4      m = ['hello'+str(numbers[i]) for i in range(len(numbers))]
     8         1          1.0      1.0      0.0      return m    



In [65]:
%%timeit
do_stuff(numbers)

3.28 ms ± 84.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [67]:
import cProfile
import re
cProfile.run('re.compile("foo|bar")')

## <font color=blue>1.3 cython与c的连接，见connect_c文件夹【unfix】</font> 

## <font color=blue>1.4 并行-ray的语法，见ray文件夹【unfix】</font> 