# python data analysis
## appendix
### 变量和按引用传递

In [1]:
a = [1, 2, 3]  
# 变量赋值的过程可以理解为，1.创建一个对象，2.创建一个变量（名），3.将这个变量和对象绑定，变量指向该对象
# 变量使用，以参数形式传递给函数，传入一个引用，不是将对象复制到函数中去

In [2]:
b = a
a.append(4)
print(b)
# a, b是变量，是指向同一个对象的两个引用

[1, 2, 3, 4]


### 类型

In [3]:
a = [1, 2, 3]
# a，一个变量，指向一个对象（list）
# [1, 2, 3]，一个（list）对象，包含该对象的类型信息等

In [4]:
a = 6
type(a)
a = 'abc'
type(a)
# 变量a不包含对象的类型信息，其指向的对象包含

int

str

In [5]:
c = 3.1415926
isinstance(c, (int, float)) # 判断变量（指向的对象）是否属于某个（些）类型（之一）

True

### 属性和方法

In [6]:
# attribute：存储在对象内部的其他python对象
# method：与对象有关的能够访问其内部数据的函数

In [7]:
a = 'manunited'

In [None]:
a.<Tab> # 返回a的所有方法

In [8]:
getattr(a, 'split') # returns the attribute named 'split' looked up on a (here the str.split method); use hasattr(a, 'split') to test whether it exists

<function str.split>

### 引入模块、函数

In [None]:
import moduleName
a = moduleName.funName(...)

import moduleName as defNM
a = defNM.funName(...)

from moduleName import funName
a = funName(...)

from moduleName import funName as defFN
a = defFN(...)

### 二元运算、比较运算

In [9]:
a = [1, 2, 3]
b = a
c = list(a)

In [10]:
b is a # is 判断两个引用是否指向同一对象，is，is not常用来判断变量是否为None
c is a # list函数会创建新的列表

True

False

In [12]:
d = 7
e = 11
e // d # 取整除法
e ** d # 幂运算
e & d # and
e | d # or
e ^ d # xor 

1

19487171

3

15

12

In [13]:
f = 1
g = 1
f & g
f | g
f ^ g

1

1

0

### 惰性

In [14]:
a = b = c = 5
d = a + b * c # python 急性子的语言，计算结果和表达式都是立即求值的，此处，先计算b * c的结果25，再加上a
# 利用iterator和generator等可以实现惰性/延迟运算，不会立即计算中间结果

### 可变、不可变对象

In [15]:
a_list = ['foo', 2, [4, 5]] # list可变
a_list[2] = (3, 4)
a_list

['foo', 2, (3, 4)]

In [16]:
# 不可变的immutable，是指不能修改内存块的数据。即便“修改”了，实际是创建了一个新对象，并将其引用赋值给原变量
a_tuple = (3, 5, (3, 4)) # tuple 是不可变对象
a_tuple[1] = 'four'

TypeError: 'tuple' object does not support item assignment

### 标量类型

|类型|说明|
|--|--|
|None|null值|
|str|字符串|
|float|浮点型|
|bool|布尔型|
|int|整型（带符号整数）|
|long|长整型（带符号整数，任意精度；仅存在于 Python 2，Python 3 中已并入 int，int 本身即为任意精度）|


### 数值类型

In [17]:
ival = 123456789
ival ** 3

fval = 1.23456
fval1 = 1.23e-7

3 / 2
3 // 2

cval = 1 + 2j # j表示虚数
cval * (1 - 2j)

1881676371789154860897069

1.5

1

(5+0j)

### 字符串

In [18]:
a = 'one way of writing a string'
b = "another way"

c = '''
this is a long string that
sapans multiple lines
'''
d = """
to write a multiple string 
in another way 
"""

In [20]:
e = 'string is immutable' # string不可变对象
e[7] = 7 # error

TypeError: 'str' object does not support item assignment

In [21]:
f = e.replace('string', 'longer string') # replace方法是创建了新的对象
f

'longer string is immutable'

In [22]:
g = 3.7
h = str(g)

s = 'python'
list(s)
s[:3]

s = '12\\34'  # backslash \, escape character
print(s)


s = r'this\has\no\special\characters' # r''
s

a = 'this is the first half'
b = 'and this is the second half'
a + b

['p', 'y', 't', 'h', 'o', 'n']

'pyt'

12\34


'this\\has\\no\\special\\characters'

'this is the first halfand this is the second half'

In [23]:
template = '%.2f %s are worth $%d' # 字符串格式化输出
template % (4.567, 'Argentine Pesos', 1)

'4.57 Argentine Pesos are worth $1'

### Booleans 布尔值

In [24]:
True and True
False and True

True

False

In [25]:
a = [1, 2, 3]
if a:
    print('I found something!')

b = []
if not b:
    print('Empty!')

I found something!
Empty!


In [26]:
bool([]), bool([1, 2, 3])
bool('Hello World!'), bool('')
bool(0), bool(1)

(False, True)

(True, False)

(False, True)

### Type casting 类型转换

In [27]:
s = '3.14159'
fval = float(s)
type(fval)
int(fval)
bool(fval)

float

3

True

### None

In [28]:
# it’s worth bearing in mind that None is the unique instance of NoneType; in Python 3 it is also a reserved keyword, so it cannot be reassigned
a = None
a is None

b = 1
b is not None

True

True

In [29]:
def add_and_maybe_multiple(a, b, c=None):
    """Return ``a + b``, multiplied by ``c`` when ``c`` is supplied.

    ``None`` is the default for ``c`` and serves as the "not provided"
    marker, so a falsy multiplier such as ``0`` is still applied.
    """
    total = a + b
    if c is None:
        # No multiplier given: the plain sum is the answer.
        return total
    return total * c

In [30]:
add_and_maybe_multiple(1, 2, 3)

9

### Dates and Times

In [39]:
from datetime import datetime, date, time
dt = datetime(2018, 6, 22, 9, 45, 59)
dt.day
dt.minute
dt.date()
dt.time()

22

45

datetime.date(2018, 6, 22)

datetime.time(9, 45, 59)

In [40]:
dt.strftime('%m%d%Y %H:%M')
dt.replace(minute = 0, second = 0)

'06222018 09:45'

datetime.datetime(2018, 6, 22, 9, 0)

In [41]:
datetime.strptime('20180202', '%Y%m%d')

datetime.datetime(2018, 2, 2, 0, 0)

In [43]:
dt2 = datetime(2018, 5, 26)
delta = dt2 - dt
delta
type(delta)

datetime.timedelta(-28, 51241)

datetime.timedelta

In [44]:
dt + delta

datetime.datetime(2018, 5, 26, 0, 0)

## 控制流
### 条件判断
`if elif else`

In [46]:
def equal0(x):
    """Print a one-line description of the sign of ``x``.

    Prints "It's negative" when ``x < 0``, "equal to 0" when ``x == 0``,
    and "positive" in every other case.
    """
    if x < 0:
        message = "It's negative"
    elif x == 0:
        message = 'equal to 0'
    else:
        message = 'positive'
    print(message)

In [47]:
equal0(7)

positive


In [48]:
a = 5
b = 7
c = 8
d = 4
if a < b or c > d:  # a < b is already True, so `or` short-circuits and c > d is never evaluated
    print('made it')

made it


### 循环
`for`

In [49]:
seq = [1, 2, None, 4, None, 5]
total = 0
for value in seq:
	if value is None:
		continue
	total += value
total

12

In [50]:
seq = [1, 2, 0, 4, 6, 5, 2, 1]
total_til_5 = 0
for i in seq:
	if i == 5:
		break
	total_til_5 += i
total_til_5

13

`while`

In [51]:
x = 256
total = 0
while x > 0:
	if total > 500:
		break
	total += x
	x = x // 2
total

504

### 空语句
`pass`

In [52]:
def equal0(x):
	"""Print 'negative' or 'positive' for a nonzero x; print nothing when x == 0."""
	if x < 0:
		print('negative')
	elif x == 0:
		pass # no-op placeholder: the branch must contain a statement, but zero prints nothing
	else:
		print('positive')
equal0(7)
equal0(0)
equal0(-7)

positive
negative


### 异常处理

In [53]:
float('3.1415')

3.1415

In [54]:
float('something') # ValueError

ValueError: could not convert string to float: 'something'

In [55]:
# 处理 ValueError
def attempt_float(x):
	"""Convert ``x`` with ``float()``, falling back to ``x`` itself on ValueError.

	Only ValueError is handled; any other exception raised by ``float(x)``
	(for example TypeError for a tuple) propagates to the caller.
	"""
	try:
		converted = float(x)
	except ValueError:
		# Not parseable as a float: hand the input back untouched.
		return x
	return converted

In [56]:
attempt_float('3.1415')
attempt_float('something')

3.1415

'something'

In [57]:
float((1, 2)) #TypeError

TypeError: float() argument must be a string or a number, not 'tuple'

In [58]:
attempt_float((1, 2)) #TypeError

TypeError: float() argument must be a string or a number, not 'tuple'

In [59]:
# 处理 ValueError TypeError
def attempt_float1(x):
	"""Convert ``x`` with ``float()``; return ``x`` unchanged on ValueError or TypeError."""
	try:
		converted = float(x)
	except (ValueError, TypeError):
		# Either the text is not a number or the type cannot be converted at all.
		return x
	else:
		return converted

attempt_float1((1, 2))

(1, 2)

In [None]:
f = open(path, 'w')
try:
	write_to_file(f)
finally: # 无论try语句成功与否，finally后的语句都执行
	f.close()

In [None]:
# Demonstrates the full try / except / else / finally form around a file write.
# NOTE(review): `path` and `write_to_file` are not defined in this excerpt; they are
# presumably placeholders for the reader's own target path and writer function.
f = open(path, 'w')
try:
	write_to_file(f)
except Exception: # ordinary errors only; a bare `except:` would also swallow KeyboardInterrupt and SystemExit
	print('Failed')
else: # runs only when the try block raised nothing
	print('Succeeded')
finally: # runs on every path, so the file handle is always closed
	f.close()

In [62]:
range(10) # returns a range object (displayed as range(0, 10)), not a list and not an iterator; the integers are produced as it is iterated
list(range(10)) # materialize the range into a list to see every value

range(0, 10)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [63]:
range(0, 20, 2)
list(range(0, 20, 2))

range(0, 20, 2)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [65]:
seq = [1, 2, 3, 4, 5]
for i in range(len(seq)):
    val = seq[i]
    print(val)

1
2
3
4
5


In [69]:
# Add up every integer below 10000 that is divisible by 3 or by 5.
# The result gets its own name so that the built-in ``sum`` is not overwritten
# for the rest of the kernel session.
multiples_total = sum(i for i in range(10000) if i % 3 == 0 or i % 5 == 0)
multiples_total

23331668

### 三元表达式

In [72]:
x = 5
# A ternary expression collapses an if/else block into one line.
# The test is `>= 0` so that zero is labelled 'Non_negative', matching the label's meaning.
'Non_negative' if x >= 0 else 'Negative'

'Non_negative'

## 数据结构
### 元组 tuple

In [73]:
tup = (2, 3, 7, 11, 18)
nested_tup = ((2, 3, 7), (11, 18))
tuple([2, 3, 7])
a_tup = tuple('string')
a_tup[2]

(2, 3, 7)

'r'

In [77]:
tup = tuple('foo', [1, 2], True)

TypeError: tuple() takes at most 1 argument (3 given)

In [75]:
# Why this fails: the tuple('foo', [1, 2], True) call raised TypeError, so tup was
# never rebound. It is still (2, 3, 7, 11, 18), and tup[1] is the int 3, which has
# no append().
# NOTE(review): the construction presumably meant tuple(['foo', [1, 2], True]), where
# tup[1] would be a list that can be mutated in place -- confirm and re-run the cells in order.
tup[1].append(3)

AttributeError: 'int' object has no attribute 'append'

In [76]:
tup

(2, 3, 7, 11, 18)