# Python 数据获取

## Tushare

In [None]:
#先引入后面分析、可视化等可能用到的库

import tushare as ts

import pandas as pd

import matplotlib.pyplot as plt

#正常显示画图时出现的中文和负号

from pylab import mpl

import json
from pathlib import Path

# Let matplotlib render Chinese labels (SimHei font) and the minus sign.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# Settings are loaded from a JSON file in the working directory.
# JSON is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding.
source_path = Path("./py_assets_config.json")
configs = json.loads(source_path.read_text(encoding="utf-8"))

# Tushare token, taken from the config file
# (alternative: hard-code it, e.g. token = 'your token').
token = configs['tushare_token']

# ts.set_token(token)

# Tushare Pro client used by the cells below.
pro = ts.pro_api(token)

In [None]:
# Per-stock market data:
# pro.stock_basic()

# Parameters: is_hs - Stock Connect membership (N = no, H = Shanghai Connect, S = Shenzhen Connect);
# list_status - listing status (L = listed, D = delisted, P = suspended);
# exchange - SSE (Shanghai), SZSE (Shenzhen), HKEX (Hong Kong).

# pro.daily(ts_code=... or trade_date=...)
# Daily bars: daily; weekly bars: weekly; monthly bars: monthly

# Fetch code, short name, region, industry, listing date, etc. for the currently listed stocks

basic = pro.stock_basic(list_status='L')

# Peek at the first five rows

#basic.head(5)

In [None]:
# Fetch daily quotes for Ping An Bank (000001.SZ), 2018-01-01 through 2019-01-06

pa = pro.daily(ts_code='000001.SZ', start_date='20180101', end_date='20190106')

#pa.head()

In [None]:
#K线图可视化

from pyecharts import Kline

# Index the quotes by trading date and sort them by that index.
pa.index = pd.to_datetime(pa.trade_date)
pa = pa.sort_index()

# Candle values, one row per day, in the column order open/close/low/high.
ohlc_columns = ['open', 'close', 'low', 'high']
v1 = list(pa[ohlc_columns].values)

# X-axis labels: the trading dates formatted as YYYYMMDD strings.
t = pa.index
v0 = list(t.strftime('%Y%m%d'))

kline = Kline("平安银行K线图", title_text_size=15)

# Series options are gathered first so the add() call stays short.
kline_options = dict(
    is_datazoom_show=True,
    mark_line=["average"],
    mark_point=["max", "min"],
    mark_point_symbolsize=60,
    mark_line_valuedim=['highest', 'lowest'],
)
kline.add("", v0, v1, **kline_options)

# kline.render("上证指数图.html")

# Last expression of the cell, so the notebook displays the chart.
kline

In [None]:
#定义获取多只股票函数：

def get_stocks_data(stocklist,start,end):
    """Fetch daily quotes for every code in ``stocklist``.

    Each code is passed to ``pro.daily`` together with ``start`` and
    ``end`` (as ``start_date`` / ``end_date``).

    Returns a dict mapping each code to whatever ``pro.daily`` returned
    for it.
    """
    return {
        ts_code: pro.daily(ts_code=ts_code, start_date=start, end_date=end)
        for ts_code in stocklist
    }

#保存本地

def save_data(all_data, out_dir='./data/stock_data/'):
    """Write each frame in ``all_data`` to ``<out_dir>/<code>.csv``.

    ``all_data`` maps a stock code to a DataFrame. Each frame is written
    with a header row and without its index. ``out_dir`` defaults to the
    directory the rest of the notebook reads from; it is created
    (including parents) when missing.
    """
    target_dir = Path(out_dir)
    # to_csv does not create missing directories, so make sure the
    # target exists before the first write.
    target_dir.mkdir(parents=True, exist_ok=True)
    for code, data in all_data.items():
        data.to_csv(target_dir / (code + '.csv'), header=True, index=False)

# Work with the first 15 codes of the listing table only.
stocklist = basic.ts_code.tolist()[:15]

# Empty strings are passed through as start_date / end_date.
start = ''
end = ''
all_data = get_stocks_data(stocklist, start, end)

all_data['000002.SZ'].tail()

# Persist the downloaded frames to disk.
save_data(all_data)

#读取本地文件夹里所有文件
import os

# Directory the CSV files were saved to.
file = './data/stock_data/'

g = os.walk(file)

# Base names (printed below) and the matching full paths. os.walk also
# descends into subdirectories, so each path is built from the directory
# the file was actually found in rather than from `file`.
filenames = []
filepaths = []
for path, d, filelist in g:
    for filename in filelist:
        filenames.append(filename)
        filepaths.append(os.path.join(path, filename))

print(filenames)

# Loaded frames, keyed by the code parsed from the file name.
df = {}

# Keep only the part before the first dot, e.g. '000001.SZ.csv' -> '000001'.
code = [name.split('.')[0] for name in filenames]

for key, filepath in zip(code, filepaths):
    df[key] = pd.read_csv(filepath)

# Peek at the first five rows of the first stock
# df[code[0]].head()

In [None]:
# 指数数据: pro.index_daily(ts_code=)

def get_index_data(indexs):
    """Download the daily history of each index in ``indexs``.

    ``indexs`` is a dict mapping a display name to an index code. The
    result maps each display name to the frame returned by
    ``pro.index_daily`` for its code, re-indexed by ``trade_date``
    (parsed as datetimes) and sorted by that index.
    """
    def _fetch(ts_code):
        quotes = pro.index_daily(ts_code=ts_code)
        quotes.index = pd.to_datetime(quotes.trade_date)
        return quotes.sort_index()

    return {name: _fetch(ts_code) for name, ts_code in indexs.items()}

# Codes of commonly followed stock indices, keyed by display name.
indexs = {
    '上证综指': '000001.SH',
    '深证成指': '399001.SZ',
    '沪深300': '000300.SH',
    '创业板指': '399006.SZ',
    '上证50': '000016.SH',
    '中证500': '000905.SH',
    '中小板指': '399005.SZ',
    '上证180': '000010.SH',
}

index_data = get_index_data(indexs)

# index_data['上证综指'].head()

# Plot the closing-price history of every index, one panel per index.

# Panel titles: the display names, in the dict's order.
subjects = list(index_data.keys())

# Three-digit subplot codes: a 4 x 2 grid, panels 1 through 8.
plot_pos = [421, 422, 423, 424, 425, 426, 427, 428]

new_colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
    '#9467bd', '#8c564b', '#e377c2',
    '#7f7f7f', '#bcbd22', '#17becf',
]

fig = plt.figure(figsize=(16, 18))
fig.suptitle('A股股指走势', fontsize=18)

# enumerate rather than np.arange: numpy is not imported by the cells
# shown here, so np.arange would raise NameError.
for pos, position in enumerate(plot_pos):
    ax = fig.add_subplot(position)
    y_data = index_data[subjects[pos]]['close']
    ax.plot(y_data, color=new_colors[pos])
    ax.set_title(subjects[pos])

# Hide the right and top borders. plt.gca() returns the current axes,
# i.e. the last panel added above, so only that panel is affected.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')

plt.show()

## 证券宝

In [None]:
# 证券宝链接地址:http://baostock.com/baostock/index.php/Python_API文档  。安装：进入cmd模式，pip install baostock

import baostock as bs

#### Log in ####

lg = bs.login()

#### Fetch historical K-line data ####

# query_history_k_data()

fields = "date,code,open,high,low,close"

rs = bs.query_history_k_data(
    "sh.000001", fields,
    start_date='2000-01-01', end_date='2018-09-07',
    frequency="d", adjustflag="2",
)

# frequency="d" requests daily bars. adjustflag: "3" is the default
# (unadjusted), "1" is backward-adjusted, "2" is forward-adjusted.

data_list = []

# `and` rather than bitwise `&`: rs.next() is only called while the
# query reports success, instead of being evaluated unconditionally.
while rs.error_code == '0' and rs.next():
    # Fetch one record and add it to the accumulated rows.
    data_list.append(rs.get_row_data())

result = pd.DataFrame(data_list, columns=rs.fields)

result.index = pd.to_datetime(result.date)

#### Write the result set to a CSV file ####
# result.to_csv("./data/history_k_data.csv",
#        encoding="gbk", index=False)

result.head()

#### Log out ####
# bs.logout()

result.info()


def _to_numeric_if_possible(column):
    """Return ``column`` converted by ``pd.to_numeric``, or unchanged if that fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Convert the object columns that hold numeric text to numbers and leave
# the others untouched. This is what errors='ignore' did; that option is
# deprecated in pandas, so the failure is caught explicitly instead.
result = result.apply(_to_numeric_if_possible)

result.info()

result.close.plot(figsize=(16, 8))

# Hide the right and top borders of the chart.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')

plt.show()

## Yahoo

In [None]:
# The original Yahoo Finance Python library has not been maintained since 2018; a patched
# replacement is available and can be installed with `pip install fix_yahoo_finance`.

import fix_yahoo_finance as fy

fy.pdr_override()

def get_data(tick,start_date="2000-01-01", end_date="2019-01-07"):
    """Download price history for ``tick`` between ``start_date`` and ``end_date``.

    Thin wrapper around ``fy.download``; its return value is handed
    back unchanged.
    """
    return fy.download(tick, start=start_date, end=end_date)

# NOTE(review): Facebook now trades as META - confirm 'FB' still resolves.
tickers = ['AAPL', 'GOOG', 'AMZN', 'FB']

# Price history per ticker, using get_data's default date range.
all_data = {ticker: get_data(ticker) for ticker in tickers}
    
# Panel titles, in the same order as `tickers`.
subjects = [
    '苹果公司股价走势',
    '谷歌公司股价走势',
    '亚马逊公司股价走势',
    'FaceBook公司股价走势'
]

# Three-digit subplot codes: a 2 x 2 grid, panels 1 through 4.
plot_pos = [221, 222, 223, 224]

new_colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
    '#9467bd', '#8c564b', '#e377c2',
    '#7f7f7f', '#bcbd22', '#17becf',
]

fig = plt.figure(figsize=(16, 9))
fig.suptitle('美股&指数走势', fontsize=18)

# enumerate rather than np.arange: numpy is not imported by the cells
# shown here, so np.arange would raise NameError.
for pos, position in enumerate(plot_pos):
    ax = fig.add_subplot(position)
    y_data = all_data[tickers[pos]]['Adj Close']
    ax.plot(y_data, color=new_colors[pos])
    ax.set_title(subjects[pos])

# Hide the right and top borders. plt.gca() returns the current axes,
# i.e. the last panel added above, so only that panel is affected.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')

plt.show()

In [None]:
# Global stock indices: symbol passed to get_data -> display name.
WorldStockIndexList = {
    '000001.SS': '中国上证指数',
    '^DJI': '道琼斯工业平均指数',
    '^IXIC': '纳斯达克综合指数',
    '^N225': '日本日经225指数',
    '^HSI': '香港恒生指数',
    '^FCHI': '法国CAC40指数',
    '^FTSE': '英国富时100指数',
    '^GDAXI': '德国法兰克福DAX指数',
}

# Price history per symbol, using get_data's default date range.
world_data = {symbol: get_data(symbol) for symbol in WorldStockIndexList}

# Panel titles and the symbols they belong to, in matching order.
subjects = list(WorldStockIndexList.values())
tickers = list(WorldStockIndexList)

# Three-digit subplot codes: a 4 x 2 grid, panels 1 through 8.
plot_pos = [421, 422, 423, 424, 425, 426, 427, 428]

new_colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
    '#9467bd', '#8c564b', '#e377c2',
    '#7f7f7f', '#bcbd22', '#17becf',
]

fig = plt.figure(figsize=(16, 18))
fig.suptitle('全球股指走势', fontsize=18)

# enumerate rather than np.arange: numpy is not imported by the cells
# shown here, so np.arange would raise NameError.
for pos, position in enumerate(plot_pos):
    ax = fig.add_subplot(position)
    y_data = world_data[tickers[pos]]['Adj Close']
    ax.plot(y_data, color=new_colors[pos])
    ax.set_title(subjects[pos])

# Hide the right and top borders. plt.gca() returns the current axes,
# i.e. the last panel added above, so only that panel is affected.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')

plt.show()