diff --git a/QUANTAXIS/QASU/crawl_eastmoney.py b/QUANTAXIS/QASU/crawl_eastmoney.py index 4bf54d403..163efbda8 100644 --- a/QUANTAXIS/QASU/crawl_eastmoney.py +++ b/QUANTAXIS/QASU/crawl_eastmoney.py @@ -1,21 +1,275 @@ - import os from QUANTAXIS.QAUtil import QALocalize -from QUANTAXIS_CRAWLY.run_selenium_alone import read_east_money_page_zjlx_to_sqllite +from QUANTAXIS_CRAWLY.run_selenium_alone import (read_east_money_page_zjlx_to_sqllite, open_chrome_driver, close_chrome_dirver) +import urllib +import pandas as pd +import time + +from QUANTAXIS.QAUtil import (DATABASE) + + + +def QA_request_eastmoney_zjlx( param_stock_code_list ): + # 改用 + strUrl = "http://data.eastmoney.com/zjlx/{}.html".format(param_stock_code_list[0]) + + # 延时 + time.sleep(1.223) + + response = urllib.request.urlopen(strUrl) + content = response.read() + + # 🛠todo 改用 re 正则表达式做匹配 + strings = content.decode("utf-8", "ignore") + string_lines = strings.split("\r\n") + + #for aline in string_lines: + # aline = aline.strip() + # if '_stockCode' in aline: + # _stockCode = aline[len('var _stockCode = '):] + # _stockCode = _stockCode.strip("\"\"\,") + + # if '_stockMarke' in aline: + # _stockMarke = aline[len('_stockMarke = '):] + # _stockMarke = _stockMarke.strip("\"\"\,") + # # 60XXXX , + #_stockMarke = 1 + + # 00XXXX , + # _stockMarke = 2 + + # 30XXXX , + # _stockMarke = 2 + + # if '_stockName' in aline: + # _stockName = aline[len('_stockName = '):] + # _stockName = _stockName.strip("\"\"\,") + + # if '_market' in aline: + # _market = aline[len('_market = '):] + # _market = _market.strip("\"\"\,") + # break + #_market= 'hsa' + + # print(_stockCode) + # print(_stockMarke) + # print(_stockName) + # print(_market) + + values = [] + for aline in string_lines: + aline = aline.strip() + if 'EM_CapitalFlowInterface' in aline: + # print(aline) + # print('------------------') + aline = aline.strip() + if aline.startswith('var strUrl = '): + if 'var strUrl = ' in aline: + aline = aline[len('var strUrl = '):] + values = aline.split('+') + # print(values) + break + # print('------------------') + + print(values) + + + for iStockCode in range(len(param_stock_code_list)): + requestStr = "" + + strCode = param_stock_code_list[iStockCode] + if strCode[0:2] == '60': + _stockMarke = '1' + elif strCode[0:2] == '00' or strCode[0:2] == '30': + _stockMarke = '2' + else: + print(strCode + " 暂不支持, 60, 00, 30 开头的股票代码") + return + + for iItem in values: + if '_stockCode' in iItem: + requestStr = requestStr + param_stock_code_list[iStockCode] + elif '_stockMarke' in iItem: + requestStr = requestStr + _stockMarke + else: + if 'http://ff.eastmoney.com/' in iItem: + requestStr = 'http://ff.eastmoney.com/' + else: + iItem = iItem.strip(' "') + iItem = iItem.rstrip(' "') + requestStr = requestStr + iItem + + # print(requestStr) + # 延时 + time.sleep(1.456) + + response = urllib.request.urlopen(requestStr) + content2 = response.read() + + # print(content2) + strings = content2.decode("utf-8", "ignore") + + # print(strings) + + list_data_zjlx = [] + + if 'var aff_data=({data:[["' in strings: + leftChars = strings[len('var aff_data=({data:[["'):] + # print(leftChars) + dataArrays = leftChars.split(',') + # print(dataArrays) + for aItemIndex in range(0, len(dataArrays), 13): + ''' + 日期 + 收盘价 + 涨跌幅 + 主力净流入 净额 净占比 + 超大单净流入 净额 净占比 + 大单净流入 净额 净占比 + 中单净流入 净额 净占比 + 小单净流入 净额 净占比 + ''' + dict_row = {} + + dict_row['stock_code'] = param_stock_code_list[iStockCode] + + # 日期 + # print(aItemIndex) + data01 = dataArrays[aItemIndex] + data01 = data01.strip('"') + # print('日期',data01) + + dict_row['date'] = data01 + + # 主力净流入 净额 + data02 = dataArrays[aItemIndex + 1] + data02 = data02.strip('"') + # print('主力净流入 净额',data02) + + dict_row['zljll_je_wy'] = data02 + + # 主力净流入 净占比 + data03 = dataArrays[aItemIndex + 2] + data03 = data03.strip('"') + # print('主力净流入 净占比',data03) + # date01 = aItemData.strip('[\'\'') + + dict_row['zljll_jzb_bfb'] = data03 + + # 超大单净流入 净额 + data04 = dataArrays[aItemIndex + 3] + data04 = data04.strip('"') + # print('超大单净流入 净额',data04) + + dict_row['cddjll_je_wy'] = data04 + + # 超大单净流入 净占比 + data05 = dataArrays[aItemIndex + 4] + data05 = data05.strip('"') + # print('超大单净流入 净占比',data05) + + dict_row['cddjll_je_jzb'] = data05 + + # 大单净流入 净额 + data06 = dataArrays[aItemIndex + 5] + data06 = data06.strip('"') + # print('大单净流入 净额',data06) + + dict_row['ddjll_je_wy'] = data06 + + # 大单净流入 净占比 + data07 = dataArrays[aItemIndex + 6] + data07 = data07.strip('"') + # print('大单净流入 净占比',data07) + + dict_row['ddjll_je_jzb'] = data07 + + # 中单净流入 净额 + data08 = dataArrays[aItemIndex + 7] + data08 = data08.strip('"') + # print('中单净流入 净额',data08) + + dict_row['zdjll_je_wy'] = data08 + + # 中单净流入 净占比 + data09 = dataArrays[aItemIndex + 8] + data09 = data09.strip('"') + # print('中单净流入 净占比',data09) + + dict_row['zdjll_je_jzb'] = data09 + + # 小单净流入 净额 + data10 = dataArrays[aItemIndex + 9] + data10 = data10.strip('"') + # print('小单净流入 净额',data10) + + dict_row['xdjll_je_wy'] = data10 + + # 小单净流入 净占比 + data11 = dataArrays[aItemIndex + 10] + data11 = data11.strip('"') + # print('小单净流入 净占比',data11) + + dict_row['xdjll_je_jzb'] = data11 + + # 收盘价 + data12 = dataArrays[aItemIndex + 11] + data12 = data12.strip('"') + # print('收盘价',data12) + + dict_row['close_price'] = data12 + + # 涨跌幅 + data13 = dataArrays[aItemIndex + 12] + data13 = data13.strip('"') + data13 = data13.strip('"]]})') + # print('涨跌幅',data13) + + dict_row['change_price'] = data13 + + # 读取一条记录成功 + # print("成功读取一条记录") + # print(dict_row) + list_data_zjlx.append(dict_row) + + # print(list_data_zjlx) + + df = pd.DataFrame(list_data_zjlx) + + # print(df) + + client = DATABASE + coll_stock_zjlx = client.eastmoney_stock_zjlx + + # coll_stock_zjlx.insert_many(QA_util_to_json_from_pandas(df)) + + for i in range(len(list_data_zjlx)): + aRec = list_data_zjlx[i] + + # 🛠todo 当天结束后,获取当天的资金流相,当天的资金流向是瞬时间点的 + ret = coll_stock_zjlx.find_one(aRec) + if ret == None: + coll_stock_zjlx.insert_one(aRec) + print("🤑 插入新的记录 ", aRec) + else: + print("😵 记录已经存在 ", ret) -def QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode = None): +''' + 作为测试用例来获取, 对比 reqeust 方式的获取数据是否一致 +''' +def QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCodeList = None): # todo 🛠 check stockCode 是否存在有效合法 # todo 🛠 QALocalize 从QALocalize 目录中读取 固定位置存放驱动文件 print("📨当前工作路径文件位置 : ",os.getcwd()) - path_check = os.getcwd()+"/selenium_driver" + path_check = os.getcwd()+"/QUANTAXIS_WEBDRIVER" if os.path.exists(path_check) == False: print("😵 确认当前路径是否包含selenium_driver目录 😰 ") return else: - print(os.getcwd()+"/selenium_drive"," 目录存在 😁") + print(os.getcwd()+"/QUANTAXIS_WEBDRIVER"," 目录存在 😁") print("") path_for_save_data = QALocalize.download_path + "/eastmoney_stock_zjlx" @@ -31,10 +285,13 @@ def QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode = None): else: print(path_for_save_data,"目录存在!准备读取数据 😋") - full_path_name = path_for_save_data + "/" + stockCode + "_zjlx.sqlite.db" + browser = open_chrome_driver() - read_east_money_page_zjlx_to_sqllite(stockCode,full_path_name) + for indexCode in range(len(stockCodeList)): + full_path_name = path_for_save_data + "/" + stockCodeList[indexCode] + "_zjlx.sqlite.db" + read_east_money_page_zjlx_to_sqllite(stockCodeList[indexCode],full_path_name, browser) + close_chrome_dirver(browser) #创建目录 #启动线程读取网页,写入数据库 #等待完成 \ No newline at end of file diff --git a/QUANTAXIS/QASU/main.py b/QUANTAXIS/QASU/main.py index dd31ff600..036ac64c4 100755 --- a/QUANTAXIS/QASU/main.py +++ b/QUANTAXIS/QASU/main.py @@ -226,13 +226,20 @@ def QA_SU_crawl_eastmoney(action="zjlx",stockCode=None): if stockCode=="all": #读取tushare股票列表代码 - print(" 一共需要获取 %d 个股票的 资金流向 , 需要大概 %d 小时" % (len(stockItems), (len(stockItems)*30)/60/60 )) + print("💪 一共需要获取 %d 个股票的 资金流向 , 需要大概 %d 小时" % (len(stockItems), (len(stockItems)*5)/60/60 )) + + code_list = [] for stock in stockItems: + code_list.append(stock['code']) #print(stock['code']) - crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode=stock['code']) + crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(code_list) #print(stock) return else: #todo 检查股票代码是否合法 - return crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode=stockCode) + #return crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode=stockCode) + code_list = [] + code_list.append(stockCode) + return crawl_eastmoney_file.QA_request_eastmoney_zjlx(param_stock_code_list=code_list) + diff --git a/QUANTAXIS/QASU/save_to_db_fields_description.py b/QUANTAXIS/QASU/save_to_db_fields_description.py index 3cb018061..acf32784d 100644 --- a/QUANTAXIS/QASU/save_to_db_fields_description.py +++ b/QUANTAXIS/QASU/save_to_db_fields_description.py @@ -73,6 +73,25 @@ {"meigujingzichan": "每股净资产"}, {"baoliu2": "保留"} ] + }, + { + 'eastmoney_stock_zjlx': + [ + {"stock_code" : "股票代码"}, + {"date" : "日期"}, + {"zljll_je_wy" : "主力资金流入(金额)单位万元"}, + {"zljll_jzb_bfb" : "主力资金流入(净占比)%"}, + {"cddjll_je_wy" : "超级大单流入(金额)单位万元"}, + {"cddjll_je_jzb" : "超级大单流入(净占比)"}, + {"ddjll_je_wy" : "大单资金流入(金额)单位万元"}, + {"ddjll_je_jzb" : "大单资金流入(净占比)%"}, + {"zdjll_je_wy" : "中单资金流入(金额)单位万元"}, + {"zdjll_je_jzb" : "中单资金流入(净占比)%"}, + {"xdjll_je_wy" : "小单资金流入(金额)单位万元"}, + {"xdjll_je_jzb" : "小单资金流入(净占比)"}, + {"close_price" : "收盘价"}, + {"change_price" : "涨跌幅"} + ] } diff --git a/QUANTAXIS_CRAWLY/eastmoney_simulation_web_trader.py b/QUANTAXIS_CRAWLY/eastmoney_simulation_web_trader.py new file mode 100644 index 000000000..556ea0664 --- /dev/null +++ b/QUANTAXIS_CRAWLY/eastmoney_simulation_web_trader.py @@ -0,0 +1,110 @@ +''' + +''' +import time +import re +from selenium import webdriver +import sys +from selenium.common.exceptions import NoSuchElementException +import sqlite3 + +class SingletonMeta(type): + def __init__(cls, name, bases, namespaces): + super().__init__(cls, name, bases, namespaces) + cls.instance = None + + def __call__(cls, *args, **kwargs): + if cls.instance is None: + cls.instance = super().__call__(*args, **kwargs) + else: + print("instance already existed!") + return cls.instance + + + +class EastMoneySimulationWebTrader(): + + + def startTrade(self): + + urls = 'http://www.eastmoney.com/' #登陆到我的东方财富 + pa = re.compile(r'\w+') + + self.webdriver_parent_path = './QUANTAXIS_WEBDRIVER/macos/' + + if sys.platform == 'darwin': + browser = webdriver.Chrome(self.webdriver_parent_path+'chromedriver') + elif sys.platform == 'win32': + browser = webdriver.Chrome(self.webdriver_parent_path+'chromedriver') + elif sys.platform == 'linux': + browser = webdriver.Chrome(self.webdriver_parent_path+'chromedriver') + # todo 🛠 linux 下没有测试, linux 下 非gui环境下,用chrome headless driver + print("🎃") + print("🎃./selenium_driver/linux/chromedrive linux 平台上的的 🤖chromedriver 的路径") + print("🎃./selenium_driver/windows/chromedrive windows 平台上的的 🤖chromedriver 的路径") + print("🎃 https://npm.taobao.org/mirrors/chromedriver/ 🤖chromedriver下载地址") + print("🎃") + return + + # 启动chrome + print("🖼 准备获取数据, 打开chromedrive ,") + browser.set_page_load_timeout(30) # throw a TimeoutException when thepage load time is more than 15 seconds + #browser.minimize_window() + + print("🖼 正在请求数据中,请耐心等待 🍺 ⌛ ⌛ ⌛ ⌛ ⌛ ️") + #.get(urls) + #browser. + browser.get(urls) + + num = browser.window_handles + + print(type(num[0])) + + + browser.find_element_by_id('loginMenu').click() + + num = browser.window_handles + # browser.switch_to(num[1]) + time.sleep(1) # Let the page load + time.sleep(1) # Let the page load + + #spanTag = browser.find_elements_by_name('body') + + #browser.find_elements_by_id() + + #currHandle = browser.current_window_handler + browser.switch_to.window(num[1]) + + txt = browser.find_element_by_xpath('/html/body/div[1]/div/div/h1') + print(txt.text) + + frameLogIn = browser.find_element_by_id('frame_login') + browser.switch_to.frame(frameLogIn) + + + #account = browser.find_element_by_id('txt_account') + account = browser.find_elements_by_xpath('//*[@id="txt_account"]') + + #输入用户名 + account[0].send_keys('*********') + #输入密码 + password = browser.find_element_by_id('txt_pwd') + password.send_keys('*********') + + browser.find_element_by_id('btn_login').click() + # id; txt_account + # account; txt_pwd + + + time.sleep(1) # Let the page load + time.sleep(1) # Let the page load + time.sleep(1) # Let the page load + time.sleep(1) # Let the page load + time.sleep(1) # Let the page load + + + # 成功登陆东方财富 + + browser.quit() + +pass diff --git a/QUANTAXIS_CRAWLY/run_selenium_alone.py b/QUANTAXIS_CRAWLY/run_selenium_alone.py index 4601c1de9..8815f548d 100644 --- a/QUANTAXIS_CRAWLY/run_selenium_alone.py +++ b/QUANTAXIS_CRAWLY/run_selenium_alone.py @@ -4,36 +4,50 @@ import re from selenium import webdriver import sys +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.common.action_chains import ActionChains from selenium.common.exceptions import NoSuchElementException + import sqlite3 +import pandas as pd +import time +import timeit -def read_east_money_page_zjlx_to_sqllite(stockCode, save_sqlite_full_path_name): - urls = 'http://data.eastmoney.com/zjlx/{}.html'.format(stockCode) - pa=re.compile(r'\w+') +from QUANTAXIS.QAUtil import (DATABASE) +def open_chrome_driver(): if sys.platform == 'darwin': - browser = webdriver.Chrome('./selenium_driver/macos/chromedriver') + browser = webdriver.Chrome('./QUANTAXIS_WEBDRIVER/macos/chromedriver') elif sys.platform == 'win32': - browser = webdriver.Chrome('./selenium_driver/windows/chromedriver') + browser = webdriver.Chrome('./QUANTAXIS_WEBDRIVER/windows/chromedriver') elif sys.platform == 'linux': - browser = webdriver.Chrome('./selenium_driver/linux/chromedriver') - #todo 🛠 linux 下没有测试, linux 下 非gui环境下,用chrome headless driver + browser = webdriver.Chrome('./QUANTAXIS_WEBDRIVER/linux/chromedriver') + # todo 🛠 linux 下没有测试, linux 下 非gui环境下,用chrome headless driver print("🎃") print("🎃./selenium_driver/linux/chromedrive linux 平台上的的 🤖chromedriver 的路径") print("🎃./selenium_driver/windows/chromedrive windows 平台上的的 🤖chromedriver 的路径") print("🎃 https://npm.taobao.org/mirrors/chromedriver/ 🤖chromedriver下载地址") print("🎃") - return + return browser + +def close_chrome_dirver(browser): + browser.quit() + +def read_east_money_page_zjlx_to_sqllite(stockCode, save_sqlite_full_path_name, browser): + + urls = 'http://data.eastmoney.com/zjlx/{}.html'.format(stockCode) + pa=re.compile(r'\w+') # 启动chrome print("🖼 准备获取数据, 打开chromedrive ,") - browser.set_page_load_timeout(30) # throw a TimeoutException when thepage load time is more than 15 seconds - browser.minimize_window() + browser.set_page_load_timeout(10) # throw a TimeoutException when thepage load time is more than 15 seconds + #browser.minimize_window() print("🖼 正在请求数据中,请耐心等待 🍺 ⌛ ⌛ ⌛ ⌛ ⌛ ️") + browser.get(urls) - time.sleep(1) # Let the page load + #time.sleep(1) # Let the page load try: #result = [] @@ -71,7 +85,9 @@ def read_east_money_page_zjlx_to_sqllite(stockCode, save_sqlite_full_path_name): head2_list.append(head_name) #print(table_head_name1[i].get_attribute("value")) else: - raise NoSuchElementException + #raise NoSuchElementException + print("error !!!!!!!!") + row1_list = [] if isinstance(table_body2,list) == True: @@ -79,92 +95,158 @@ def read_east_money_page_zjlx_to_sqllite(stockCode, save_sqlite_full_path_name): table_body_row = table_body2[0].find_elements_by_tag_name('tr') print("🖼 成功获取 %d 天的资金流向数据️"%(len(table_body_row))) - for i in range(len(table_body_row)): + t0 = time.clock() + + row_length = len(table_body_row) + for i in range(row_length): + + table_body_cell = table_body_row[i].find_elements_by_tag_name('td') assert( len(table_body_cell) == 13 ) - v = [] - v.append(table_body_cell[0].text) # 日期 - v.append(table_body_cell[1].text) # 收盘价 - v.append(table_body_cell[2].text) # 涨跌幅 - v.append(table_body_cell[3].text) # 主力净流入_净额(万元) - v.append(table_body_cell[4].text) # 主力净流入_净占比(%) - v.append(table_body_cell[5].text) # 超大单净流入_净额(万元) - v.append(table_body_cell[6].text) # 超大单净流入_净占比(%) - v.append(table_body_cell[7].text) # 大单净流入_净额(万元) - v.append(table_body_cell[8].text) # 大单净流入_净占比(%) - v.append(table_body_cell[9].text) # 中单净流入_净额(万元) - v.append(table_body_cell[10].text)# 中单净流入_净占比(%) - v.append(table_body_cell[11].text)# 小单净流入_净额(万元) - v.append(table_body_cell[12].text)# 小单净流入_净占比(%) - - row1_list.append(v) + + + dict_row = {} + dict_row['stock_code'] = stockCode + + dict_row['date'] = table_body_cell[0].text + dict_row['zljll_je_wy'] = table_body_cell[1].text + dict_row['zljll_jzb_bfb'] = table_body_cell[2].text + dict_row['cddjll_je_wy'] = table_body_cell[3].text + dict_row['cddjll_je_jzb'] = table_body_cell[4].text + dict_row['ddjll_je_wy'] = table_body_cell[5].text + dict_row['ddjll_je_jzb'] = table_body_cell[6].text + dict_row['zdjll_je_wy'] = table_body_cell[7].text + dict_row['zdjll_je_jzb'] = table_body_cell[8].text + dict_row['xdjll_je_wy'] = table_body_cell[9].text + dict_row['xdjll_je_jzb'] = table_body_cell[10].text + dict_row['close_price'] = table_body_cell[11].text + dict_row['change_price'] = table_body_cell[12].text + + row1_list.append(dict_row) + + + + # todo 🛠 循环获取网页速度非常慢, 进一步学习 selenium 的操作, 批量一次获取数据 + iPct = round((i / row_length) * 100.0) + s1 = "\r读取数据%d%%[%s%s]" % (iPct, "🐢" * iPct, " " * (100 - iPct)) + sys.stdout.write(s1) + sys.stdout.flush() + + # v = [] + # v.append() # 日期 + # v.append(table_body_cell[1].text) # 收盘价 + # v.append(table_body_cell[2].text) # 涨跌幅 + # v.append(table_body_cell[3].text) # 主力净流入_净额(万元) + # v.append(table_body_cell[4].text) # 主力净流入_净占比(%) + # v.append(table_body_cell[5].text) # 超大单净流入_净额(万元) + # v.append(table_body_cell[6].text) # 超大单净流入_净占比(%) + # v.append(table_body_cell[7].text) # 大单净流入_净额(万元) + # v.append(table_body_cell[8].text) # 大单净流入_净占比(%) + # v.append(table_body_cell[9].text) # 中单净流入_净额(万元) + # v.append(table_body_cell[10].text)# 中单净流入_净占比(%) + # v.append(table_body_cell[11].text)# 小单净流入_净额(万元) + # v.append(table_body_cell[12].text)# 小单净流入_净占比(%) + + t = time.clock() - t0 + print('总体耗时间: %f'%t) + else: - raise NoSuchElementException + #raise NoSuchElementException + print("error !!!!!!!!") + assert (len(row1_list) != 0 ) assert (len(head1_list) != 0) assert (len(head2_list) != 0) - browser.close() + ActionChains(browser).key_down(Keys.CONTROL).send_keys("w").key_up(Keys.CONTROL).perform() + print("🖼 完成获取数据,关闭chromedrive ,") - #保存sqllite文件 - print("🥕准备写入数据库文件 🎞保存路径",save_sqlite_full_path_name) - - conn = sqlite3.connect(save_sqlite_full_path_name) - # Create table - conn.execute('''CREATE TABLE IF NOT EXISTS - zjlx(code text, date text, - close text, chg text, - zljll_je text, zljll_jzb text, - cddjll_je text, cddjll_jzb text, - ddjll_je text, ddjll_jzb text, - zdjll_je text, zdjll_jzb text, - xdjll_je text, xdjll_jzb text, - primary key(code,date)) - ''') - - for a_row in row1_list: - # 如果记录重复,则替换 - strSQLExec = 'INSERT OR REPLACE INTO zjlx(code, date, close, chg, \ - zljll_je, zljll_jzb, \ - cddjll_je, cddjll_jzb, \ - ddjll_je, ddjll_jzb, \ - zdjll_je, zdjll_jzb, \ - xdjll_je, xdjll_jzb) \ - VALUES("%s","%s","%s","%s",\ - "%s","%s",\ - "%s","%s",\ - "%s","%s",\ - "%s","%s",\ - "%s","%s")'%\ - (stockCode, - a_row[0], - a_row[1], - a_row[2], - a_row[3], - a_row[4], - a_row[5], - a_row[6], - a_row[7], - a_row[8], - a_row[9], - a_row[10], - a_row[11], - a_row[12] - ) - - conn.execute(strSQLExec) - - print("🖼 🎞写入数据库 🐌", a_row, " 💹 ") - - conn.commit() - - conn.close() + + df = pd.DataFrame(row1_list) + + # print(df) + + client = DATABASE + coll_stock_zjlx = client.eastmoney_stock_zjlx + + # coll_stock_zjlx.insert_many(QA_util_to_json_from_pandas(df)) + + print("🥕准备写入mongodb 🎞保存数据库 ", 'eastmoney_stock_zjlx' ) + + new_rec = 0 + for i in range(len(row1_list)): + aRec = row1_list[i] + + # 🛠todo 当天结束后,获取当天的资金流相,当天的资金流向是瞬时间点的 + ret = coll_stock_zjlx.find_one(aRec) + if ret == None: + coll_stock_zjlx.insert_one(aRec) + new_rec = new_rec + 1 + #print("🤑 插入新的记录 ", aRec) + else: + #print("😵 记录已经存在 ", ret) + pass + + print("🖼 🎞写入数据库 🐌 新纪录 ", new_rec, "条 💹 ") + #保存sqllite文件 + #print("🥕准备写入数据库文件 🎞保存路径",save_sqlite_full_path_name) + + # conn = sqlite3.connect(save_sqlite_full_path_name) + # # Create table + # conn.execute('''CREATE TABLE IF NOT EXISTS + # zjlx(code text, date text, + # close text, chg text, + # zljll_je text, zljll_jzb text, + # cddjll_je text, cddjll_jzb text, + # ddjll_je text, ddjll_jzb text, + # zdjll_je text, zdjll_jzb text, + # xdjll_je text, xdjll_jzb text, + # primary key(code,date)) + # ''') + # + # for a_row in row1_list: + # # 如果记录重复,则替换 + # strSQLExec = 'INSERT OR REPLACE INTO zjlx(code, date, close, chg, \ + # zljll_je, zljll_jzb, \ + # cddjll_je, cddjll_jzb, \ + # ddjll_je, ddjll_jzb, \ + # zdjll_je, zdjll_jzb, \ + # xdjll_je, xdjll_jzb) \ + # VALUES("%s","%s","%s","%s",\ + # "%s","%s",\ + # "%s","%s",\ + # "%s","%s",\ + # "%s","%s",\ + # "%s","%s")'%\ + # (stockCode, + # a_row[0], + # a_row[1], + # a_row[2], + # a_row[3], + # a_row[4], + # a_row[5], + # a_row[6], + # a_row[7], + # a_row[8], + # a_row[9], + # a_row[10], + # a_row[11], + # a_row[12] + # ) + # + # conn.execute(strSQLExec) + # + # print("🖼 🎞写入数据库 🐌", a_row, " 💹 ") + + # conn.commit() + # + # conn.close() except NoSuchElementException: - print("❌ read_east_money_page_zjlx_to_sqllite 读取网页数据错误 🤮") + print("❌ read_east_money_page_zjlx_to_sqllite 读取网页数据错误 🤮") #driver.close() diff --git a/QUANTAXIS_CRAWLY_TEST/eastmoney_simulation_web_trader_test.py b/QUANTAXIS_CRAWLY_TEST/eastmoney_simulation_web_trader_test.py new file mode 100644 index 000000000..c8bb8abe8 --- /dev/null +++ b/QUANTAXIS_CRAWLY_TEST/eastmoney_simulation_web_trader_test.py @@ -0,0 +1,10 @@ +from QUANTAXIS_CRAWLY.eastmoney_simulation_web_trader import EastMoneySimulationWebTrader +import unittest + +class TestEastMoneySimTrader(unittest.TestCase): + + def testTrader(self): + EMSimTrader = EastMoneySimulationWebTrader() + EMSimTrader.startTrade() + + diff --git a/QUANTAXIS_Test/QAData_Test/data_fq_test.py b/QUANTAXIS_Test/QAData_Test/data_fq_test.py new file mode 100644 index 000000000..799e0df43 --- /dev/null +++ b/QUANTAXIS_Test/QAData_Test/data_fq_test.py @@ -0,0 +1,26 @@ +import unittest + +class QAData_fq_test(unittest.TestCase): + + ''' + wind 复权算法 + + 定点复权公司 + Pt’= P0* ( P1/ f1(P0))* ( P2/ f2(P1))*...*( Pt-1/ ft-1(Pt-2))*(Pt/ ft(Pt-1)) + + Pt’:t 点复权价 + Pt:t 点交易价 + ft(Pt-1):昨收盘价 + + 当天交易价格/前一天的交易价格 + + 假设 股价 序列 + p[0:10] = [1.1,0.6,0.7,0.8,0.8,1.0,0.5,0.6,0.7,0.8] + t=1 1转1股 + t=5 1转1股 + + ''' + def fq_test(self): + print("测试复权") + pass + pass \ No newline at end of file diff --git a/QUANTAXIS_Test/QAFetch_Test/QACrawl_Test.py b/QUANTAXIS_Test/QAFetch_Test/QACrawl_Test.py index acda7ff55..91bde4ab6 100644 --- a/QUANTAXIS_Test/QAFetch_Test/QACrawl_Test.py +++ b/QUANTAXIS_Test/QAFetch_Test/QACrawl_Test.py @@ -1,90 +1,26 @@ import unittest import urllib +import pandas as pd -class QACrawl_Test(unittest.TestCase): - - def test_QACrawl_Eastmoney(self): - - - #改用 re 正则表达式做匹配 - #改用 - response = urllib.request.urlopen("http://data.eastmoney.com/zjlx/002433.html") - content = response.read() - - strings = content.decode("utf-8", "ignore") - string_lines = strings.split("\r\n") - - for aline in string_lines: - aline = aline.strip() - if '_stockCode' in aline: - _stockCode = aline[len('var _stockCode = '):] - _stockCode = _stockCode.strip("\"\"\,") +from QUANTAXIS.QAUtil.QATransform import QA_util_to_json_from_pandas +import time +from QUANTAXIS.QAUtil import (DATABASE) - if '_stockMarke' in aline: - _stockMarke = aline[len('_stockMarke = '):] - _stockMarke = _stockMarke.strip("\"\"\,") +from QUANTAXIS.QASU.crawl_eastmoney import QA_request_eastmoney_zjlx +class QACrawl_Test(unittest.TestCase): - if '_stockName' in aline: - _stockName = aline[len('_stockName = '):] - _stockName = _stockName.strip("\"\"\,") - - - if '_market' in aline: - _market = aline[len('_market = '):] - _market = _market.strip("\"\"\,") - break - - # print(_stockCode) - # print(_stockMarke) - # print(_stockName) - # print(_market) - - values = [] - for aline in string_lines: - aline = aline.strip() - if 'EM_CapitalFlowInterface' in aline: - #print(aline) - #print('------------------') - aline = aline.strip() - if aline.startswith('var strUrl = '): - if 'var strUrl = ' in aline: - aline = aline[len('var strUrl = '):] - values = aline.split('+') - #print(values) - break - #print('------------------') - - print(values) - requestStr = "" - for iItem in values: - if '_stockCode' in iItem: - requestStr = requestStr + _stockCode - elif '_stockMarke' in iItem: - requestStr = requestStr + _stockMarke - else: - if 'http://ff.eastmoney.com/' in iItem: - requestStr = 'http://ff.eastmoney.com/' - else: - iItem = iItem.strip(' "') - iItem = iItem.rstrip(' "') - requestStr = requestStr + iItem - #print(requestStr) + def test_QACrawl_Eastmoney(self): - response = urllib.request.urlopen(requestStr) - content2 = response.read() + listStock = ['600001','600002','600003','600004','600005','600006','600007','600008','600009','6000010', + '600011','600012','600013','600014','600015','600016','600017','600018','600019','6000020', - #print(content2) - strings = content2.decode("utf-8", "ignore") + ] - if 'var aff_data=({data:[["' in strings: - leftChars = strings[len('var aff_data=({data:[["'):] - #print(leftChars) - dataArrays = leftChars.split(',') - print(dataArrays) + QA_request_eastmoney_zjlx(listStock) # print(string_lines) # print("-----------------------------------------------------------------------------")