Skip to content

Commit

Permalink
Merge pull request #8 from ts7ming/dev
Browse files Browse the repository at this point in the history
功能合并
  • Loading branch information
ts7ming committed Nov 14, 2023
2 parents 8a1e3ca + 65509f4 commit a28e7c2
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 26 deletions.
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ ds.to_db(df=df_to_write, tb_name='')

# 执行sql
ds.exe_sql(sql='delete from table')

# pd.DataFrame 转图片
### 可以指定文件路径: file_path. 默认生成临时文件
### 可以用列表为每一列指定宽度 col_width
### 指定字体大小 font_size
path = ds.to_image(df, file_path=None, col_width=None, font_size=None)
```

#### ETL辅助功能
Expand Down Expand Up @@ -252,5 +258,24 @@ Utils.div_list(listTemp=[1, 2, 3], n=2)
# 用正则从sql里提取用到的表
### kw: (可选)指定匹配关键词
### strip: (可选)指定需要清除的字符
Utils.sql2table(sql_text='', kw=None,strip=None)
```
Utils.sql2table(sql_text='', kw=None, strip=None)
# 多进程执行
### func: 待执行函数
### args_list: 每个子任务的参数
### max_process = 1: 最大进程数, 默认为 1
### 以list返回每个子进程执行结果, 和 args_list 顺序一致
result = Utils.mult_run(func, args_list=[], max_process=1)
```

#### 命令行
```commandline
用法: pyqueen command [arg1 arg2 ...]  (参数以空格分隔)
---
command:
#1 sql2table [file_path] 从sql解析用到的表(通过正则解析, 有误差) (不带参数时读取剪切板)
#2 getcode file_path 检测文件编码
#3 md5 基于剪切板文本生成md5
```

29 changes: 27 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ ds.to_db(df=df_to_write, tb_name='')

# 执行sql
ds.exe_sql(sql='delete from table')

# pd.DataFrame 转图片
### 可以指定文件路径: file_path. 默认生成临时文件
### 可以用列表为每一列指定宽度 col_width
### 指定字体大小 font_size
path = ds.to_image(df, file_path=None, col_width=None, font_size=None)
```

#### ETL辅助功能
Expand Down Expand Up @@ -252,5 +258,24 @@ Utils.div_list(listTemp=[1, 2, 3], n=2)
# 用正则从sql里提取用到的表
### kw: (可选)指定匹配关键词
### strip: (可选)指定需要清除的字符
Utils.sql2table(sql_text='', kw=None,strip=None)
```
Utils.sql2table(sql_text='', kw=None, strip=None)
# 多进程执行
### func: 待执行函数
### args_list: 每个子任务的参数
### max_process = 1: 最大进程数, 默认为 1
### 以list返回每个子进程执行结果, 和 args_list 顺序一致
result = Utils.mult_run(func, args_list=[], max_process=1)
```

#### 命令行
```commandline
用法: pyqueen command [arg1 arg2 ...]  (参数以空格分隔)
---
command:
#1 sql2table [file_path] 从sql解析用到的表(通过正则解析, 有误差) (不带参数时读取剪切板)
#2 getcode file_path 检测文件编码
#3 md5 基于剪切板文本生成md5
```

1 change: 1 addition & 0 deletions pyqueen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from pyqueen.service.showdoc import Showdoc
from pyqueen.utility.time_kit import TimeKit
from pyqueen.utility.utils import Utils
from pyqueen.utility.command import cmd
9 changes: 6 additions & 3 deletions pyqueen/etl/data_source.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import datetime

import pandas as pd

from pyqueen.etl.excel import Excel


Expand Down Expand Up @@ -45,7 +43,6 @@ def import_test_data(self, excel_path):
for sht_name, df in data.items():
self.__db.to_db(df, sht_name)


@staticmethod
def __file_log(etl_log):
log_path = etl_log['py_path']
Expand Down Expand Up @@ -262,3 +259,9 @@ def pdsql(sql, data):
conn.register(df_name, df)
result = conn.execute(sql).df()
return result

@staticmethod
def to_image(df, file_path=None, col_width=None, font_size=None):
    """
    Render a DataFrame as an image by delegating to ``Image.df2image``.

    :param df: DataFrame to render
    :param file_path: target image path, forwarded unchanged (None is allowed)
    :param col_width: column width setting, forwarded unchanged
    :param font_size: font size setting, forwarded unchanged
    :return: whatever ``Image.df2image`` returns (the image path)
    """
    # Imported lazily so the image module is only loaded when this feature is used.
    from pyqueen.etl.image import Image

    render_options = {
        'file_path': file_path,
        'col_width': col_width,
        'font_size': font_size,
    }
    return Image.df2image(df, **render_options)
6 changes: 4 additions & 2 deletions pyqueen/etl/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
import xlsxwriter
import os


class Excel:
@staticmethod
def read_excel(path, sheet_name=None):
    """
    Read an Excel file through ``pandas.read_excel``.

    :param path: path of the workbook to read
    :param sheet_name: forwarded to pandas unchanged; the default None makes
        pandas load every sheet (see the pandas ``read_excel`` documentation)
    :return: the object returned by ``pandas.read_excel``
    """
    return pd.read_excel(path, sheet_name=sheet_name)

@staticmethod
def to_excel(file_path, sheet_list, fillna='', fmt=None, font='微软雅黑', font_color='black', font_size=11,column_width=17):
def to_excel(file_path, sheet_list, fillna='', fmt=None, font='微软雅黑', font_color='black', font_size=11,
column_width=17):
if str(file_path)[-5:] != '.xlsx':
raise Exception('文件路径必须 .xlsx 结尾')
if os.path.exists(os.path.dirname(file_path)) is False:
Expand Down Expand Up @@ -68,4 +70,4 @@ def delete_file(path):
except Exception as e:
pass
except Exception as e:
pass
pass
45 changes: 45 additions & 0 deletions pyqueen/etl/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pandas as pd


class Image:
    """Helpers that render tabular data to image files."""

    @staticmethod
    def df2image(df, file_path=None, col_width=None, font_size=None):
        """
        Render a pandas.DataFrame as a PNG table image.

        :param df: pandas.DataFrame to draw
        :param file_path: target image path; when None the image is written to
            ``tmp.png`` inside the system temporary directory (overwritten on
            every call)
        :param col_width: None lets matplotlib choose the widths; the string
            'auto' derives relative widths from the longer of each column name
            and its first-row value; any other value is passed to matplotlib
            as the list of per-column widths
        :param font_size: fixed font size for the table text; None keeps
            matplotlib's automatic font sizing
        :return: path of the written image
        """
        # pyplot is the supported interface; pylab merely re-exports it.
        import matplotlib.pyplot as plt
        import tempfile

        if file_path is None:
            file_path = tempfile.gettempdir() + '/tmp.png'
        pd.set_option('display.unicode.ambiguous_as_wide', True)
        pd.set_option('display.unicode.east_asian_width', True)

        fig = plt.figure()
        try:
            plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
            plt.subplots_adjust(top=0.7, bottom=0, left=0, right=1, hspace=3, wspace=3)
            plt.margins(1, 1)
            # NOTE(review): three positional ints mean a 111-row x 1-column grid,
            # first cell - kept as in the original; confirm this is intended.
            ax = plt.subplot(111, 1, 1)
            ax.xaxis.set_visible(False)
            ax.yaxis.set_visible(False)

            # isinstance guard: comparing an array-like of widths to 'auto'
            # with == would be element-wise and not usable in an ``if``.
            if isinstance(col_width, str) and col_width == 'auto':
                first_row = list(df.head(1).to_records()[0])[1:]  # [1:] drops the index field
                lengths = [max(len(str(value)), len(str(label)))
                           for value, label in zip(first_row, list(df.columns))]
                total = sum(lengths)
                # Fall back to matplotlib's own widths instead of dividing by zero.
                widths = [length / total for length in lengths] if total else None
            else:
                widths = col_width  # None or caller-supplied widths

            dtable = ax.table(cellText=df.values, colLabels=df.columns, colWidths=widths)
            if font_size is not None:
                dtable.auto_set_font_size(False)
                dtable.set_fontsize(font_size)
            plt.savefig(file_path, dpi=600, bbox_inches='tight')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this each call leaks one figure.
            plt.close(fig)
        return file_path
53 changes: 53 additions & 0 deletions pyqueen/utility/command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import sys
from pyqueen.utility.utils import Utils

# Usage banner for the command line entry point; cmd() prints it when the
# program is started without any arguments.
# NOTE(review): this is a regular (non-raw) string that contains backslashes.
# Sequences such as `\_` or `\ ` are not recognised escapes (newer Python
# versions warn about them), `\\` collapses to a single backslash, and a
# backslash directly before a line break would join two lines - confirm the
# rendered ASCII art is what is intended.
doc = '''
____ ___
| _ \ _ _ / _ \ _ _ ___ ___ _ __
| |_) | | | | | | | | | |/ _ \/ _ \ '_ \
| __/| |_| | |_| | |_| | __/ __/ | | |
|_| \__, |\__\_\\\__,_|\___|\___|_| |_|
|___/
=================================================================================
用法: pyqueen command args1,args2,...
---
command:
#1 sql2table [file_path] 从sql解析用到的表(通过正则解析, 有误差) (不带参数时读取剪切板)
#2 getcode file_path 检测文件编码
#3 md5 基于剪切板文本生成md5
'''


def cmd():
    """
    Command line entry point: dispatch on the first command line argument.

    Supported commands:
      * ``sql2table [file_path]`` - print the tables found by
        ``Utils.sql2table`` in the given UTF-8 file, or in the clipboard text
        when no path is given
      * ``getcode file_path`` - print the encoding and confidence reported by
        ``Utils.detect_encoding``
      * ``md5`` - print ``Utils.md5`` of the clipboard text

    With no arguments, or with an unrecognised command, the usage text is
    printed (previously an unrecognised command produced no output at all).
    """
    argv = sys.argv[1:]
    if not argv:
        print(doc)
        return

    command = argv[0]
    if command == 'sql2table':
        if len(argv) == 1:
            print('从剪切板读取SQL文本')
            import pyperclip  # imported lazily: only clipboard commands need it
            # NOTE(review): line breaks are removed, not replaced by spaces, so
            # tokens on adjacent lines get glued together - confirm that
            # Utils.sql2table copes with that.
            sql_text = str(pyperclip.paste()).replace('\n', '').strip(' ')
        else:
            with open(argv[1], 'r', encoding='utf-8') as f:
                sql_text = f.read()
        tb_list = Utils.sql2table(sql_text)
        print('================= begin =========================\n')
        for tb in tb_list:
            print(tb)
        print('\n================= end =========================\n')
    elif command == 'getcode':
        if len(argv) < 2:
            print('指定文件路径')
            return
        print('================= begin =========================\n')
        encoding, confidence = Utils.detect_encoding(argv[1])
        print('文件编码: %s, 可信度: %s' % (str(encoding), str(confidence)))
        print('\n================= end =========================\n')
    elif command == 'md5':
        import pyperclip
        text = str(pyperclip.paste()).replace('\n', '').strip(' ')
        print('================= begin =========================\n')
        print(Utils.md5(text))
        print('\n================= end =========================\n')
    else:
        # Unknown command: show the usage text instead of exiting silently.
        print(doc)
22 changes: 11 additions & 11 deletions pyqueen/utility/time_kit.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,16 @@ def get_nday_of_week(self, the_day):
@classmethod
def get_month_on_day(cls, the_day):
    """
    Return the first and last day of the month that contains ``the_day``.

    :param the_day: date whose string form starts with six year/month
        characters - presumably a yyyymmdd int or str; TODO confirm
    :return: tuple ``(month_start, month_end)``; ``month_start`` is the int
        yyyymm01, ``month_end`` is the value ``time_delta`` yields for the
        day before the start of the following month
    """
    month_start = int(str(the_day)[0:6] + '01')
    next_month_start = cls.time_delta(month_start, 'months', 1)
    month_end = cls.time_delta(next_month_start, 'days', -1)
    return month_start, month_end

@classmethod
def get_week_on_day(cls, the_day):
    """
    Return the first and last day of the week that contains ``the_day``.

    :param the_day: date in the form accepted by ``get_nday_of_week`` and
        ``time_delta``
    :return: tuple ``(week_start, week_end)`` where ``week_end`` is six days
        after ``week_start``
    """
    # presumably get_nday_of_week is 1-based (1 = first day of the week), so
    # subtracting 1 gives the distance back to the week start - TODO confirm
    days_into_week = cls.get_nday_of_week(the_day) - 1
    week_start = cls.time_delta(the_day, 'days', -days_into_week)
    week_end = cls.time_delta(week_start, 'days', 6)
    return week_start, week_end

@classmethod
Expand Down Expand Up @@ -178,11 +178,11 @@ def int2str(self, time_value, sep='-'):
return None

@classmethod
def date_div(self, start, end, num,by='groups'):
def date_div(self, start, end, num, by='groups'):
""" 指定分段数 拆分时间段 """
if by=='ndays':
step=num
elif by=='groups':
if by == 'ndays':
step = num
elif by == 'groups':
d_len = self.get_day_num(start, end)
step = int(d_len / num)
else:
Expand All @@ -200,7 +200,7 @@ def date_div(self, start, end, num,by='groups'):

@staticmethod
def get_nweek_of_year(week_start):
    """
    Return the ISO week number of a date.

    :param week_start: date as yyyymmdd (int or str); characters 0-3 are read
        as the year, 4-5 as the month and 6-7 as the day
    :return: ISO-8601 week number (second item of ``date.isocalendar()``)
    :raises ValueError: if the digits do not form a valid calendar date
    """
    day_text = str(week_start)
    year, month, day = int(day_text[0:4]), int(day_text[4:6]), int(day_text[6:8])
    return datetime.date(year, month, day).isocalendar()[1]

Expand Down Expand Up @@ -253,4 +253,4 @@ def __base_time(self, today):
lm2_start_date = lm_start_date - relativedelta(months=1)
lm2_end_date = lm_start_date - datetime.timedelta(days=1)
self.lm2_start = self.date2int(lm2_start_date)
self.lm2_end = self.date2int(lm2_end_date)
self.lm2_end = self.date2int(lm2_end_date)
40 changes: 40 additions & 0 deletions pyqueen/utility/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,43 @@ def sql2table(sql_text, kw=None, strip=None):
if tmp not in table_list:
table_list.append(tmp)
return table_list

@staticmethod
def detect_encoding(file_path):
    """
    Guess the text encoding of a file with ``chardet``.

    :param file_path: path of the file to inspect; it is read completely in
        binary mode
    :return: tuple ``(encoding, confidence)`` taken from the ``'encoding'``
        and ``'confidence'`` entries of the ``chardet.detect`` result
    """
    import chardet

    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    guess = chardet.detect(raw_bytes)
    return guess['encoding'], guess['confidence']

@staticmethod
def exec(q, func, index, args):
    """
    Call ``func(args)`` and put ``[index, result]`` on the queue ``q``.

    :param q: queue-like object providing ``put``
    :param func: callable invoked with ``args`` as its single argument
    :param index: tag stored next to the result so the caller can identify it
    :param args: the single argument handed to ``func``
    """
    q.put([index, func(args)])

@staticmethod
def mult_run(func, args_list, max_process=1):
    """
    Run ``func`` once per entry of ``args_list``, each call in its own process.

    The indexed arguments are split into batches with
    ``Utils.div_list(..., max_process)``; the processes of one batch run
    concurrently and the next batch starts once the current one has finished.

    :param func: callable taking one argument; it and its result must be
        transferable between processes
    :param args_list: list with one argument per task
    :param max_process: batch parameter handed to ``Utils.div_list``
        (presumably the number of tasks per batch - TODO confirm)
    :return: list of results ordered like ``args_list``. A task whose process
        ends without delivering a result (e.g. ``func`` raised) is missing
        from the list, as before.
    """
    import multiprocessing
    import queue

    q = multiprocessing.Queue()
    indexed_args = [[i, args] for i, args in enumerate(args_list)]
    collected = []
    for batch in Utils.div_list(indexed_args, max_process):
        jobs = []
        for index, args in batch:
            p = multiprocessing.Process(target=Utils.exec, args=(q, func, index, args))
            p.start()
            jobs.append(p)

        # Drain the queue BEFORE joining: a child that has put a large item
        # cannot exit until it is consumed, so join-then-read can deadlock.
        # Counting expected results also avoids Queue.qsize(), which is only
        # approximate and not implemented on every platform.
        missing = len(jobs)
        while missing:
            try:
                collected.append(q.get(timeout=0.1))
                missing -= 1
            except queue.Empty:
                if any(job.is_alive() for job in jobs):
                    continue
                # Every child has exited: pick up results that arrived in the
                # meantime, then stop waiting for tasks that never reported.
                try:
                    while missing:
                        collected.append(q.get(timeout=0.1))
                        missing -= 1
                except queue.Empty:
                    pass
                break
        for job in jobs:
            job.join()

    # The original called sorted() and discarded its return value, so results
    # came back in completion order; sort in place to honour the contract.
    collected.sort(key=lambda item: item[0])
    return [item[1] for item in collected]
13 changes: 7 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

setup(
name='pyqueen',
version='1.0.7',
version='1.0.8',
url='https://github.com/ts7ming/pyqueen.git',
description='Rule your Data',
long_description=open("README.md",encoding='utf-8').read(),
long_description=open("README.md", encoding='utf-8').read(),
long_description_content_type="text/markdown",
author='ts7ming',
author_email='qiming.ma@outlook.com',
Expand All @@ -17,14 +17,15 @@
"Operating System :: OS Independent",
],
install_requires=[
'importlib-metadata',
'numpy',
'pandas',
'PyMySQL',
'requests',
'socket.engine',
'SQLAlchemy==1.4.49',
'xlrd==1.2.0',
'XlsxWriter'
],
entry_points={
'console_scripts': [
'pyqueen = pyqueen:cmd',
]
}
)

0 comments on commit a28e7c2

Please sign in to comment.