In [1]:
import os
import re
from docx import Document
from docx.enum.table import WD_ROW_HEIGHT
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.shared import Cm, Pt, RGBColor
from docx.oxml.ns import qn
import pandas as pd
import logging
import argparse
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

In [2]:
def fill_cell(cell, content, font_name='微软雅黑', font_size=10, color=RGBColor(0, 0, 0), bold=False):
    """Append ``content`` as a formatted run to the first paragraph of ``cell``.

    Args:
        cell: Table cell whose first paragraph receives the new run.
        content: Value to write; it is converted with ``str``.
        font_name: Typeface name applied to the run.
        font_size: Font size in points.
        color: ``RGBColor`` used for the text.
        bold: Whether the run is rendered bold.
    """
    first_paragraph = cell.paragraphs[0]
    text_run = first_paragraph.add_run(str(content))
    text_run.bold = bold

    font = text_run.font
    font.name = font_name
    # Also name the same typeface in the run's w:eastAsia font attribute.
    text_run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    font.size = Pt(font_size)
    font.color.rgb = color

def add_row(table, row_no, height=1):
    """Append rows to ``table`` until it holds at least ``row_no`` rows.

    Every appended row gets an "at least" height rule with a minimum height
    of ``height`` centimetres. Nothing happens when the table is already
    large enough.
    """
    while True:
        if len(table.rows) >= row_no:
            return
        appended = table.add_row()
        appended.height_rule = WD_ROW_HEIGHT.AT_LEAST
        appended.height = Cm(height)

def write_table(table, row, col, text, text_vertical=WD_ALIGN_VERTICAL.BOTTOM, bold=False, font_size=10):
    """Write ``text`` into the cell at 1-based position (``row``, ``col``).

    The table is extended via ``add_row`` when ``row`` lies beyond its
    current last row. The cell's vertical alignment is set to
    ``text_vertical`` and the text is appended with ``fill_cell``.
    """
    existing_rows = len(table.rows)
    if row > existing_rows:
        add_row(table, row)

    # Callers pass 1-based coordinates; table.cell() is 0-based.
    target = table.cell(row - 1, col - 1)
    target.vertical_alignment = text_vertical
    fill_cell(target, text, font_size=font_size, bold=bold)
    
def insert_pic(table, row, col, pic_path, width=Cm(19)):
    """Insert the image at ``pic_path`` into the cell at 1-based (``row``, ``col``).

    The table is extended via ``add_row`` when needed. The picture goes into
    a new paragraph added to the cell and is scaled to ``width``.
    """
    if row > len(table.rows):
        add_row(table, row)

    # Callers pass 1-based coordinates; table.cell() is 0-based.
    target_cell = table.cell(row - 1, col - 1)
    picture_run = target_cell.add_paragraph().add_run()
    picture_run.add_picture(pic_path, width=width)

In [3]:
def ifmatch(text):
    """Return the key of the first ``{key}`` placeholder in ``text``.

    A placeholder is an opening brace, one or more characters that are
    neither whitespace nor braces, and a closing brace. Excluding braces from
    the key keeps adjacent placeholders such as ``{a}{b}`` from being read as
    the single key ``a}{b``.

    Args:
        text: String to search.

    Returns:
        The key without its braces, or ``None`` when no placeholder is found.
    """
    # Raw string: '\{' and '\S' are invalid escapes in a normal string literal.
    found = re.search(r'\{([^{}\s]+)\}', text)
    return found.group(1) if found else None
def sub_cell(raw, new, text):
    """Replace every literal ``{raw}`` placeholder in ``text`` with ``new``.

    The substitution is a plain string replacement. Regex metacharacters in
    ``raw`` are therefore matched literally, and backslashes or group
    references in ``new`` are inserted verbatim instead of being interpreted
    as a regex replacement template.

    Args:
        raw: Placeholder key without braces; formatted with ``str``.
        new: Replacement string.
        text: String containing zero or more ``{raw}`` placeholders.

    Returns:
        ``text`` with all occurrences of the placeholder replaced.
    """
    placeholder = '{' + f'{raw}' + '}'
    return text.replace(placeholder, new)

In [11]:
def fill_template_table(table, data):
    """Substitute ``{key}`` placeholders in every cell of ``table`` from ``data``.

    Each cell is scanned for all placeholders (a brace-delimited key
    containing no whitespace or braces). Every placeholder whose key is in
    ``data`` is replaced with ``str(data[key])``; placeholders with unknown
    keys are left as they are. A cell with at least one known key is cleared
    and rewritten through ``fill_cell`` with its default formatting.

    A failure while rewriting one cell is logged with its traceback and does
    not stop the remaining cells from being processed.

    Args:
        table: Table exposing ``rows`` and ``row_cells(index)``.
        data: Mapping from placeholder key to replacement value.
    """
    for row_idx in range(len(table.rows)):
        for cell in table.row_cells(row_idx):
            # dict.fromkeys removes repeated keys while keeping first-seen order.
            found_keys = dict.fromkeys(re.findall(r'\{([^{}\s]+)\}', cell.text))
            known_keys = [key for key in found_keys if key in data]
            if not known_keys:
                continue
            try:
                text = cell.text
                for key in known_keys:
                    logging.info(key)
                    text = sub_cell(key, str(data[key]), text)
                cell.text = ''
                fill_cell(cell, text)
            except Exception as e:
                # Best effort: report the failure and continue with the next cell.
                logging.exception(e)
                        
def fill_template(document, data):
    """Run ``fill_template_table`` over each table in ``document.tables``.

    Args:
        document: Object exposing a ``tables`` iterable.
        data: Mapping of placeholder keys to values, passed through unchanged.
    """
    for current_table in document.tables:
        fill_template_table(current_table, data)

In [14]:
def get_excel_info(excel_file, sheet_name='家系', index_col='家系编号'):
    """Load one worksheet as an all-string DataFrame indexed by ``index_col``.

    Every column, including the one that becomes the index, is cast to
    ``str`` before the index is set.

    Args:
        excel_file: Path (or file-like object) of the workbook.
        sheet_name: Worksheet to read.
        index_col: Column to use as the index of the result.

    Returns:
        The sheet as a DataFrame of strings indexed by ``index_col``.
    """
    sheet = pd.read_excel(excel_file, sheet_name=sheet_name)
    sheet_as_text = sheet.astype(str)
    return sheet_as_text.set_index(index_col)

def get_config(config_excel):
    """Read a header-less workbook sheet into a ``{first column: second column}`` dict.

    The sheet is read without a header row and with its first column as the
    index; the column labelled ``1`` (the second column) supplies the values.
    """
    config_sheet = pd.read_excel(config_excel, header=None, index_col=0)
    value_column = config_sheet[1]
    return value_column.to_dict()

def make_report_by_family(family_info, sample_info, config, template_docx, outdir):
    """Render one ``.docx`` report per row of ``family_info``.

    For every row a fresh copy of the template is opened, its tables are
    filled from the row's ``{column: value}`` mapping via ``fill_template``,
    and the result is saved as ``<outdir>/<index>.docx``.

    Args:
        family_info: DataFrame whose index labels name the output files and
            whose columns supply the placeholder values.
        sample_info: Accepted for interface compatibility; not used here.
        config: Accepted for interface compatibility; not used here.
        template_docx: Path of the ``.docx`` template.
        outdir: Directory for the generated reports; created if missing.
    """
    # An empty outdir means "current directory"; os.makedirs('') would raise.
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    if not family_info.index.is_unique:
        logging.warning('family_info index has duplicate labels; later rows overwrite earlier reports')

    # iterrows() yields exactly one Series per row, even when index labels
    # repeat, whereas .loc[label] returns a DataFrame for a duplicated label.
    for idx, family_row in family_info.iterrows():
        logging.info(idx)
        report = Document(template_docx)
        fill_template(report, family_row.to_dict())
        report.save(os.path.join(outdir, str(idx) + '.docx'))


# Inputs and output location for this run; all paths are relative to the working directory.
template_docx = 'template_default.docx'
in_data = 'test_data/test_input.xlsx'
config_data = 'template_config.xlsx'
outdir = 'test_data/'

# Load the default family sheet and the '样本' sheet from the same workbook;
# both calls use get_excel_info's default index column.
family_info = get_excel_info(in_data)
sample_info = get_excel_info(in_data, sheet_name='样本')
config = get_config(config_data)
# Write one .docx per index label of family_info into outdir.
make_report_by_family(family_info, sample_info, config, template_docx, outdir)

2022-03-24 17:40:42 INFO 1
2022-03-24 17:40:42 INFO 女方姓名
2022-03-24 17:40:42 INFO 女方年龄
2022-03-24 17:40:42 INFO 活检日期
2022-03-24 17:40:42 INFO 男方姓名
2022-03-24 17:40:42 INFO 男方年龄
2022-03-24 17:40:42 INFO 接收日期
2022-03-24 17:40:42 INFO 报告日期
2022-03-24 17:40:42 INFO 2
2022-03-24 17:40:42 INFO 女方姓名
2022-03-24 17:40:42 INFO 女方年龄
2022-03-24 17:40:42 INFO 活检日期
2022-03-24 17:40:42 INFO 男方姓名
2022-03-24 17:40:42 INFO 男方年龄
2022-03-24 17:40:42 INFO 接收日期
2022-03-24 17:40:42 INFO 报告日期


In [None]:
def make_PGS_report(dict_family, dict_sample, dict_config, outdir='./', png_dir=None,
                    template_file='template_default.docx'):
    """Build one PGS report ``.docx`` per family that has sample data.

    For each family key present in ``dict_sample`` a fresh template is opened
    and filled in three steps:

    1. Family fields listed in ``dict_config`` are written to their configured
       cell, using only the first whitespace-separated token of the value.
    2. For each sample (in iteration order), sample fields listed in
       ``dict_config`` are written to the configured cell, shifted down by the
       sample's position so that samples occupy consecutive rows.
    3. When ``png_dir`` is given, the fifth table of the template receives a
       four-row block per sample: sample number, test result, a caption, and
       the image ``<png_dir>/<sample>.png``.

    Families without an entry in ``dict_sample`` are skipped.

    Args:
        dict_family: ``{family_key: {field: value}}``.
        dict_sample: ``{family_key: {sample_key: {field: value}}}``.
        dict_config: ``{field: {'tab': n, 'row': r, 'col': c}}`` where ``tab``
            is the 1-based table number and ``row``/``col`` are the 1-based
            coordinates passed to ``write_table``.
        outdir: Directory in which ``<family_key>.docx`` is saved.
        png_dir: Directory holding per-sample PNG files, or ``None`` to skip
            the per-sample detail blocks.
        template_file: Path of the ``.docx`` template to start from.
    """
    for f, family in dict_family.items():
        logging.info(f)
        if f not in dict_sample:
            continue

        f_out = os.path.join(outdir, f'{f}.docx')
        d = Document(template_file)
        tables = d.tables

        for field, value in family.items():
            if field in dict_config:
                pos = dict_config[field]
                # Keep only the first token; an empty value is written as ''.
                tokens = f'{value}'.split()
                write_table(tables[pos['tab'] - 1], pos['row'], pos['col'], tokens[0] if tokens else '')

        for row_no, (s, s_info) in enumerate(dict_sample[f].items()):
            logging.info(s)
            # Work on a copy so the caller's sample mapping is not modified.
            s_info = {**s_info, '样本编号': str(s)}
            for field, value in s_info.items():
                if field in dict_config:
                    pos = dict_config[field]
                    write_table(tables[pos['tab'] - 1], pos['row'] + row_no, pos['col'], f'{value}')

            if png_dir:
                detail_table = tables[4]
                # Each sample owns four consecutive rows of the detail table.
                base = row_no * 4
                write_table(detail_table, base + 1, 1, "样本编号：", bold=True, font_size=11)
                write_table(detail_table, base + 2, 1, "检测结果：", bold=True, font_size=11)
                write_table(detail_table, base + 3, 1, "染色体拷贝数结果", bold=True, font_size=11)

                # The values are appended to the same cells as their bold labels.
                write_table(detail_table, base + 1, 1, f"{s_info['样本编号']}")
                write_table(detail_table, base + 2, 1, f"{s_info['检测结果']}")
                pic_path = os.path.join(png_dir, f'{s}.png')
                insert_pic(detail_table, base + 4, 1, pic_path)

        d.save(f_out)