# Импорт необходимых для работы кода библиотек

In [1]:
import pandas as pd
import xlsxwriter
import json
import re
import xml.etree.ElementTree as ET
import io

# Настройки (чувствительно к регистру)

In [2]:
# Full path to the XML file WITHOUT the ".bpmn" extension,
# or just the bare file name when the file sits in the same folder as this script.
file_name = r"test"

# Value of bpmn:definitions xmlns:bpmn, wrapped in curly braces.
xmls_bpmn_prefix = '{http://www.omg.org/spec/BPMN/20100524/MODEL}'

# Value of bpmn:definitions xmlns:camunda, wrapped in curly braces.
xmlns_camunda_prefix = '{http://camunda.org/schema/1.0/bpmn}'

# Element names to include in the report (case sensitive).
# 'exclusiveGateway' and 'parallelGateway' are currently left out;
# append them to this list to report gateways as well.
task_list = [
    "serviceTask",
    "scriptTask",
    "userTask",
    "callActivity",
    "startEvent",
    "boundaryEvent",
    "intermediateCatchEvent",
    "endEvent",
    "linkEventDefinition",
    "executionListener",
]

# How input/output parameters are stored in the report:
# True  -> as a dictionary, False -> as "name = value" lines joined by newlines.
io_like_dict = False

# Код

In [10]:
def get_df_from_xml(xml_string, *, bpmn_prefix=None, camunda_prefix=None,
                    tasks=None, as_dict=None) -> "tuple[pd.DataFrame, set]":
    """Build a table of BPMN elements and collect their input/output names.

    Every element whose tag is ``bpmn_prefix + <name from tasks>`` and that
    lives inside a ``process`` element becomes one row with the columns
    ``name``, ``id``, ``type``, ``delegate``, ``inputParameter``,
    ``outputParameter`` and, when the element has one, ``documentation``.

    Args:
        xml_string: BPMN document as text.
        bpmn_prefix: BPMN namespace in curly braces. Defaults to the
            notebook-level ``xmls_bpmn_prefix``.
        camunda_prefix: Camunda namespace in curly braces. Defaults to the
            notebook-level ``xmlns_camunda_prefix``.
        tasks: element names to report. Defaults to the notebook-level
            ``task_list``. Names are matched as ``bpmn_prefix + name``, so a
            name that belongs to another namespace never matches.
        as_dict: store input/output parameters as dictionaries when true,
            otherwise as newline-joined ``"name = value"`` lines. Defaults to
            the notebook-level ``io_like_dict``.

    Returns:
        A ``(df, variables)`` pair: the report DataFrame (one row per matched
        element, in document order) and the set of every input/output
        parameter name that was seen.

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml_string`` is not
            well-formed XML.
    """
    # TODO: auto-detect the bpmn:/camunda: namespaces instead of configuring them.

    # Anything not passed explicitly comes from the notebook settings.
    if bpmn_prefix is None:
        bpmn_prefix = xmls_bpmn_prefix
    if camunda_prefix is None:
        camunda_prefix = xmlns_camunda_prefix
    if tasks is None:
        tasks = task_list
    if as_dict is None:
        as_dict = io_like_dict

    root = ET.fromstring(xml_string)

    # Qualified names are built once instead of once per XML element.
    process_tag = bpmn_prefix + 'process'
    documentation_tag = bpmn_prefix + 'documentation'
    io_tag = camunda_prefix + 'inputOutput'
    input_tag = camunda_prefix + 'inputParameter'
    output_tag = camunda_prefix + 'outputParameter'
    delegate_attr = camunda_prefix + 'class'
    task_tags = {bpmn_prefix + task for task in tasks}

    records = {}
    variables = set()

    # Only elements nested in a process are considered.
    for process in root.iter(process_tag):
        for element in process.iter():
            if element.tag not in task_tags:
                continue

            # First-level data taken straight from the element's attributes.
            record = {
                'name': element.attrib.get('name', '-'),
                'id': element.attrib.get('id', '-'),
                'type': element.tag.replace(bpmn_prefix, ''),
                'delegate': element.attrib.get(delegate_attr, ''),
            }

            # Documentation and input/output parameters from the subtree.
            inputs = {}
            outputs = {}
            for node in element.iter():
                if node.tag == documentation_tag:
                    record['documentation'] = node.text
                elif node.tag == io_tag:
                    for param in node.iter():
                        # A parameter without a (non-empty) name cannot be
                        # keyed, so it is skipped instead of raising KeyError.
                        param_name = param.attrib.get('name')
                        if not param_name:
                            continue
                        if param.tag == input_tag:
                            inputs[param_name] = param.text
                        elif param.tag == output_tag:
                            outputs[param_name] = param.text

            # Remember the parameter names for the separate variables listing.
            variables.update(inputs)
            variables.update(outputs)

            if as_dict:
                record['inputParameter'] = inputs
                record['outputParameter'] = outputs
            else:
                record['inputParameter'] = '\n'.join(
                    f"{key} = {value}" for key, value in inputs.items())
                record['outputParameter'] = '\n'.join(
                    f"{key} = {value}" for key, value in outputs.items())

            # Rows are keyed 0, 1, 2, ... in discovery order.
            records[len(records)] = record

    df = pd.DataFrame.from_dict(records, orient='index')

    return df, variables


# Read the whole BPMN document as text. The content is passed on unmodified
# (apart from trailing whitespace): stripping the literal characters "/n", as
# was done before, removes them from URLs, paths and closing tags and can
# corrupt the document.
with open(file=file_name + ".bpmn", mode="r", encoding='utf-8') as file:
    xml_string = file.read().rstrip()

# The input file is already closed here; nothing below needs it.
# `df` and `variables` stay notebook-level names so later cells can show them.
df, variables = get_df_from_xml(xml_string)

# Main report: one row per BPMN element.
with pd.ExcelWriter(file_name + ".xlsx", engine="xlsxwriter") as writer:
    # NOTE(review): formats[0] is presumably the workbook's default cell
    # format - confirm against the xlsxwriter version in use.
    default_format = writer.book.formats[0]
    default_format.set_text_wrap()
    default_format.set_align('left')
    default_format.set_align('top')
    df.to_excel(writer, index=False)

# Separate listing of every input/output parameter name. The set is sorted so
# the sheet has the same row order on every run.
with pd.ExcelWriter(file_name + "_list_of_vars.xlsx", engine="xlsxwriter") as writer:
    variables_df = pd.DataFrame(data=sorted(variables), columns=['variable_name'])
    variables_df.to_excel(writer, index=False)

print('Файл сохранен!')

Файл сохранен!


In [9]:
df

Unnamed: 0,name,id,type,delegate,inputParameter,outputParameter,documentation
0,подпроцесс external task,Activity_0x3v5gj,callActivity,,topicName = testGroovyScript,,
1,-,Event_0egc5wl,boundaryEvent,,,,
2,Формирование ДТО для Реестра order,Activity_0i4prmb,scriptTask,,,,
3,Создание записи в Реестре order,Activity_17lsfaw,serviceTask,ru.seventech.camunda.serviceTask.MdmDelegate,origin = order\naction = create\ndataScript = ...,,
4,Создать объект заявки,Activity_0v09jxj,scriptTask,,,,
...,...,...,...,...,...,...,...
155,Изменение уведомлений (7 шаг)/Заполнение перем...,Activity_0xs1b5r,scriptTask,,,,
156,Сохранение аттача,Activity_0m3c26r,serviceTask,ru.seventech.camunda.serviceTask.AttachmentsIn...,attachName = \n \nfiles = \n ...,,
157,Сохранение XML в ЕСМ,Activity_1nxd4sa,serviceTask,ru.seventech.camunda.serviceTask.EcmDelegate,origin = \n \naction = smartCreate\...,,
158,формирование XML запроса в vetrf-delegate,Activity_0gboq2r,serviceTask,ru.seventech.camunda.serviceTask.vetrf.AmsVetA...,outputName = certifyEnterpriseForExportApplica...,,


In [8]:
variables

{'BIK',
 'ImoNumber',
 'Input_StageStatus',
 'NEXT',
 '_fn_convertMap',
 'action',
 'activityType',
 'activityTypeProdList',
 'activityTypeProdListUpd',
 'actualAddressMatches',
 'actualPower',
 'additionalInfoAddress',
 'address',
 'addressHierarchyList',
 'areaNumber',
 'attachList',
 'attachName',
 'attach_name',
 'author',
 'bankName',
 'buildPlace',
 'buildYear',
 'caption',
 'checkingAccount',
 'choiceOfObject',
 'companyName',
 'companyNameQuestionary',
 'correspondentAccount',
 'createItem',
 'dataMap',
 'dataScript',
 'directorEmail',
 'directorFIO',
 'directorPhone',
 'directorPosition',
 'ecmDocs',
 'endPoint',
 'enterpriseEnglishName',
 'exportCountriesValue',
 'factoryNumber',
 'fieldsMap',
 'files',
 'filterMap',
 'fio',
 'helperVariables',
 'hidden',
 'homePort',
 'importCountries',
 'inn',
 'input_ProcessStatus',
 'isNeedCheckError',
 'isSaved',
 'juridicalAddress',
 'kpp',
 'notificationLookupName',
 'objectName',
 'objectNameEng',
 'objectNumber',
 'objectType',
 'ord