# Applying python-docx to ZMS content

Hint: The _jupyter notebook_ shows how to use the python-docx library to extract and convert content from ZMS. This code works with direct access to the ZODB database, so that images are not extracted (images need a running Zope server to be accessed).
For production needs ZMS contains a (customizable) command (aka "action") _manage_export_pydocx_ "Export to python-docx" that is based on this notebook code and exports a ZMS page content to a .docx file.


For the action _manage_export_pydocx_ the function `get_docx_normalized_json` creates a normalized JSON stream of a PAGE-like ZMS node. This JSON stream is used for transforming the content to DOCX. It is a list of dicts (key/value pairs), where the first dict represents the container meta data and the following dicts represent the PAGEELEMENTS of the document. Each dict has the following keys:
- id: the id of the node
- meta_id: the meta_id of the node
- parent_id: the id of the parent node
- parent_meta_id: the meta_id of the parent node
- title: the title of the node
- description: the description of the node
- last_change_dt: the last change date of the node
- docx_format: the format of the content ('html' or a DOCX style name, e.g. 'Normal')
- content: the content of the node

Any PAGEELEMENT-node may have a specific 'standard_json_docx' attribute which preprocesses its ZMS content model close to the translation into the DOCX model. The key 'docx_format' is used to determine the style of the content block.
If this attribute method (py-primitive) is not available, the standard_html method of the object's class is used to get the content, so that the (maybe not optimal) HTML will be transformed to DOCX.

Depending on the complexity of the content, its JSON representation may consist of one or multiple key/value sequences. Each of these blocks will create a new block element (e.g. a paragraph) in the DOCX document.

## Step-1: Load basic libraries

In [2]:
import ZODB
from Products.Five.browser.tests.pages import SimpleView
from Testing.makerequest import makerequest 							# makerequest(context)
from Testing.ZopeTestCase.testZODBCompat import make_request_response 	# make_request_response()[1]
from Acquisition import aq_get

from Products.zms import standard
from Products.zms import rest_api

import os
import re
import shutil
import tempfile
import urllib
import json
import requests

import docx
from docx.shared import Pt
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.style import WD_STYLE_TYPE
from docx.oxml import OxmlElement, ns
from docx.shared import Emu

from bs4 import BeautifulSoup

__REGISTRY__['confdict'] None


## Step-2: Open ZODB

In [3]:
# Create a ZODB connection to an existing ZODB database file
# Important: Fit working directory to your Zope instance
try:
	wd = '/home/zope/instance/zms5_dev/var/'
	db = ZODB.DB(os.path.join(wd, 'Data.fs'))
	conn = db.open()
	root = conn.root
	###{'Application': <Application at >}
except Exception as err:
	# When this cell is re-run, `db` is still bound to the database object of
	# the previous run; closing it allows the next run of the cell to reopen
	# the file. On a very first run that fails there is no `db` yet, so
	# report the underlying error instead of raising a NameError.
	if 'db' in globals():
		db.close()
		print('Error: Database connection had to be closed before reopened.')
	else:
		print('Error: Could not open database: %s' % err)

## Step-3: Get ZODB Context

In [4]:
# ZMS-Node /myzms2/content
# ##############################################
# Entry point of the notebook: the content root of the ZMS client 'myzms2',
# read directly from the ZODB root object opened in the previous cell.
context = root.Application.myzms2.content 
# ##############################################

# Add REQUEST to zmscontext object
# (makerequest returns the object wrapped so that context.REQUEST is available)
context = makerequest(context)
# Add REQUEST vars
# Server name/port and URL are only set to placeholder values for a local
# instance; setdefault() keeps values that are already present.
context.REQUEST.environ.setdefault('SERVER_NAME','localhost')
context.REQUEST.environ.setdefault('SERVER_PORT', '8087')
context.REQUEST['URL' ]= 'http://localhost:8087'
# Content language used for the following attribute lookups
context.REQUEST.set('lang','ger')
context.REQUEST.set('path_to_handle','')
# Add RESPONSE
# make_request_response() returns a sequence; its second item is stored as RESPONSE
context.REQUEST.set('RESPONSE', make_request_response()[1])

# Node 'e5' below the content root is used as working context for the REST API
zmscontext = context.e5
request = rest_api._get_request(zmscontext)

# Test: Some example API calls for extracting content from ZMS objects

In [5]:
# Example (disabled): list the tree nodes of zmscontext via the REST API controller
# tree_nodes = rest_api.RestApiController(context,request).list_tree_nodes(zmscontext)[0:1]

# Print the attributes of node 'e12' as formatted JSON
node = context.e12
print(json.dumps(rest_api.get_attrs(node),indent=2))

# Print the attributes of node 'e4' as formatted JSON
node = context.e4
print(json.dumps(rest_api.get_attrs(node),indent=2))

## Test Python-Script
# a_pyscript = makerequest(root.Application.myzmsx.a_pyscript)
# print(a_pyscript.read())

# Get the custom py-method 'standard_json_docx' of a node (zpt does not work!)
# Note: `node` still refers to context.e4 from the statement above.
print(json.dumps(rest_api.get_attr(node,'standard_json_docx'),indent=2))

{
  "id": "e12",
  "meta_id": "ZMSFolder",
  "uid": "uid:38335f90-dea3-459b-b5fb-8566e66b065e",
  "getPath": "/myzms2/content/e12",
  "active": 1,
  "title": "Details about the ZMS concept",
  "titlealt": "Concept",
  "is_page": true,
  "is_page_element": false,
  "index_html": "http://nohost/myzms2/content/e12/",
  "parent_uid": "uid:4b46b796-d146-43c1-8eca-954d1ba2aafc",
  "home_id": "myzms2",
  "level": 1,
  "restricted": false,
  "titleimage": null,
  "levelnfc": "",
  "attr_dc_description": "Aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsu",
  "attr_dc_subject": "",
  "attr_dc_type": "",
  "attr_dc_creator": ""
}
{
  "id": "e4",
  "meta_id": "ZMSTextarea",
  "uid": "uid:68eeb9a5-c69e-4d0f-8869-b07f07e18d1a",
  "getPath": "/myzms2/content/e4",
  "active": 1,
  "title": "ZMSTextarea",
  "titlealt": "ZMSTextarea",
  "is_page": false,
  "is_page_element": true,
  "index_html": "

## Helper Functions 1: DOCX-XML

1. `add_field(paragraph, field_code)` : add a field such as the page number (`PAGE`) to a paragraph (e.g. footer)
2. `add_bottom_border(style)` : adds border-properties to paragraph-style-object

_Hint: the docx API does not support the page counter directly. We have to create a custom footer with a page counter._

In [6]:
# #############################################
# Helper Functions 1: DOCX-XML
# 1. `add_field(paragraph, field_code)` : add a field such as the page number (PAGE) to a paragraph (e.g. footer)
# 2. `add_bottom_border(style)` : adds border-properties to paragraph-style-object
# Hint: the docx API does not support the page counter directly. 
# We have to create a custom footer with a page counter.
# #############################################

# XML-Helpers
def create_element(name):
	"""
	Create a new OOXML element that is not yet attached to a document.
	Parameters:
		name = namespace-prefixed tag name, e.g. 'w:fldChar'
	Returns the element created by OxmlElement.
	"""
	element = OxmlElement(name)
	return element

def create_attribute(element, name, value):
	"""
	Set an attribute on an OOXML element.
	Parameters:
		element = element to modify (in place)
		name = namespace-prefixed attribute name, e.g. 'w:val'
		value = attribute value
	"""
	# ns.qn() converts the prefixed name into the qualified name used by the element
	qualified_name = ns.qn(name)
	element.set(qualified_name, value)


def get_normalized_image_width(w, h, max_w = 460):
	"""
	Return the width an image should get so that its longest side does
	not exceed max_w.
	Parameters:
		w = original image width; if missing (None/0) max_w is returned
		h = original image height; if missing (None/0) only the width
			is taken into account
		max_w = upper limit for the longest side (default 460)
	Returns the (possibly scaled down) width as int; images that already
	fit are never enlarged.
	"""
	if not w:
		return max_w
	# The longer side decides how much the image has to shrink
	longest_side = max(w, h) if h else w
	scale = float(longest_side) / float(max_w) if longest_side > max_w else 1
	return int(w / scale)

# #############################################

# ADD DATA FIELD: eg PAGE, SAVEDATE
def add_field(paragraph, field_code="PAGE"):
	"""
	Append a new run to the paragraph that holds a field definition:
	a 'begin' field character, the instruction text and an 'end'
	field character.
	Parameters:
		paragraph = paragraph object providing add_run()
		field_code = instruction text of the field, e.g. 'PAGE'
	"""
	def field_char(char_type):
		# <w:fldChar w:fldCharType="..."/>
		marker = create_element('w:fldChar')
		create_attribute(marker, 'w:fldCharType', char_type)
		return marker

	begin_marker = field_char('begin')
	instruction = create_element('w:instrText')
	create_attribute(instruction, 'xml:space', 'preserve')
	instruction.text = field_code
	end_marker = field_char('end')

	field_run = paragraph.add_run()
	for node in (begin_marker, instruction, end_marker):
		field_run._r.append(node)


# BOOKMARK ZMS-ID
def prepend_bookmark(docx_block, bookmark_id):
	"""
	Insert a bookmark (start and end element) at the very beginning of
	a docx block so that the block can be addressed by its ZMS id.
	Parameters:
		docx_block = docx object exposing its XML element as `_element`
			(e.g. a paragraph or a table)
		bookmark_id = id used both as bookmark id and as bookmark name;
			nothing is inserted if it is None or empty
	Blocks that cannot take the bookmark elements are skipped silently
	(best effort).
	"""
	# A missing id cannot be written as an XML attribute value
	if bookmark_id is None or bookmark_id == '':
		return
	bookmark_id = str(bookmark_id)
	bookmark_start = create_element('w:bookmarkStart')
	create_attribute(bookmark_start, 'w:id', bookmark_id)
	create_attribute(bookmark_start, 'w:name', bookmark_id)
	bookmark_end = create_element('w:bookmarkEnd')
	create_attribute(bookmark_end, 'w:id', bookmark_id)
	try:
		# Both are inserted at index 0: inserting the end element first
		# leaves the start element in front of it
		docx_block._element.insert(0, bookmark_end)
		docx_block._element.insert(0, bookmark_start)
	except (AttributeError, TypeError):
		# Block has no usable XML element: the bookmark is optional
		pass

def add_hyperlink(docx_block, link_text, url, url_base='http://neon/'):
	"""
	Append an external hyperlink to a paragraph.
	Parameters:
		docx_block = paragraph object (its `part` and `_p` are used)
		link_text = visible text of the link
		url = link target; a missing/empty target and 'javascript:'
			links are omitted
		url_base = server prefix for targets without a domain name,
			e.g. 'http://127.0.0.1:8080/'
	"""
	# Omit links without target and javascript links
	if not url or url.startswith('javascript:'):
		return
	# Fix missing domain name
	if 'http' in url:
		url = url.replace('http:///', url_base)
	else:
		url = url_base + (url[1:] if url.startswith('/') else url)
	# Register the target as external relationship of the document part
	r_id = docx_block.part.relate_to(url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True)
	hyper_link = create_element('w:hyperlink')
	create_attribute(hyper_link, 'r:id', r_id)
	# Run inside the hyperlink, formatted with the character style 'Hyperlink'
	hyper_link_run = create_element('w:r')
	hyper_link_run_prop = create_element('w:rPr')
	hyper_link_run_prop_style = create_element('w:rStyle')
	create_attribute(hyper_link_run_prop_style, 'w:val', 'Hyperlink')
	hyper_link_run_prop.append(hyper_link_run_prop_style)
	hyper_link_run.append(hyper_link_run_prop)
	hyper_link_text = create_element('w:t')
	hyper_link_text.text = link_text
	hyper_link_run.append(hyper_link_text)
	hyper_link.append(hyper_link_run)
	docx_block._p.append(hyper_link)

## Helper Functions 2: HTML/Richtext-Processing 

In [7]:
# #############################################
# ADD RUNS
# #############################################
def add_runs(docx_block, bs_element):
	"""
	Add the inline content of a BeautifulSoup block element as runs to
	a docx paragraph.
	Only a minimum set of inline elements is translated:
		<strong> = bold run
		<em> = italic run
		<a href> = hyperlink followed by a blank
	Anchors without href are added as plain text; any other child is
	added as run holding its string representation.
	Parameters:
		docx_block = paragraph object providing add_run()
		bs_element = BeautifulSoup element whose children are processed
	"""
	children = list(bs_element.children or [])
	if not children:
		# Element without child nodes: keep its own text, if any
		if bs_element.text:
			docx_block.add_run(bs_element.text)
		return
	for elrun in children:
		if elrun.name == 'strong':
			docx_block.add_run(elrun.text).bold = True
		elif elrun.name == 'em':
			docx_block.add_run(elrun.text).italic = True
		elif elrun.name == 'a' and elrun.get('href'):
			add_hyperlink(docx_block = docx_block, link_text = elrun.text, url = elrun.get('href'))
			docx_block.add_run(' ')
		elif elrun.name == 'a':
			# Anchor without target: nothing to link to, keep the text
			docx_block.add_run(elrun.text)
		else:
			docx_block.add_run(str(elrun))


# #############################################
# ADD HTML-BLOCK TO DOCX
# #############################################
def _apply_class_style(docx_doc, paragraph, element, caption_aware=False):
	"""Style a paragraph after the first CSS class of its HTML element ('Normal' if no such style exists)."""
	if not element.has_attr('class'):
		return
	classes = element['class']
	if caption_aware and 'caption' in classes:
		paragraph.style = docx_doc.styles['Caption']
		return
	class_name = classes[0] if classes else ''
	style_name = class_name if class_name in docx_doc.styles else 'Normal'
	paragraph.style = docx_doc.styles[style_name]


def _add_list_to_docx(docx_doc, list_element, level=0, bookmark_id=None):
	"""Add the items of a <ul>/<ol> element (including nested lists) as list paragraphs."""
	li_styles = {'ul':'ListBullet', 'ol':'ListNumber'}
	level_suffix = str(level+1) if level else ''
	style_name = '%s%s'%(li_styles[list_element.name], level_suffix)
	for li in list_element.find_all('li', recursive=False):
		# Text of the item itself; nested lists get their own paragraphs below
		item_text = ''.join(
			child.get_text() if child.name else str(child)
			for child in li.children
			if child.name not in ('ul', 'ol')
		).strip()
		p = docx_doc.add_paragraph(item_text, style=style_name)
		if bookmark_id:
			# Only the first item carries the bookmark
			prepend_bookmark(p, bookmark_id)
			bookmark_id = None
		for sublist in li.find_all(['ul','ol'], recursive=False):
			_add_list_to_docx(docx_doc, sublist, level+1)


def _add_table_to_docx(docx_doc, table_element, bookmark_id=None):
	"""Add an HTML table (and its caption, if any) to the document."""
	caption = table_element.find('caption')
	if caption is not None:
		p = docx_doc.add_paragraph(caption.text, style='Caption')
		if bookmark_id:
			prepend_bookmark(p, bookmark_id)
	rows = [row.find_all(['td','th']) for row in table_element.find_all('tr')]
	# The widest row defines the number of columns, so ragged rows still fit
	col_count = max([len(cells) for cells in rows] or [0])
	if not col_count:
		return
	table = docx_doc.add_table(rows=len(rows), cols=col_count)
	table.style = 'Table Grid'
	table.alignment = WD_TABLE_ALIGNMENT.CENTER
	for r, cells in enumerate(rows):
		for i, cl in enumerate(cells):
			cell = table.cell(r, i)
			cell.text = cl.text
			if cl.name == 'th' and cell.paragraphs[0].runs:
				cell.paragraphs[0].runs[0].bold = True
	if caption is None and bookmark_id:
		prepend_bookmark(table, bookmark_id)


def _add_image_to_docx(zmscontext, docx_doc, element, bookmark_id=None, maxwidth=460):
	"""Download the image of an <img>/<figure> element and add it as picture (best effort)."""
	from io import BytesIO

	img = element.find('img') if element.name == 'figure' else element
	if img is None or not img.has_attr('src'):
		return
	src = img['src']
	if not src.startswith('http'):
		# Relative path: rebuild the absolute URL from the URL of the ZMS context
		src_url0 = zmscontext.absolute_url().split('/content/')[0]
		src_url1 = src.split('/content/')[-1]
		src = '%s/content/%s'%(src_url0, src_url1)

	# Limit the picture width to maxwidth; non-numeric widths (e.g. '100%') are ignored
	try:
		imgwidth = int(float(img['width'])) if img.has_attr('width') else None
	except ValueError:
		imgwidth = None
	imgwidth = min(imgwidth, maxwidth) if imgwidth else maxwidth

	try:
		response = requests.get(src, timeout=30)
		response.raise_for_status()
		# The picture is added from memory, no file is left behind
		docx_doc.add_picture(BytesIO(response.content), width=Emu(imgwidth*9525))
	except Exception as err:
		# A missing or unreadable image must not abort the export
		standard.writeStdout(None, 'Error: Could not add image %s: %s' % (src, err))
		return
	if bookmark_id:
		prepend_bookmark(docx_doc.paragraphs[-1], bookmark_id)


def add_htmlblock_to_docx(zmscontext, docx_doc, htmlblock, zmsid=None):
	"""
	Translate a block of HTML into docx block elements and append them
	to the document.
	Handled top-level elements: h1-h6, p, ul/ol, table, img/figure,
	div and a (both processed recursively if they contain block
	content); hr is ignored; any other element is added as paragraph
	holding its HTML source.
	Parameters:
		zmscontext = ZMS node, used to resolve relative image URLs
		docx_doc = docx document the content is appended to
		htmlblock = HTML source
		zmsid = id of the ZMS node; if given, a bookmark with this id
			is set before the first element of the block
	Returns docx_doc.
	"""
	# Clean HTML
	# NOTE(review): clean_html is not defined in the visible part of this
	# notebook - it is expected to be provided elsewhere.
	htmlblock = clean_html(htmlblock)

	# Apply BeautifulSoup and iterate over elements
	soup = BeautifulSoup(htmlblock, 'html.parser')

	# Counter for html elements: set bookmark before first element
	c = 0

	# Iterate over elements
	for element in soup.children:
		if element.name is None:
			# Plain text nodes between the elements are skipped
			continue
		c += 1
		# Only the first element of the block carries the bookmark
		bookmark_id = zmsid if c == 1 else None
		tag = element.name

		if tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
			p = docx_doc.add_heading(element.text, level=int(tag[1]))
			if bookmark_id:
				prepend_bookmark(p, bookmark_id)

		elif tag == 'p':
			p = docx_doc.add_paragraph()
			if bookmark_id:
				prepend_bookmark(p, bookmark_id)
			_apply_class_style(docx_doc, p, element, caption_aware=True)
			add_runs(docx_block = p, bs_element = element)

		elif tag in ['ul','ol']:
			_add_list_to_docx(docx_doc, element, level=0, bookmark_id=bookmark_id)

		elif tag == 'table':
			_add_table_to_docx(docx_doc, element, bookmark_id=bookmark_id)

		elif tag == 'img' or tag == 'figure':
			_add_image_to_docx(zmscontext, docx_doc, element, bookmark_id=bookmark_id)

		elif tag == 'div':
			child_tags = [e.name for e in element.children if e.name]
			if 'em' in child_tags or 'strong' in child_tags:
				# div holding inline content is treated like a paragraph
				p = docx_doc.add_paragraph()
				if bookmark_id:
					prepend_bookmark(p, bookmark_id)
				_apply_class_style(docx_doc, p, element)
				add_runs(docx_block = p, bs_element = element)
			else:
				div_html = ''.join([str(e) for e in element.children])
				add_htmlblock_to_docx(zmscontext, docx_doc, div_html, bookmark_id)

		elif tag == 'a':
			# Hyperlink containing a block element
			div_html = ''.join([str(e) for e in element.children])
			add_htmlblock_to_docx(zmscontext, docx_doc, div_html, bookmark_id)

		elif tag == 'hr':
			# ignore horizontal rule
			pass

		else:
			p = docx_doc.add_paragraph(str(element))
			if bookmark_id:
				prepend_bookmark(p, bookmark_id)

	return docx_doc

## Helper Functions 3: Set Docx-Styles

In [8]:

# #############################################
# Helper Function for Style Definitions
# #############################################

# BORDER BOTTOM
def add_bottom_border(style):
	"""
	Add a single bottom border line (color 017D87) to the paragraph
	properties of a style.
	Parameters:
		style = paragraph style, e.g. styles['Title']
	If the border cannot be added (e.g. the style has no paragraph
	properties yet) a message is written and the style stays unchanged.
	"""
	border = create_element('w:pBdr') # pBdr = Paragraph border
	bottom = create_element('w:bottom')
	create_attribute(bottom, 'w:val', 'single')
	create_attribute(bottom, 'w:sz', '2')
	create_attribute(bottom, 'w:space', '9')
	create_attribute(bottom, 'w:color', '017D87')
	border.append(bottom)
	try:
		style.element.pPr.append(border) # pPr = Paragraph properties
	except Exception:
		# Best effort: a missing border must not abort the style setup
		standard.writeStdout(None, 'Error: Could not add bottom border to style %s' % style.name)

def add_listbullet_arrow(style):
	"""
	Append numbering properties (ilvl=1, numId=1), left justification
	and outline level 0 to the paragraph properties of a style.
	Parameters:
		style = paragraph style whose properties are extended
	"""
	def valued(tag, value):
		# Element with a single 'w:val' attribute
		node = create_element(tag)
		create_attribute(node, 'w:val', value)
		return node

	numbering = create_element('w:numPr') # numPr = Numbering properties
	for child in (valued('w:ilvl', '1'), valued('w:numId', '1')):
		numbering.append(child)
	style.element.pPr.append(numbering)
	style.element.pPr.append(valued('w:jc', 'left'))
	style.element.pPr.append(valued('w:outlineLvl', '0'))


def add_paragraph_bgcolor(style, color):
	"""
	Append a background shading and a border on all four sides, both
	in the given color, to the paragraph properties of a style.
	Parameters:
		style = paragraph style, e.g. styles['ZMSNotiz']
		color = hex color without '#', e.g. 'fff5ce'
	"""
	# shd = Shading
	fill = create_element('w:shd')
	for attr_name, attr_value in (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', color)):
		create_attribute(fill, attr_name, attr_value)
	style.element.pPr.append(fill)

	# pBdr = Paragraph border
	frame = create_element('w:pBdr')
	edge_attrs = (('w:val', 'single'), ('w:sz', '4'), ('w:space', '5'), ('w:color', color))
	for edge in ('left', 'right', 'top', 'bottom'):
		edge_node = create_element('w:%s' % edge)
		for attr_name, attr_value in edge_attrs:
			create_attribute(edge_node, attr_name, attr_value)
		frame.append(edge_node)
	style.element.pPr.append(frame)

def add_table_bgcolor(style, color):
	"""
	Append a background shading and a table-borders element, both in
	the given color, to the table properties of a style.
	Parameters:
		style = table style, e.g. styles['Normal Table']
		color = hex color without '#', e.g. 'fff5ce'
	"""
	table_parts = (
		# shd = Shading
		('w:shd', (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', color))),
		# tblBorders = Table borders
		('w:tblBorders', (('w:val', 'single'), ('w:sz', '4'), ('w:space', '5'), ('w:color', color))),
	)
	for tag, attrs in table_parts:
		node = create_element(tag)
		for attr_name, attr_value in attrs:
			create_attribute(node, attr_name, attr_value)
		style.element.tblPr.append(node)

def add_character_bgcolor(style, color):
	"""
	Append a background shading and a run border, both in the given
	color, to the run properties of a style.
	Parameters:
		style = character style, e.g. styles['Macro Text Char']
		color = hex color without '#', e.g. '017D87'
	"""
	run_parts = (
		# shd = Shading
		('w:shd', (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', color))),
		# bdr = run border
		('w:bdr', (('w:val', 'single'), ('w:sz', '12'), ('w:space', '1'), ('w:color', color))),
	)
	for tag, attrs in run_parts:
		node = create_element(tag)
		for attr_name, attr_value in attrs:
			create_attribute(node, attr_name, attr_value)
		style.element.rPr.append(node)

# #############################################
# Helper Functions 3: Set Docx-Styles
# #############################################
# List of all style names:
# print('\n'.join([s.name for s in doc.styles]))
# List of all style IDs:
# print('\n'.join([s.style_id for s in doc.styles]))

# OBSOLETE: 
# Function is not used in the current version
# Styles are provided in the docx-template
def set_docx_styles(doc):
	"""
	Define the page margin and the paragraph/character styles used by
	the DOCX export directly on the document.
	Built-in styles are adjusted; custom styles (Description,
	Hyperlink, refGlossary, Keyboard, Table-Small, Table-Caption,
	TOC-Header, ZMSNotiz, emphasis) are created if the document does
	not contain them yet, so the function also works on a template
	that already provides some of them.
	Parameters:
		doc = docx document
	Returns the same document.
	"""
	# Names needed only here and not part of the notebook's import cell
	from docx.enum.text import WD_TAB_ALIGNMENT
	from docx.shared import Cm

	styles = doc.styles

	def ensure_style(name, style_type):
		# add_style() refuses a name that is already present in the document
		if name in styles:
			return styles[name]
		return styles.add_style(name, style_type)

	# Custom color 1: #017D87 dark turquoise
	color_turquoise = docx.shared.RGBColor(1, 125, 135)
	# Custom color 2: #AAAAAA light grey
	color_lightgrey = docx.shared.RGBColor(170, 170, 170)
	# Custom color 4: #FFFFFF white
	color_white = docx.shared.RGBColor(255, 255, 255)
	# Custom color 5: #0070FF blue
	color_blue = docx.shared.RGBColor(0, 112, 255)

	# Page margins
	doc.sections[0].top_margin = Emu(120*9525)

	# Normal
	normal = styles['Normal']
	normal.font.name = 'Arial'
	normal.font.size = Pt(9)
	normal.paragraph_format.space_after = Pt(6)
	normal.paragraph_format.space_before = Pt(6)
	normal.paragraph_format.line_spacing = 1.35

	# Headlines derived from Normal
	# (base_style is the python-docx attribute for style inheritance)
	heading1 = styles['Heading 1']
	heading1.base_style = normal
	heading1.font.name = 'Arial'
	heading1.font.size = Pt(24)
	heading1.font.bold = False
	heading1.paragraph_format.line_spacing = 1
	heading1.paragraph_format.space_before = Pt(18)
	heading1.paragraph_format.space_after = Pt(18)
	heading1.font.color.rgb = color_turquoise

	title = styles['Title']
	title.base_style = heading1
	title.font.name = 'Arial'
	title.font.size = Pt(24)
	title.font.bold = False
	title.paragraph_format.line_spacing = 1
	title.paragraph_format.space_before = Pt(18)
	title.paragraph_format.space_after = Pt(18)
	title.font.color.rgb = color_turquoise
	add_bottom_border(title)

	heading2 = styles['Heading 2']
	heading2.base_style = normal
	heading2.font.name = 'Arial'
	heading2.font.size = Pt(18)
	heading2.font.bold = False
	heading2.paragraph_format.line_spacing = 1.2
	heading2.paragraph_format.space_before = Pt(24)
	heading2.font.color.rgb = color_turquoise

	heading3 = styles['Heading 3']
	heading3.base_style = normal
	heading3.font.name = 'Arial'
	heading3.font.size = Pt(13)
	heading3.font.bold = True
	heading3.paragraph_format.space_before = Pt(22)
	heading3.font.color.rgb = color_turquoise

	heading4 = styles['Heading 4']
	heading4.base_style = normal
	heading4.font.name = 'Arial'
	heading4.font.size = Pt(10)
	heading4.paragraph_format.space_before = Pt(14)
	heading4.font.bold = True
	heading4.font.color.rgb = color_turquoise

	# More styles derived from Normal
	description = ensure_style('Description', WD_STYLE_TYPE.PARAGRAPH)
	description.base_style = normal
	description.font.name = 'Arial'
	description.font.size = Pt(9)
	description.font.italic = True
	description.font.color.rgb = color_turquoise
	description.paragraph_format.space_after = Pt(18)
	description.paragraph_format.line_spacing = 1.35
	add_bottom_border(description)

	caption = styles['Caption']
	caption.font.name = 'Arial'
	caption.font.size = Pt(8)
	caption.font.italic = True
	caption.font.color.rgb = color_turquoise
	caption.paragraph_format.space_before = Pt(6)
	caption.paragraph_format.space_after = Pt(24)
	caption.paragraph_format.keep_with_next = True

	quote_char = styles['Quote Char']
	quote_char.font.color.rgb = color_lightgrey
	quote_char.font.bold = True
	quote_char.font.italic = True

	hyperlink = ensure_style('Hyperlink', WD_STYLE_TYPE.CHARACTER)
	hyperlink.font.color.rgb = color_turquoise
	hyperlink.font.underline = True

	ref_glossary = ensure_style('refGlossary', WD_STYLE_TYPE.CHARACTER)
	ref_glossary.font.color.rgb = color_turquoise
	ref_glossary.font.italic = False

	macro = styles['macro']
	macro.font.size = Pt(9)
	macro.paragraph_format.space_before = Pt(12)
	macro.paragraph_format.space_after = Pt(12)
	macro.paragraph_format.line_spacing = 1.35

	macro_char = styles['Macro Text Char']
	macro_char.font.bold = True
	macro_char.font.color.rgb = color_blue

	keyboard = ensure_style('Keyboard', WD_STYLE_TYPE.CHARACTER)
	keyboard.font.name = 'Courier New'
	keyboard.font.size = Pt(8)
	keyboard.font.bold = True
	keyboard.font.color.rgb = color_white
	add_character_bgcolor(keyboard, '000000')

	header = styles['header']
	header.font.size = Pt(7)
	header.font.color.rgb = color_lightgrey
	header.paragraph_format.space_before = Pt(0)
	header.paragraph_format.line_spacing = 1.5

	footer = styles['footer']
	footer.font.size = Pt(7)
	footer.font.color.rgb = color_lightgrey
	# Remove default tabstops
	footer.paragraph_format.tab_stops.clear_all()
	# Set new tabstop for right-aligned text
	footer.paragraph_format.tab_stops.add_tab_stop(Cm(16.5), WD_TAB_ALIGNMENT.RIGHT)

	# Table small
	table_small = ensure_style('Table-Small', WD_STYLE_TYPE.PARAGRAPH)
	table_small.font.name = 'Arial'
	table_small.font.size = Pt(7)
	table_small.paragraph_format.space_after = Pt(2)
	table_small.paragraph_format.space_before = Pt(2)
	table_small.paragraph_format.line_spacing = 1.2

	table_caption = ensure_style('Table-Caption', WD_STYLE_TYPE.PARAGRAPH)
	table_caption.base_style = normal
	table_caption.font.name = 'Arial'
	table_caption.font.italic = True
	table_caption.font.color.rgb = color_turquoise
	table_caption.paragraph_format.space_before = Pt(24)
	table_caption.paragraph_format.space_after = Pt(3)
	table_caption.paragraph_format.keep_with_next = True

	# Table of contents
	toc_header = ensure_style('TOC-Header', WD_STYLE_TYPE.PARAGRAPH)
	toc_header.base_style = heading2
	toc_header.font.name = 'Arial'
	toc_header.font.size = Pt(12)
	toc_header.font.bold = True
	toc_header.font.color.rgb = color_lightgrey
	toc_header.paragraph_format.space_before = Pt(12)
	add_bottom_border(toc_header)

	# Note
	notiz = ensure_style('ZMSNotiz', WD_STYLE_TYPE.PARAGRAPH)
	notiz.base_style = normal
	notiz.font.name = 'Arial'
	notiz.font.size = Pt(8)
	notiz.paragraph_format.space_before = Pt(12)
	notiz.paragraph_format.space_after = Pt(12)
	notiz.paragraph_format.line_spacing = 1.5
	# Add background color
	add_paragraph_bgcolor(notiz, 'fff5ce')

	# Key sentence
	emphasis = ensure_style('emphasis', WD_STYLE_TYPE.PARAGRAPH)
	emphasis.base_style = normal
	emphasis.font.name = 'Arial'
	emphasis.font.size = Pt(9)
	emphasis.font.bold = False
	emphasis.font.italic = True
	emphasis.paragraph_format.space_before = Pt(12)
	emphasis.paragraph_format.space_after = Pt(12)
	emphasis.paragraph_format.line_spacing = 1.5
	# Add background color
	add_paragraph_bgcolor(emphasis, 'f0f8ff')

	return doc

## HINT: standard_json_docx

PAGEELEMENTS may have a 'standard_json_docx' to create a docx-like representation of the content.
PAGE objects will iterate over all PAGEELEMENTS and create a JSON stream of the page content. The action 'manage_export_pydocx' contains this method as a function; if you want to use it in a notebook, you have to copy the code into the model of the PAGE-like content classes.

```python
## Script (Python) "ZMSDocument.standard_json_docx"
##bind container=container
##bind context=context
##bind namespace=
##bind script=script
##bind subpath=traverse_subpath
##parameters=zmscontext=None,options=None
##title=py: JSON Template: ZMSDocument
##
# --// standard_json //--
from Products.zms import standard
request = zmscontext.REQUEST

id = zmscontext.id
meta_id = zmscontext.meta_id
parent_id = zmscontext.getParentNode().id
parent_meta_id = zmscontext.getParentNode().meta_id
title = zmscontext.attr('title')
descripton = zmscontext.attr('attr_dc_descripton')
last_change_dt = zmscontext.attr('change_dt') or zmscontext.attr('created_dt')
url = zmscontext.getHref2IndexHtml(request)

# 1st block is container meta data
blocks = [
	{
		'id':id,
		'url':url,
		'meta_id':meta_id,
		'parent_id':parent_id,
		'parent_meta_id':parent_meta_id,
		'title':title,
		'descripton':descripton,
		'last_change_dt':last_change_dt
	}
]

# Sequence all pageelements
for pageelement in zmscontext.filteredChildNodes(request,zmscontext.PAGEELEMENTS):
	if pageelement.attr('change_dt') and pageelement.attr('change_dt') >= last_change_dt:
		last_change_dt = pageelement.attr('change_dt')
	json_block = []
	json_block = pageelement.attr('standard_json')
	if not json_block:
		html = ''
		try:
			html = pageelement.getBodyContent(request)
			# Clean html data
			html = standard.re_sub(r'<!--(.|\s|\n)*?-->', '', html)
			html = standard.re_sub(r'\n|\t|\s\s', '', html)
		except:
			html = '<table>'
			html += '<caption>Rendering Error: %s</caption>' % pageelement.meta_id
			attrs = [d['id'] for d in zmscontext.getMetaobjAttrs(pageelement.meta_id) if d['type'] not in ['dtml','zpt','py','constant','resource','interface']]
			for attr in attrs:
				html += '<tr><td>%s</td><td>%s</td></tr>' % (attr, pageelement.attr(attr))
			html += '</table>'
		# Create a json block
		json_block = [{
			'id': pageelement.id,
			'meta_id': pageelement.meta_id,
			'parent_id': pageelement.getParentNode().id,
			'parent_meta_id': pageelement.getParentNode().meta_id,
			'docx_format': 'html',
			'content': html
		}]
	blocks.extend(json_block)

# Update last_change_dt
blocks[0]['last_change_dt'] = last_change_dt

return blocks

# --// /standard_json //--
```

## MAIN function for DOCX-Generation

In [9]:
def manage_export_pydocx(self):
	"""
	Export the content of a PAGE-like ZMS node as .docx file.
	The node's 'standard_json_docx' attribute delivers a list of dicts:
	the first one holds the meta data of the page (title, description,
	url, last_change_dt, id), the following ones its content blocks
	(id, docx_format, content).
	Parameters:
		self = ZMS node to be exported
	Returns the binary data of the docx file; the download headers are
	set on the RESPONSE of the node's REQUEST.
	"""
	request = self.REQUEST

	# #############################################
	# 1. INIT DOCUMENT
	# #############################################
	doc = docx.Document()	# Hint: may use template like docx.Document('template.docx')

	# #############################################
	# 2. SET DOCX STYLES
	# #############################################
	doc = set_docx_styles(doc)

	# #############################################
	# 3. ITERATE JSON CONTENT TO DOCX
	# #############################################
	# A node without JSON representation results in an empty document
	zmsdoc = self.attr('standard_json_docx') or [{}]
	heading = zmsdoc[0]
	blocks = zmsdoc[1:]

	dt = standard.getLangFmtDate(self, heading.get('last_change_dt',''), 'eng', '%Y-%m-%d')
	url = (heading.get('url') or '').replace('nohost','localhost')
	doc.sections[0].header.paragraphs[0].text = '%s\t\t%s\nURL: %s'%(heading.get('title',''), dt, url)
	# Footer: label followed by the page number field
	footer_paragraph = doc.sections[0].footer.paragraphs[0]
	footer_paragraph.add_run('Seite ')
	add_field(footer_paragraph, 'PAGE')

	doc.add_heading(heading.get('title',''), level=1)
	if heading.get('id'):
		prepend_bookmark(doc.paragraphs[-1], heading.get('id'))

	description = heading.get('description')
	if description:
		descr = doc.add_paragraph(description)
		descr.style = 'Description'

	for block in blocks:
		content = block.get('content')
		docx_format = block.get('docx_format') or 'Normal'
		if docx_format == 'html':
			# 'html' is no docx style: empty html blocks are skipped
			if content:
				add_htmlblock_to_docx(zmscontext=self, docx_doc=doc, htmlblock=content, zmsid=block.get('id'))
			continue
		p = doc.add_paragraph(content or '', style=docx_format)
		if block.get('id'):
			prepend_bookmark(p, block.get('id'))

	# Save document in temporary directory and read it back;
	# the directory is removed even if saving or reading fails
	fn = '%s.docx'%(self.id_quote(self.getTitlealt(request)))
	with tempfile.TemporaryDirectory() as tempfolder:
		docx_filename = os.path.join(tempfolder, fn)
		doc.save(docx_filename)
		with open(docx_filename, 'rb') as f:
			data = f.read()

	# Set the HTTP response headers
	request.RESPONSE.setHeader('Content-Disposition', f'inline;filename={fn}')
	request.RESPONSE.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')

	# Return the data of the docx file
	return data

## Example: Generate DOCX from ZMS-Content

In [10]:
# Node to be exported: e36 below e34 of the content root
zmsdoc = context.e34.e36
# manage_export_pydocx returns the binary data of the generated docx file
data = manage_export_pydocx(zmsdoc)
# Write the data to a file
with open('test.docx', 'wb') as f:
	f.write(data)

In [11]:
# Finally close the ZODB database that was opened in Step-2
db.close()