-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyzot.py
65 lines (50 loc) · 2.41 KB
/
pyzot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
'''
Accesses excerpted texts in Zotero and generates PDF and DOCX files from the excerpts.
'''
import os, pypandoc, progressbar
from pyzotero import zotero
from slugify import slugify
# Directory that contains this script; the working directories below are
# resolved against it.
base_dir = os.path.dirname(os.path.abspath(__file__))
paths = {
    # Scratch space for the intermediate HTML files.
    'tmp_dir': os.path.join(base_dir, "tmp"),
    # Destination of the generated documents.
    # TODO: implement Google Drive as the output target instead.
    'output_dir': os.path.join(base_dir, "Output"),
}
# Create the working directories. exist_ok=True makes the call a no-op for
# directories that already exist and avoids the race between a separate
# existence check and the creation call.
for directory in paths.values():
    os.makedirs(directory, exist_ok=True)
# Read the Zotero API key from the first line of the key file.
with open('zotero.key', 'r') as key_file:
    consumer_key = key_file.readline().strip()
if not consumer_key:
    # Fail early with a clear message instead of an IndexError on an empty
    # file or an authentication error on the first API request.
    raise ValueError("zotero.key must contain the Zotero API key on its first line")
zot = zotero.Zotero('4111725', 'user', consumer_key)
# Fetch every non-note item of the collection. everything() keeps requesting
# follow-up pages; a bare collection_items() call returns only one page of
# results (see the pyzotero documentation), which would silently truncate
# larger collections.
items = zot.everything(zot.collection_items('86IBK7ME', itemType='-note'))
# Attachments are filtered out locally: per the original author's note,
# pyzotero does not support combined search queries (-note&-attachment).
items = [item for item in items if item['data']['itemType'] != 'attachment']
# Walk through the excerpted documents and render each one to PDF and DOCX.
bar = progressbar.ProgressBar()
for item in bar(items):
    item_key = item['data']['key']
    # NOTE(review): creatorSummary is presumably missing for items without
    # creators; fall back to an empty string instead of raising KeyError.
    creator_slug = slugify(item['meta'].get('creatorSummary', ''))
    title = "%s-%s" % (creator_slug, slugify(item['data']['title']))
    html_path = os.path.join(paths['tmp_dir'], '%s.html' % title)

    # Fetch everything from Zotero before touching the file, so a failed
    # request does not leave a half-written HTML file behind.
    # Excerpt metadata: the formatted citation becomes the document heading.
    citation = zot.item(item_key, content='citation', style='chicago-fullnote-bibliography')
    # Excerpts: child notes that start with the "Yellow Annotations" heading,
    # with that heading stripped.
    children = zot.children(item_key, itemType='note')
    excerpts = [
        child['data']['note'].replace('<strong>Yellow Annotations</strong>', '')
        for child in children
        if child['data']['note'].startswith('<p><strong>Yellow')
    ]

    # The document declares charset utf-8, so write it as UTF-8 explicitly
    # rather than with the platform's default encoding.
    with open(html_path, 'w', encoding='utf-8') as html_file:
        print('<!DOCTYPE html>\n<html><head><meta charset="utf-8"></head>\n<body>\n\n<h1>', citation[0], '</h1>\n', file=html_file)
        for excerpt in excerpts:
            print(excerpt, file=html_file)
        # Close the document exactly once, also when there are no excerpts.
        print('</body></html>', file=html_file)

    # The HTML file is closed (and therefore flushed) before pandoc reads it.
    # Generate the PDF.
    # NOTE(review): newer pandoc releases call this option --pdf-engine;
    # confirm against the installed pandoc version.
    pypandoc.convert_file(
        html_path,
        'latex',
        outputfile=os.path.join(paths['output_dir'], '%s.pdf' % title),
        extra_args=['-V', 'geometry:margin=2cm', '--latex-engine=xelatex'],
    )
    # Generate the DOCX.
    pypandoc.convert_file(
        html_path,
        'docx',
        outputfile=os.path.join(paths['output_dir'], '%s.docx' % title),
    )
    # TODO: generate MOBI as well, e.g.
    # call(['kindlegen', '-c2', '-o', '%s.mobi' % title, html_path])

# Clean up the tmp directory.
for leftover in os.listdir(paths['tmp_dir']):
    os.remove(os.path.join(paths['tmp_dir'], leftover))
print('Alles erledigt.')