-
Notifications
You must be signed in to change notification settings - Fork 3
/
commons_template.py
123 lines (113 loc) · 4.87 KB
/
commons_template.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
# -*- coding: utf-8 -*-
def make_datestring(year, month, day):
    """Format a possibly partial date as YYYY, YYYY-MM or YYYY-MM-DD.

    The zero-padded month is appended only when ``month`` is not None, and
    the zero-padded day only when ``month`` and ``day`` are both not None.
    ``year`` is always required and is padded to four digits.
    """
    pieces = ["%04d" % year]  # YYYY
    if month is not None:
        pieces.append("%02d" % month)  # YYYY-MM
        if day is not None:
            pieces.append("%02d" % day)  # YYYY-MM-DD
    return "-".join(pieces)
def _escape(text):
for original, replacement in [
('=', '{{=}}'),
('|', '{{!}}')
]:
try:
text = text.replace(original, replacement)
except AttributeError:
pass
return text
def _trim(text):
return ' '.join(text.split())
def page(metadata, caption):
    """Return the file-description wikitext for one media file of an article.

    The text is made of an ``{{Information}}`` template (description, date,
    source with an embedded ``{{Cite journal}}``, author, permission and a
    provenance field) followed by ``[[Category:...]]`` links.

    Args:
        metadata: mapping providing the keys ``doi``, ``pmcid``, ``pmid``,
            ``article-contrib-authors``, ``article-title``,
            ``journal-title``, ``article-year``, ``article-month``,
            ``article-day``, ``article-url``, ``article-license-url`` and
            ``article-categories`` (an iterable of category strings).
        caption: description of the media. When falsy, a generic
            description quoting the article DOI is used instead.

    Returns:
        The complete wikitext as one string.

    Raises:
        KeyError: if one of the metadata keys above is missing, or if the
            license URL is non-empty but is not one of the known Creative
            Commons URLs listed below.
    """
    article_doi = metadata['doi']
    pmcid = metadata['pmcid']
    pmid = metadata['pmid']
    authors = metadata['article-contrib-authors']
    article_title = metadata['article-title']
    journal_title = metadata['journal-title']
    article_year = metadata['article-year']
    article_month = metadata['article-month']
    article_day = metadata['article-day']
    article_url = metadata['article-url']
    license_url = metadata['article-license-url']
    categories = metadata['article-categories']
    # Not read here: 'article-abstract', 'article-license-text',
    # 'article-copyright-statement', 'article-copyright-holder'.
    mimetype = 'image'  # need this for templating

    license_templates = {
        u'http://creativecommons.org/publicdomain/zero/1.0/': '{{cc-zero}}',
        u'http://creativecommons.org/licenses/by/1.0/': '{{cc-by-1.0}}',
        u'http://creativecommons.org/licenses/by-sa/1.0/': '{{cc-by-sa-1.0}}',
        u'http://creativecommons.org/licenses/by/2.0/': '{{cc-by-2.0}}',
        u'http://creativecommons.org/licenses/by-sa/2.0/': '{{cc-by-sa-2.0}}',
        u'http://creativecommons.org/licenses/by/2.5/': '{{cc-by-2.5}}',
        u'http://creativecommons.org/licenses/by-sa/2.5/': '{{cc-by-sa-2.5}}',
        u'http://creativecommons.org/licenses/by/3.0/': '{{cc-by-3.0}}',
        u'http://creativecommons.org/licenses/by-sa/3.0/': '{{cc-by-sa-3.0}}',
        u'http://creativecommons.org/licenses/by/4.0/': '{{cc-by-4.0}}',
        u'http://creativecommons.org/licenses/by-sa/4.0/': '{{cc-by-sa-4.0}}'
    }
    # An unknown, non-empty license URL deliberately still raises KeyError:
    # the page must not be generated with a guessed or missing license tag.
    if license_url:
        license_template = license_templates[license_url]
    else:
        license_template = ''

    # Collect the pieces in order and join once at the end.
    parts = ["=={{int:filedesc}}==\n\n", "{{Information\n"]

    if caption:
        description = _escape(caption)
    else:
        # The fixed prefix contains neither '=' nor '|', so only the DOI
        # needs escaping.
        description = "%s %s" % (
            'Media belonging to article cited on Wikipedia with DOI:',
            _escape(article_doi))
    parts.append("|Description=\n")
    if description.strip():
        parts.append("{{en|1=%s}}\n" % description)

    parts.append("|Date= %s\n"
                 % make_datestring(article_year, article_month, article_day))

    label = ("%s file" % mimetype).capitalize()
    parts.append("|Source= [%s %s] from " % (article_url, _escape(label)))
    parts.append("{{Cite journal\n")
    parts.append("| author = %s\n" % _escape(authors))
    parts.append("| title = %s\n" % _escape(_trim(article_title)))
    parts.append("| doi = %s\n" % _escape(article_doi))
    parts.append("| journal = %s\n" % _escape(journal_title))
    parts.append("| year = %s\n" % _escape(article_year))
    if pmid:
        parts.append("| pmid = %s\n" % _escape(pmid))
    if pmcid:
        parts.append("| pmc = %s\n" % _escape(pmcid))
    parts.append("}}\n")
    parts.append("|Author= %s\n" % _escape(authors))
    parts.append("|Permission= %s\n" % license_template)
    parts.append("|Other_fields={{Information field|name=Provenance|value= {{Recitation-bot}} }}\n")
    parts.append("}}\n\n")

    def _capitalize_properly(word):
        """Lower-case *word* unless it has capitals after its first letter."""
        if len(word) == 1:  # single letters should pass through unchanged
            return word
        if word[1:] == word[1:].lower():  # word has no capital letters inside
            return word.lower()
        # words like 'DNA' or 'HeLa' should not be touched
        return word

    def _postprocess_category(category):
        """Turn an article category into a sentence-cased category name.

        Anything from the first '(' onwards is dropped, comma-separated
        parts are put in reverse order, each word (and each hyphen-separated
        part of a word) goes through ``_capitalize_properly``, and the first
        character of the result is upper-cased.
        """
        if '(' in category:
            category = category.split('(')[0]
        if ',' in category:
            category_parts = category.split(',')
            category_parts.reverse()
            category = ' '.join(category_parts)
        processed_category = []
        for word in category.strip().split(' '):
            wordparts = [_capitalize_properly(part)
                         for part in word.split('-')]
            processed_category.append('-'.join(wordparts))
        category = ' '.join(processed_category)
        # Slice instead of indexing: the category can be empty at this point
        # (e.g. '' or '(Foo)'), and category[0] would raise IndexError.
        return category[:1].capitalize() + category[1:]

    for category in categories:
        category = _postprocess_category(category)
        if len(category.split()) > 1:  # no single-word categories
            parts.append("[[Category:%s]]\n" % _escape(category))

    parts.append("[[Category:Media from %s]]\n" % _escape(journal_title))
    parts.append("[[Category:Uploaded with reCitation Bot]]\n")
    parts.append('[[Category:Uploaded_with reCitation Bot and needing category review]]\n')
    return "".join(parts)