Permalink
Browse files

+ simplify counting code in oa-cache stats by using an autovivifying counting dictionary instead of a default dictionary
  • Loading branch information...
erlehmann committed Feb 17, 2013
1 parent 3fba67b commit ec2a0d64064f952bf17a24fe023df0e682a78c25
Showing with 47 additions and 89 deletions.
  1. +1 −5 helpers/__init__.py
  2. +17 −0 helpers/autovividict.py
  3. +29 −84 oa-cache
View
@@ -1,7 +1,4 @@
-#!/usr/local/bin/python
-# -*- coding: utf-8 -*
-
-from sys import stderr
+from autovividict import autovividict
def make_datestring(year, month, day):
datestring = "%04d" % year # YYYY
@@ -10,4 +7,3 @@ def make_datestring(year, month, day):
if day is not None:
datestring += "-%02d" % day # YYYY-MM-DD
return datestring
-
View
@@ -0,0 +1,17 @@
+from collections import defaultdict
+
+# counting dictionary that behaves like 0 under addition (see __add__)
+# this allows incrementing unknown keys: d['k'] += 1 stores the int 1 under 'k'
+class countdict(defaultdict):
+ def __init__(self, *args, **kwargs):
+ self.value = 0
+ super(countdict, self).__init__(*args, **kwargs)
+ def __repr__(self):
+ return str(dict(self))
+ def __add__(self, x):
+ return self.value + x
+
+# autovivifying counting dictionary: missing keys are created on first access
+# explained at <http://en.wikipedia.org/wiki/Autovivification#Python>
+def autovividict():
+ return countdict(autovividict)
View
113 oa-cache
@@ -19,7 +19,7 @@ import pprint
import subprocess
-from helpers import media, make_datestring
+from helpers import autovividict, media, make_datestring
from model import session, setup_all, create_all, set_source, \
Article, Category, Journal, SupplementaryMaterial
@@ -306,28 +306,28 @@ if action == "stats":
p = progressbar.ProgressBar(maxval=len(materials))
completed = 0
licenses = {
- 'free': {},
- 'non-free': {},
+ 'free': autovividict(),
+ 'non-free': autovividict(),
}
licensing_publishers = {
- 'url': {},
- 'url-from-text': {},
- 'text': {},
- 'none': {}
+ 'url': autovividict(),
+ 'url-from-text': autovividict(),
+ 'text': autovividict(),
+ 'none': autovividict()
}
mimetypes = {
- 'free': {},
- 'non-free': {},
- 'misreported': {}
+ 'free': autovividict(),
+ 'non-free': autovividict(),
+ 'misreported': autovividict()
}
mimetypes_publishers = {
- 'correct': {},
- 'incorrect': {},
- 'unknown': {}
+ 'correct': autovividict(),
+ 'incorrect': autovividict(),
+ 'unknown': autovividict()
}
mimetypes_prefix_publishers = {
- 'free': {},
- 'non-free': {}
+ 'free': autovividict(),
+ 'non-free': autovividict()
}
for material in materials:
license_url = material.article.license_url
@@ -343,87 +343,32 @@ if action == "stats":
try:
mimetype_composite_reported = material.mimetype_reported + '/' + material.mime_subtype_reported
if mimetype_composite != mimetype_composite_reported:
- try:
- mimetypes['misreported'][mimetype_composite][mimetype_composite_reported] += 1
- except KeyError:
- try:
- mimetypes['misreported'][mimetype_composite][mimetype_composite_reported] = 1
- except KeyError:
- mimetypes['misreported'][mimetype_composite] = {
- mimetype_composite_reported: 1
- }
- try:
- mimetypes_publishers['incorrect'][doi_prefix] += 1
- except KeyError:
- mimetypes_publishers['incorrect'][doi_prefix] = 1
+ mimetypes['misreported'][mimetype_composite][mimetype_composite_reported] += 1
+ mimetypes_publishers['incorrect'][doi_prefix] += 1
else: # mimetype is correct
- try:
- mimetypes_publishers['correct'][doi_prefix] += 1
- except KeyError:
- mimetypes_publishers['correct'][doi_prefix] = 1
+ mimetypes_publishers['correct'][doi_prefix] += 1
except TypeError: # oa-get update-mimetypes was not run
- try:
- mimetypes_publishers['unknown'][doi_prefix] += 1
- except KeyError:
- mimetypes_publishers['unknown'][doi_prefix] = 1
+ mimetypes_publishers['unknown'][doi_prefix] += 1
if license_url in config.free_license_urls:
- try:
- licenses['free'][license_url] += 1
- except KeyError:
- licenses['free'][license_url] = 1
- try:
- mimetypes['free'][mimetype_composite] += 1
- except KeyError:
- mimetypes['free'][mimetype_composite] = 1
- try:
- mimetypes_prefix_publishers['free'][mimetype]
- except KeyError:
- mimetypes_prefix_publishers['free'][mimetype] = {}
- try:
- mimetypes_prefix_publishers['free'][mimetype][doi_prefix] += 1
- except KeyError:
- mimetypes_prefix_publishers['free'][mimetype][doi_prefix] = 1
+ licenses['free'][license_url] += 1
+ mimetypes['free'][mimetype_composite] += 1
+ mimetypes_prefix_publishers['free'][mimetype][doi_prefix] += 1
else:
- try:
- licenses['non-free'][license_url] += 1
- except KeyError:
- licenses['non-free'][license_url] = 1
- try:
- mimetypes['non-free'][mimetype_composite] += 1
- except KeyError:
- mimetypes['non-free'][mimetype_composite] = 1
- try:
- mimetypes_prefix_publishers['non-free'][mimetype]
- except KeyError:
- mimetypes_prefix_publishers['non-free'][mimetype] = {}
- try:
- mimetypes_prefix_publishers['non-free'][mimetype][doi_prefix] += 1
- except KeyError:
- mimetypes_prefix_publishers['non-free'][mimetype][doi_prefix] = 1
+ licenses['non-free'][license_url] += 1
+ mimetypes['non-free'][mimetype_composite] += 1
+ mimetypes_prefix_publishers['non-free'][mimetype][doi_prefix] += 1
if license_url is not None:
if license_text is not None or \
copyright_statement is not None:
- try:
- licensing_publishers['url-from-text'][doi_prefix] += 1
- except KeyError:
- licensing_publishers['url-from-text'][doi_prefix] = 1
+ licensing_publishers['url-from-text'][doi_prefix] += 1
else: # URL was given, no text lookup necessary
- try:
- licensing_publishers['url'][doi_prefix] += 1
- except KeyError:
- licensing_publishers['url'][doi_prefix] = 1
+ licensing_publishers['url'][doi_prefix] += 1
else:
if license_text is not None or \
copyright_statement is not None:
- try:
- licensing_publishers['text'][doi_prefix] += 1
- except KeyError:
- licensing_publishers['text'][doi_prefix] = 1
+ licensing_publishers['text'][doi_prefix] += 1
else: # no licensing information at all
- try:
- licensing_publishers['none'][doi_prefix] += 1
- except KeyError:
- licensing_publishers['none'][doi_prefix] = 1
+ licensing_publishers['none'][doi_prefix] += 1
completed += 1
p.update(completed)

0 comments on commit ec2a0d6

Please sign in to comment.