Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 537bdc280f
Fetching contributors…

Cannot retrieve contributors at this time

executable file 115 lines (94 sloc) 3.61 kb
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv, progressbar
from os import path
from sys import argv, stderr
from urllib2 import urlopen, urlparse, Request, HTTPError
BUFSIZE = 1024000 # (1024KB)
from model import session, setup_all, create_all, set_source, \
Article, Journal, SupplementaryMaterial
# Read the two required positional arguments: the action to perform and the
# name of the source to operate on.  Any further arguments are ignored.
try:
    action, target = argv[1], argv[2]
except IndexError:  # fewer than two arguments given
    stderr.write("""
oa-get – Open Access Media Importer download operations
usage: oa-get download-metadata [source] |
oa-get download-media [source]
""")
    # Raise SystemExit directly rather than calling exit(): that helper is
    # injected by the site module for interactive sessions and is not
    # available when the interpreter is started without site (python -S).
    raise SystemExit(1)
# Reject anything other than the two supported actions.  An explicit
# membership test is used instead of ``assert`` because assertions are
# removed when Python runs with -O, which would let an invalid action
# slip through and make the script silently do nothing.
if action not in ('download-media', 'download-metadata'):
    stderr.write("Unknown action “%s”.\n" % action)
    # SystemExit is raised directly; the exit() helper comes from the site
    # module and is not guaranteed to exist outside interactive sessions.
    raise SystemExit(2)
# Load the module for the requested source from the ``sources`` package and
# bind it to ``source_module``.
#
# The module name comes straight from the command line, so it is resolved
# with importlib instead of being spliced into an ``exec`` statement: no
# user-supplied text is ever executed as code, and a malformed name is
# reported as an unknown source rather than surfacing as a SyntaxError.
import importlib
try:
    source_module = importlib.import_module('sources.%s' % target)
except (ImportError, ValueError):
    # ImportError: no such source module (or the module failed to import).
    # ValueError is caught defensively for malformed names such as an empty
    # string, which some interpreter versions reject with ValueError.
    stderr.write("Unknown source “%s”.\n" % target)
    # SystemExit is raised directly; the exit() helper comes from the site
    # module and is not guaranteed to exist outside interactive sessions.
    raise SystemExit(3)
# Prepare the model layer for the chosen source.  set_source() and
# setup_all() are imported from the project's ``model`` module.
# NOTE(review): presumably set_source() selects the per-source database that
# setup_all(True) then binds/creates — confirm against model.py.
set_source(target)
setup_all(True)
# NOTE(review): config is imported here instead of at the top of the file;
# this looks deliberate (it may rely on the setup above) — confirm before
# moving it.
import config
# Metadata download: consume the progress reports yielded by the source
# module.  Each report is a mapping with 'url', 'total' and 'completed'
# entries.  Whenever the reported URL changes, announce the new download and
# start a fresh progress bar sized to that download's total; every report
# then advances the current bar.
if action == 'download-metadata':
    metadata_dir = config.get_metadata_raw_source_path(target)
    current_url = None
    for report in source_module.download_metadata(metadata_dir):
        report_url = report['url']
        if current_url != report_url:
            current_url = report_url
            announcement = "Downloading “%s”, saving into directory “%s” …\n"
            stderr.write(announcement % (current_url, metadata_dir))
            bar = progressbar.ProgressBar(maxval=report['total'])
        bar.update(report['completed'])
# Media download: for every supplementary material not yet marked as
# downloaded, fetch the file into the raw media directory of the source and
# record the material as downloaded.  Materials are skipped when their
# article has no license URL, when the license is not listed in
# config.free_license_urls, or when the material is not a video.
if action == 'download-media':
    media_path = config.get_media_raw_source_path(target)
    materials = SupplementaryMaterial.query.filter_by(
        downloaded=False
    ).all()
    for material in materials:
        license_url = material.article.license_url
        if license_url == '':
            continue
        if license_url not in config.free_license_urls:
            stderr.write('Unknown, possibly non-free license: <%s>\n' %
                license_url)
            continue
        mimetype = material.mimetype
        if mimetype != 'video':
            continue
        url = material.url
        try:
            req = Request(url, None, {'User-Agent' : 'oa-get/2012-07-21'})
            remote_file = urlopen(req)
        except HTTPError as e:
            stderr.write('When trying to download <%s>, the following error occured: “%s”.\n' % \
                (url.encode('utf-8'), str(e)))
            # An HTTP error aborts the whole run.  SystemExit is raised
            # directly; the exit() helper comes from the site module and is
            # not guaranteed to exist outside interactive sessions.
            raise SystemExit(4)
        # From here on the response is open; the finally clause guarantees it
        # is closed on every path (skip, completed download, or error), so
        # connections do not pile up over a long list of materials.
        try:
            # NOTE: a response without a Content-Length header raises
            # KeyError here, exactly as before.
            total = int(remote_file.headers['content-length'])
            url_path = urlparse.urlsplit(url).path
            # The local file is named after the last path segment of the URL.
            local_filename = path.join(media_path, url_path.split('/')[-1])
            # If the local file has the same size as the remote file, the
            # download is skipped.  Only the size lookup is guarded, so an
            # OSError raised elsewhere is never mistaken for a missing file.
            try:
                already_complete = path.getsize(local_filename) == total
            except OSError:  # local file does not exist
                already_complete = False
            if already_complete:
                stderr.write("Skipping <%s>.\n" % url.encode('utf-8'))
            else:
                stderr.write("Downloading <%s>, saving into directory “%s” …\n" % \
                    (url.encode('utf-8'), media_path))
                p = progressbar.ProgressBar(maxval=total)
                completed = 0
                with open(local_filename, 'wb') as local_file:
                    # Stream in BUFSIZE chunks; an empty read marks the end
                    # of the response body.
                    while True:
                        chunk = remote_file.read(BUFSIZE)
                        if not chunk:
                            break
                        local_file.write(chunk)
                        completed += len(chunk)
                        p.update(completed)
        finally:
            remote_file.close()
        # Reached both after a skip and after a completed download.
        material.downloaded = True
        session.commit()
Jump to Line
Something went wrong with that request. Please try again.