In [None]:
# Download PDF files and image files from the production API to the dev server.

In [None]:
from django.conf import settings
from tqdm import tqdm
import requests
import os
from django.core.files.base import ContentFile

def delete_old_content(mindate='2017-06-01', **kwargs):
    """Prune down content in the dev database.

    Deletes stories created on or before ``mindate``, issues published on or
    before ``mindate``, contributors without a byline, and image files that
    are not referenced by a contributor, story image or front page story.
    Finally prints the remaining Issue, ImageFile and Contributor counts.

    Args:
        mindate: Cut-off date (ISO formatted string) for stories and issues.
        **kwargs: Optional queryset filter lookups applied to
            ``ImageFile.objects.photos()``; matching photos are deleted.
            When no lookups are given this step is skipped, since an empty
            filter would match (and delete) every photo.

    Raises:
        RuntimeError: If ``settings.DEBUG`` is falsy.
    """
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable this safety guard.
    if not settings.DEBUG:
        raise RuntimeError('DO NOT RUN IN PRODUCTION')
    Story.objects.filter(created__date__lte=mindate).delete()
    if kwargs:
        ImageFile.objects.photos().filter(**kwargs).delete()
    Issue.objects.filter(publication_date__lte=mindate).delete()
    Contributor.objects.filter(byline=None).delete()
    # Remove image files that nothing refers to any more.
    ImageFile.objects.filter(contributor=None, storyimage=None, frontpagestory=None).delete()

    print(
        Issue.objects.count(),
        ImageFile.objects.count(),
        Contributor.objects.count(),
    )

In [None]:
def save_request_data(file_field, response):
    """Store the body of an HTTP response in a model file field.

    The file keeps the base name that ``file_field`` already has, and the
    owning model instance is saved (``save=True``).

    Args:
        file_field: File field object exposing ``name`` and
            ``save(name, content, save=...)``.
        response: Streamed ``requests`` response whose ``raw`` body is
            written to the field.

    Raises:
        requests.HTTPError: If the response has a 4xx/5xx status, so an
            error page is never stored as file content.
    """
    response.raise_for_status()
    # The raw stream is not content-decoded by default; without this a
    # gzip/deflate encoded body would be written to disk still compressed.
    response.raw.decode_content = True
    filename = os.path.basename(file_field.name)
    file_field.save(filename, response.raw, save=True)
    
def get_file_url(endpoint, pk, field, apibase='http://universitas.no/api/'):
    """Look up a single field of an API resource.

    Requests ``<apibase>/<endpoint>/<pk>/`` and returns the value stored
    under ``field`` in the JSON body of the response.

    Args:
        endpoint: Name of the API collection, e.g. ``'pdfs'``.
        pk: Primary key of the resource.
        field: Key to read from the JSON response.
        apibase: Root url of the API.

    Returns:
        The value of ``field``, or ``None`` when the key is absent.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    # Build the url with '/' explicitly: os.path.join uses the platform's
    # path separator and discards the base when a later part starts with '/'.
    parts = [apibase.rstrip('/'), str(endpoint).strip('/'), str(pk).strip('/')]
    apiurl = '/'.join(parts) + '/'
    response = requests.get(apiurl)
    response.raise_for_status()
    return response.json().get(field)

def get_pdf_from_api(pi, overwrite=False):
    """Download the pdf file of a print issue from the API.

    Prints whether the local file is missing or exists. The file is
    downloaded when it is missing, or when ``overwrite`` is true; otherwise
    the function returns without touching the network.

    Args:
        pi: Object with a ``pk``, a ``pdf`` file field and a
            ``get_cover_page()`` method (presumably a PrintIssue; see
            ``download_pdfs``).
        overwrite: Download the pdf again even if the file already exists.
    """
    try:
        # Accessing `.file` raises FileNotFoundError when the stored file
        # is not present locally.
        pi.pdf.file
    except FileNotFoundError:
        print(f'missing: {pi.pdf}')
    else:
        print(f'exists: {pi.pdf}')
        if not overwrite:
            return
    pdfurl = get_file_url('pdfs', pi.pk, 'pdf')
    response = requests.get(pdfurl, stream=True)
    save_request_data(pi.pdf, response)
    print(pi.get_cover_page())
    
def get_imagefile_from_api(img, overwrite=False):
    """Fetch the source file of an image from the API when needed.

    The 'large' version of the image is downloaded when the local source
    file cannot be found, or when ``overwrite`` is truthy.

    Args:
        img: Object with a ``pk`` and a ``source_file`` file field.
        overwrite: Download even if the local file is present.

    Returns:
        The name of the image's source file.
    """
    try:
        # Touching `.file` raises FileNotFoundError for a missing file.
        img.source_file.file
    except FileNotFoundError:
        must_download = True
    else:
        must_download = overwrite

    if must_download:
        image_url = get_file_url('images', img.pk, 'large')
        save_request_data(
            img.source_file,
            requests.get(image_url, stream=True),
        )
    return img.source_file.name

def download_images(qs=None, overwrite=False):
    """Download image source files for every item in a queryset.

    Args:
        qs: Iterable of image objects; defaults to all ImageFile objects.
        overwrite: Passed on to ``get_imagefile_from_api``.
    """
    images = ImageFile.objects.all() if qs is None else qs
    progress = tqdm(images, desc='-' * 30)
    for image in progress:
        # Fixed-width label (cut and padded to 30 chars) for the progress bar.
        label = str(image)[:30].ljust(30)
        progress.set_description_str(label, refresh=False)
        get_imagefile_from_api(image, overwrite)
        
def download_pdfs(qs=None, overwrite=False):
    """Download pdf files for every item in a queryset.

    Args:
        qs: Iterable of print issue objects; defaults to all PrintIssue
            objects.
        overwrite: Passed on to ``get_pdf_from_api``.
    """
    issues = PrintIssue.objects.all() if qs is None else qs
    progress = tqdm(issues, desc='-' * 30)
    for issue in progress:
        # Fixed-width label (cut and padded to 30 chars) for the progress bar.
        label = str(issue)[:30].ljust(30)
        progress.set_description_str(label, refresh=False)
        get_pdf_from_api(issue, overwrite)
        
# Run both downloads with their defaults: every ImageFile and every
# PrintIssue is processed, with overwrite left off.
download_images()
download_pdfs()