In [1]:
from django.conf import settings
from tqdm import tqdm

def delete_old_content():
    """Delete old and orphaned content rows, intended for a development database.

    Raises ``AssertionError`` unless ``settings.DEBUG`` is truthy. The
    deletions run in the order written; each ``.delete()`` is issued directly
    on a queryset.
    """
    # Safety guard: refuse to run unless Django is in DEBUG mode.
    assert settings.DEBUG, 'DO NOT RUN IN PRODUCTION'
    # Cutoff date used by the Story and Issue filters below.
    mindate = '2017-06-01'
    # Stories created on or before the cutoff date.
    Story.objects.filter(created__date__lte=mindate).delete()
    # NOTE(review): `kwargs` is not defined inside this function, so this line
    # relies on a global named `kwargs` existing in the kernel and otherwise
    # raises NameError. The intended photo filter is not visible here — confirm.
    ImageFile.objects.photos().filter(**kwargs).delete()
    # Issues published on or before the cutoff date.
    Issue.objects.filter(publication_date__lte=mindate).delete()
    # Contributors matched by `byline=None` (presumably those with no remaining
    # bylines after the story deletion above — verify against the model).
    Contributor.objects.filter(byline=None).delete()
    # Image files with no contributor, no story image and no front page story.
    ImageFile.objects.filter(contributor=None, storyimage=None, frontpagestory=None).delete()

In [2]:
# Row counts for Issue, ImageFile and Contributor, printed on one line
# separated by spaces.
model_counts = [model.objects.count() for model in (Issue, ImageFile, Contributor)]
print(*model_counts)

14 321 57


In [12]:
import requests
import os
from django.core.files.base import ContentFile

def save_request_data(file_field, response):
    """Save the body of a streamed HTTP response into a file field.

    The stored file keeps the base name of the field's current file name.

    Args:
        file_field: Field file object exposing ``name`` and
            ``save(name, content, save=...)``.
        response: A ``requests`` response, expected to have been requested
            with ``stream=True`` so that ``response.raw`` is still unread.

    Raises:
        requests.HTTPError: If the response carries a 4xx/5xx status.
    """
    # Fail loudly instead of storing an error page as if it were the file.
    response.raise_for_status()
    # The raw urllib3 stream yields the bytes exactly as sent on the wire;
    # ask it to undo gzip/deflate transfer encoding so the saved file is
    # the real payload.
    response.raw.decode_content = True
    filename = os.path.basename(file_field.name)
    file_field.save(filename, response.raw, save=True)
    
def get_file_url(endpoint, pk, field, apibase='http://universitas.no/api/'):
    """Look up a file URL from one field of an API detail response.

    Requests ``<apibase>/<endpoint>/<pk>/`` and returns the value stored
    under ``field`` in the JSON body.

    Args:
        endpoint: API collection name, e.g. ``'pdfs'`` or ``'images'``.
        pk: Primary key of the object; converted with ``str``.
        field: Key to read from the decoded JSON object.
        apibase: Root URL of the API, with or without a trailing slash.

    Returns:
        The value of ``field``, or ``None`` if the key is absent.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    # URLs always use "/" regardless of platform. os.path.join would use the
    # OS separator and would silently drop the base for a segment that
    # starts with "/", so the pieces are joined explicitly.
    segments = [
        str(apibase).rstrip('/'),
        str(endpoint).strip('/'),
        str(pk).strip('/'),
    ]
    apiurl = '/'.join(segments) + '/'
    response = requests.get(apiurl)
    # Surface API errors here rather than returning None from an error body.
    response.raise_for_status()
    return response.json().get(field)

def get_pdf_from_api(pi, overwrite=False):
    """Download the PDF for a print issue from the API when it is needed.

    The PDF is fetched when the local file is missing, or when ``overwrite``
    is true. After saving, the result of ``pi.get_cover_page()`` is printed.

    Args:
        pi: Object with ``pk``, a ``pdf`` file field and ``get_cover_page()``.
        overwrite: Download even if the local file already exists.

    Returns:
        None.
    """
    try:
        # Accessing ``.file`` raises FileNotFoundError when the file is
        # absent from storage; the value itself is not needed.
        pi.pdf.file
    except FileNotFoundError:
        print(f'missing: {pi.pdf}')
    else:
        print(f'exists: {pi.pdf}')
        # Honour ``overwrite``: only skip the download for an existing file
        # when the caller did not ask for a refresh.
        if not overwrite:
            return
    pdfurl = get_file_url('pdfs', pi.pk, 'pdf')
    response = requests.get(pdfurl, stream=True)
    save_request_data(pi.pdf, response)
    print(pi.get_cover_page())
    
def get_imagefile_from_api(img, overwrite=False):
    """Ensure an image's source file exists locally, downloading it if needed.

    The file is fetched from the API's ``large`` field when the local file
    cannot be found or when ``overwrite`` is true.

    Args:
        img: Object with ``pk`` and a ``source_file`` file field.
        overwrite: Download even if the local file is present.

    Returns:
        The ``name`` of ``img.source_file`` after any download.
    """
    try:
        # Probe for the local file; only the possible exception matters.
        img.source_file.file
    except FileNotFoundError:
        # A missing file is treated exactly like an explicit overwrite.
        overwrite = True

    if not overwrite:
        return img.source_file.name

    image_url = get_file_url('images', img.pk, 'large')
    save_request_data(img.source_file, requests.get(image_url, stream=True))
    return img.source_file.name

def download_images(qs=None, overwrite=False):
    """Fetch source files for a set of images, showing a progress bar.

    Args:
        qs: Iterable of image objects; defaults to every ``ImageFile``.
        overwrite: Passed through to ``get_imagefile_from_api``.
    """
    images = ImageFile.objects.all() if qs is None else qs
    # The initial description is a 30-character placeholder, so the bar keeps
    # the same width once real labels replace it.
    progress = tqdm(images, 30 * '-')
    for image in progress:
        # Label: the item's string form, cut or padded to exactly 30 chars.
        label = str(image)[:30].ljust(30)
        progress.set_description_str(label, False)
        get_imagefile_from_api(image, overwrite)
        
def download_pdfs(qs=None, overwrite=False):
    """Fetch PDFs for a set of print issues, showing a progress bar.

    Args:
        qs: Iterable of print issue objects; defaults to every ``PrintIssue``.
        overwrite: Passed through to ``get_pdf_from_api``.
    """
    if qs is None:
        issues = PrintIssue.objects.all()
    else:
        issues = qs
    width = 30
    bar = tqdm(issues, '-' * width)
    for issue in bar:
        # Fixed-width label: first 30 characters, left-aligned and padded.
        caption = format(str(issue)[:width], '<30')
        bar.set_description_str(caption, False)
        get_pdf_from_api(issue, overwrite)
        
# Run both downloads with their defaults: all ImageFile objects first, then
# all PrintIssue objects, with overwrite left at False.
download_images()
download_pdfs()




------------------------------:   0%|          | 0/321 [00:00<?, ?it/s][A[A

Grande-obalph1.jpg            : 100%|██████████| 321/321 [00:00<00:00, 5465.79it/s][A[A

------------------------------:   0%|          | 0/8 [00:00<?, ?it/s][A[A

missing: pdf/d12f44d49e0e11e7800e0242ac12000b/universitas_2017-16.pdf


14:59:23 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/2c0c1512a5df11e783a60242ac12000b/universitas_2017-16.pdf error: File is empty.

14:59:23 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/2c0c1512a5df11e783a60242ac12000b/universitas_2017-16.pdf error: File is empty.



/media/pdf/d12f44d49e0e11e7800:  12%|█▎        | 1/8 [00:08<01:00,  8.61s/it]

pdf/covers/universitas_2017-16.jpg
missing: pdf/d6ed40c49e0e11e7800e0242ac12000b/universitas_2017-18.pdf


[A[A14:59:31 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/314299aca5df11e783a60242ac12000b/universitas_2017-18.pdf error: File is empty.

14:59:31 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/314299aca5df11e783a60242ac12000b/universitas_2017-18.pdf error: File is empty.



/media/pdf/d6ed40c49e0e11e7800:  25%|██▌       | 2/8 [00:16<00:50,  8.49s/it]

pdf/covers/universitas_2017-18.jpg
missing: pdf/dbc842749e0e11e7800e0242ac12000b/universitas_2017-16_mag.pdf


[A[A14:59:38 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/3628ebbaa5df11e783a60242ac12000b/universitas_2017-16_mag.pdf error: File is empty.

14:59:38 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/3628ebbaa5df11e783a60242ac12000b/universitas_2017-16_mag.pdf error: File is empty.



/media/pdf/dbc842749e0e11e7800:  38%|███▊      | 3/8 [00:21<00:37,  7.47s/it]

pdf/covers/universitas_2017-16_mag.jpg
missing: pdf/e2ba3a569e0e11e7800e0242ac12000b/universitas_2017-19_mag.pdf


[A[A14:59:44 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/39208bdea5df11e783a60242ac12000b/universitas_2017-19_mag.pdf error: File is empty.

14:59:44 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/39208bdea5df11e783a60242ac12000b/universitas_2017-19_mag.pdf error: File is empty.



/media/pdf/e2ba3a569e0e11e7800:  50%|█████     | 4/8 [00:27<00:27,  6.87s/it]

pdf/covers/universitas_2017-19_mag.jpg
missing: pdf/e634c4a89e0e11e7800e0242ac12000b/universitas_2017-19.pdf


[A[A14:59:50 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/3c5f58b6a5df11e783a60242ac12000b/universitas_2017-19.pdf error: File is empty.

14:59:50 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/3c5f58b6a5df11e783a60242ac12000b/universitas_2017-19.pdf error: File is empty.



/media/pdf/e634c4a89e0e11e7800:  62%|██████▎   | 5/8 [00:35<00:21,  7.19s/it]

pdf/covers/universitas_2017-19.jpg
missing: pdf/ea57be5a9e0e11e7800e0242ac12000b/universitas_2017-17.pdf


[A[A14:59:58 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/411164daa5df11e783a60242ac12000b/universitas_2017-17.pdf error: File is empty.

14:59:58 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/411164daa5df11e783a60242ac12000b/universitas_2017-17.pdf error: File is empty.



/media/pdf/ea57be5a9e0e11e7800:  75%|███████▌  | 6/8 [00:43<00:14,  7.35s/it]

pdf/covers/universitas_2017-17.jpg
missing: pdf/eefcfd8a9e0e11e7800e0242ac12000b/universitas_2017-21.pdf


[A[A15:00:06 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/45b50aa0a5df11e783a60242ac12000b/universitas_2017-21.pdf error: File is empty.

15:00:06 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/45b50aa0a5df11e783a60242ac12000b/universitas_2017-21.pdf error: File is empty.



/media/pdf/eefcfd8a9e0e11e7800:  88%|████████▊ | 7/8 [00:51<00:07,  7.69s/it]

pdf/covers/universitas_2017-21.jpg
missing: pdf/f59925c49e0e11e7800e0242ac12000b/universitas_2017-20.pdf


[A[A15:00:14 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/4adad9baa5df11e783a60242ac12000b/universitas_2017-20.pdf error: File is empty.

15:00:14 2017-09-30 [DEBUG]  universitas    models.py:415  (remove_thumbnail)
	instance: /media/pdf/4adad9baa5df11e783a60242ac12000b/universitas_2017-20.pdf error: File is empty.



/media/pdf/f59925c49e0e11e7800: 100%|██████████| 8/8 [01:00<00:00,  8.11s/it]

pdf/covers/universitas_2017-20.jpg


[A[A

[A[A

In [None]:
# NOTE(review): `imagefiles` is not defined in any cell visible here. The
# set_description() call below suggests it is a tqdm-wrapped iterable of
# image objects — confirm before running on a fresh kernel.
for im in imagefiles:
    # Second argument True forces a download even when the file exists.
    fn = get_imagefile_from_api(im, True)
    # Show the file name, cut or padded to 60 characters, on the progress bar.
    imagefiles.set_description(f'{fn[:60]:<60}')
    # Clear the stored metadata for this image.
    im.md5 = None
    im.size = None
    im.imagehash = None
    # Bare attribute reads with the result discarded; presumably these
    # trigger recomputation of the values cleared above — TODO confirm
    # against the model definition.
    im.md5, im.imagehash, im.size
    im.save()
    