In [4]:
a, b = ImageFile.objects.filter(source_file__icontains='knaus')[:2]
merge_instances(a,b)


<ImageFile: knausgaard-omslag-sa-mye-lengsel.jpg>

In [1]:
from django.db import transaction
from django.apps import apps
from django.contrib.contenttypes.fields import GenericForeignKey
from django.db.models.fields.related import ManyToManyField

@transaction.atomic()
def merge_instances(primary_object, *alias_objects):
    """Merge several model instances into one, the `primary_object`.
    Use this function to merge model objects and migrate all of the related
    fields from the alias objects the primary object.
    Usage:
        from django.contrib.auth.models import User
        primary_user = User.objects.get(email='good@example.com')
        duplicate_user = User.objects.get(email='good+duplicate@example.com')
        merge(primary_user, duplicate_user)
    Based on: https://djangosnippets.org/snippets/382/
    """
    generic_fields = get_generic_fields()

    # get related fields
    many_to_many_fields, related_fields = discrimine(
        lambda field: isinstance(field, ManyToManyField),
        primary_object._meta._get_fields(forward=False, include_hidden=True)
    )

    # Loop through all alias objects and migrate their references to the
    # primary object
    for alias_object in alias_objects:
        # Migrate all foreign key references from alias object to primary
        # object.
        for related_object in related_fields:
            # The variable name on the alias_object model.
            alias_varname = related_object.get_accessor_name()
            # The variable name on the related model.
            obj_varname = related_object.field.name
            related_objects = getattr(alias_object, alias_varname)
            for obj in related_objects.all():
                setattr(obj, obj_varname, primary_object)
                obj.save()

        # Migrate all many to many references from alias object to primary
        # object.
        for related_many_object in many_to_many_fields:
            alias_varname = related_many_object.get_accessor_name()
            obj_varname = related_many_object.field.name
            related_many_objects = getattr(alias_object, alias_varname)
            for obj in related_many_objects.all():
                getattr(obj, obj_varname).remove(alias_object)
                getattr(obj, obj_varname).add(primary_object)

        # Migrate all generic foreign key references from alias object to
        # primary object.
        for field in generic_fields:
            filter_kwargs = {}
            filter_kwargs[field.fk_field] = alias_object._get_pk_val()
            filter_kwargs[field.ct_field] = field.get_content_type(alias_object)
            related_objects = field.model.objects.filter(**filter_kwargs)
            for generic_related_object in related_objects:
                setattr(generic_related_object, field.name, primary_object)
                generic_related_object.save()

        if alias_object.id:
            alias_object.delete()

    return primary_object


def get_generic_fields():
    """Return a list of all GenericForeignKeys in all models."""
    generic_fields = []
    for model in apps.get_models():
        for field_name, field in model.__dict__.items():
            if isinstance(field, GenericForeignKey):
                generic_fields.append(field)
    return generic_fields


def discrimine(pred, sequence):
    """Split a collection in two collections using a predicate.
    >>> discrimine(lambda x: x < 5, [3, 4, 5, 6, 7, 8])
    ... ([3, 4], [5, 6, 7, 8])
    """
    positive, negative = [], []
    for item in sequence:
        if pred(item):
            positive.append(item)
        else:
            negative.append(item)
    return positive, negative


In [4]:
a,b = ImageFile.objects.filter(source_file__icontains='andenas')

In [7]:
merge_instances(a,b)

<ImageFile: 25-NYH-andenas-02-HDH.jpg>

In [17]:
from collections import Counter
def duplicates(queryset, attr, order_by=()):
    counter = Counter(queryset.values_list(attr, flat=True))
    duped_values = [el for el in counter if counter[el] > 1]
    sets = []
    for value in duped_values:
        items = queryset.filter(**{attr: value}).order_by(*order_by)
        sets.append(items)
    return sets

images = ImageFile.objects.exclude(_md5=None)
md5_dupes = duplicates(images, '_md5', ['created'])

def print_dupes(dupe_images, attr):
    print(f'{attr}-dupes: {len(dupe_images)}')
    for qs in dupe_images:
        print()
        for img in qs:
            print(getattr(img, attr), img)

print_dupes(md5_dupes, '_md5')

_md5-dupes: 1571

9d16556c2ab53401b64159616936dd97 b02785e5-341b-4928-b087-a926fcefb037.jpg
9d16556c2ab53401b64159616936dd97 b02785e5-341b-4928-b087-a926fcefb037.jpg

81eb27930909740ce737b748fc744a6c 18-ANM-overgrowth-ontz.jpg
81eb27930909740ce737b748fc744a6c 18-ANM-overgrowth-ontz.jpg

72a4f28350ad0540c9a8abd2036f9ee8 Studietid4-KUL-OD.jpg
72a4f28350ad0540c9a8abd2036f9ee8 Studietid4-KUL-OD.jpg

a9a8698d449ee8f81e1e35cce5bb0538 Anne-Karine-Nymoen.jpg
a9a8698d449ee8f81e1e35cce5bb0538 Anne-Karine-Nymoen.jpg

f5ed6a1da758ce905ca96e398df41ca0 04-MAG-magflyktning-13-MKS.jpg
f5ed6a1da758ce905ca96e398df41ca0 04-MAG-magflyktning-13-MKS.jpg

a1a87ed45423125b513bc27ed0bb831f Therese-8.jpg
a1a87ed45423125b513bc27ed0bb831f Therese-8.jpg

eddaa9a907a6b4bb0d5c2f561b6460be 05-NYH-nyhrealistli-10-SGS.jpg
eddaa9a907a6b4bb0d5c2f561b6460be 05-NYH-nyhrealistli-10-SGS.jpg

36cbbc2549ae03a90f77d62ca6e9ccf9 hoerselshemmed.jpg
36cbbc2549ae03a90f77d62ca6e9ccf9 hoerselshemmed.jpg

e73d1dc13cb9afe0f0ba28d19b6fff

cdec598d8f42d1a4618a86e18e3d7ed3 08-ANM-Stiffi-arrangement-HH-04.jpg
cdec598d8f42d1a4618a86e18e3d7ed3 08-ANM-Stiffi-arrangement-HH-04.jpg
cdec598d8f42d1a4618a86e18e3d7ed3 08-ANM-Stiffi-arrangement-HH-04.jpg

3d0fdfc18a7d520232463e2a38cd66cb anm-KEK-paskalev-parkteateret-1.jpg
3d0fdfc18a7d520232463e2a38cd66cb anm-KEK-paskalev-parkteateret-1.jpg

3c4a0934b73a05eb8a42efcbd4ce3d9b 06-NYH-HH-5.jpg
3c4a0934b73a05eb8a42efcbd4ce3d9b 06-NYH-datatilsynet-HH-5.jpg

c7886f1369bf6a69d1d3758f5ebedc89 illustrasjon-2.jpg
c7886f1369bf6a69d1d3758f5ebedc89 illustrasjon-2.jpg

0168e8815d82b0b15534983d58b7f6c9 04-NETT-Idrett-SGS.jpg
0168e8815d82b0b15534983d58b7f6c9 04-NETT-Idrett-SGS.jpg
0168e8815d82b0b15534983d58b7f6c9 04-NETT-Idrett-SGS.jpg
0168e8815d82b0b15534983d58b7f6c9 04-NETT-Idrett-SGS.jpg

6d97d1ae0d0801cd989d45308d90ab0e 03-KULWaxOnWaxoff-CB-9-1.jpg
6d97d1ae0d0801cd989d45308d90ab0e 03-KULWaxOnWaxoff-CB-9-1.jpg
6d97d1ae0d0801cd989d45308d90ab0e 03-KULWaxOnWaxoff-CB-9-1-2.jpg
6d97d1ae0d0801cd989d453

4fecfd5be0e6ac3dd2d467c92f34ca8a IMG-0738.jpg
4fecfd5be0e6ac3dd2d467c92f34ca8a IMG-0738.jpg

f5a3bee65753d7408cba76741954267e fasit.jpg
f5a3bee65753d7408cba76741954267e Bylarm2016-nett-05.jpg

53a000397c57bbb0653a056e9d86ed69 Siv-Jakobsen.jpg
53a000397c57bbb0653a056e9d86ed69 Bylarm2016-nett-17.jpg

9c01ee141740409b950ecae3e1783499 16225735024-b29030ba5d-o.jpg
9c01ee141740409b950ecae3e1783499 016225735024-b29030ba5d-o.jpg

37fe6f4cd722e7199c50935cd4eba853 09-NYH-10oslosomstudentby-08-HD.jpg
37fe6f4cd722e7199c50935cd4eba853 10-NYH-10oslosomstudentby-08-HD.jpg

e8277ab4b104482cbf488dc80cf8782b 09-NYH-10oslosomstudentby-06-HD.jpg
e8277ab4b104482cbf488dc80cf8782b 10-NYH-10oslosomstudentby-06-HD.jpg

fff338c4f180363d8e13c6f688199912 09-NYH-10oslosomstudentby-05-HD.jpg
fff338c4f180363d8e13c6f688199912 10-NYH-10oslosomstudentby-05-HD.jpg

c04bf3723a4e35a407c4e153deb60484 IMG-1832.jpg
c04bf3723a4e35a407c4e153deb60484 11-DEB-forskerintervju-LinnCecilieAnkerSorensen8.jpg

9d4b891c0a4e4751b042a36d

1a5a73aa23f242b847f63fb91d34e7f6 WIEL3368-kopi.jpg
1a5a73aa23f242b847f63fb91d34e7f6 WIEL3368-kopi.jpg

c39d97be626917d963face47ec9b6dc1 03-KULWaxOnWaxoff-CB-5.jpg
c39d97be626917d963face47ec9b6dc1 03-KULWaxOnWaxoff-CB-5.jpg

df7c56c2217280fd32c46031d6ab60a6 bukkreklame2.jpg
df7c56c2217280fd32c46031d6ab60a6 bukkreklame2.jpg

86535966f2a9b3f8dfc044d2c4335b52 DSC05628.jpg
86535966f2a9b3f8dfc044d2c4335b52 DSC05628.jpg

b2414360b26ccc030d82dac3331db4bc DSC05709.jpg
b2414360b26ccc030d82dac3331db4bc DSC05709.jpg
b2414360b26ccc030d82dac3331db4bc DSC05709.jpg
b2414360b26ccc030d82dac3331db4bc DSC05709.jpg

ceb24318deaba10cc11f870f0591272f GabrielleLegrandGjerdset-retrolys-Foto-UnniIrmelinKvam-SiO.jpg
ceb24318deaba10cc11f870f0591272f GabrielleLegrandGjerdset-retrolys-Foto-UnniIrmelinKvam-SiO.jpg
ceb24318deaba10cc11f870f0591272f GabrielleLegrandGjerdset-Foto-UnniIrmelinKvam-SiO.jpg

63a2d6d5101ca6f9faec4ab2f883afe2 01-KUL-kulundersakt-02-HE.jpg
63a2d6d5101ca6f9faec4ab2f883afe2 01-KUL-kulundersakt-0

eb55bee454d5f9164cb1293d299e9ef4 06-NYH-HH-33.jpg
eb55bee454d5f9164cb1293d299e9ef4 06-NYH-sensur-nf-HH-4.jpg

f8a50a5cc75072d588344979ad209037 06-NYH-HH-41.jpg
f8a50a5cc75072d588344979ad209037 06-NYH-sensur-osstokkeHH-12.jpg

521a7b6809b94b48b3778010bd234cd5 06-NYH-HH-40.jpg
521a7b6809b94b48b3778010bd234cd5 06-NYH-sensur-osstokkeHH-10.jpg

f3f71185dbf11502ce25c4c4344c045d 06-NYH-HH-39.jpg
f3f71185dbf11502ce25c4c4344c045d 06-NYH-sensur-osstokkeHH-11.jpg

f0dc58508f3de993f89aa6b18f30c4d8 KUL-3-NO4-NW-8.jpg
f0dc58508f3de993f89aa6b18f30c4d8 KUL-3-NO4-NW-8.jpg

1dc4baacc0c83cbab7edf49adf740153 KUL-6-MUSIKK-NLA-3-NW.jpg
1dc4baacc0c83cbab7edf49adf740153 0KUL-6-MUSIKK-NLA-3-NW.jpg

708689dab61cb490198f216514617574 06-NYH-HH-12.jpg
708689dab61cb490198f216514617574 06-NYH-OSI-MarianneP-HH-03.jpg

55a76f920f0ec8123317ae795e98992b 06-NYH-HH-10.jpg
55a76f920f0ec8123317ae795e98992b 06-NYH-OSI-MarianneP-HH-01.jpg

ca519313cc78930a65a966f7c5587e27 06-NYH-HH-26.jpg
ca519313cc78930a65a966f7c5587e27 06-N

c447431d5fb32d50ba1e7ab8ad0e0544 IMG-45652000.jpg
c447431d5fb32d50ba1e7ab8ad0e0544 09-KUL-min-studietid-Brundtland-DK-03.jpg
c447431d5fb32d50ba1e7ab8ad0e0544 09-KUL-min-studietid-Brundtland-DK-03.jpg

e2d4f47d636d8acb7bb75bd05557fc97 unspecified.jpg
e2d4f47d636d8acb7bb75bd05557fc97 unspecified-1.jpg
e2d4f47d636d8acb7bb75bd05557fc97 unspecified-1.jpg

336eb7a006550fdda1cadd63c849aa9a Urortfinalen-DarligVane-AC-5.jpg
336eb7a006550fdda1cadd63c849aa9a Urortfinalen-DarligVane-AC-5.jpg
336eb7a006550fdda1cadd63c849aa9a Urortfinalen-DarligVane-AC-5.jpg

d08b94ded6455f6b87faaa161b95e6dd IMG-46782000.jpg
d08b94ded6455f6b87faaa161b95e6dd 09-KUL-Islamsatsing-DK-03.jpg

1b3b504fe755f55148e58929c202194a 08-KUL-foreleser-AN-1.jpg
1b3b504fe755f55148e58929c202194a 08-KUL-foreleser-AN-1.jpg

7bf13c66a234ceb4f51b742c3d3b29ef KUL-7-ANNE-NW-2-0.jpg
7bf13c66a234ceb4f51b742c3d3b29ef KUL-7-ANNE-NW-2-0.jpg

7e9132e4a6b48ff703e95628702d67a7 07-KUL-foreleser-MD.jpg
7e9132e4a6b48ff703e95628702d67a7 07-KUL-foreles

126e7f1b54f93f72f687ce296351c1be SV-0005-KB.jpg
126e7f1b54f93f72f687ce296351c1be SV-0005-KB.jpg

4b5b7a2748cf1a9e22cccffe7cb38a8d 18-Cabaret-07-STL.jpg
4b5b7a2748cf1a9e22cccffe7cb38a8d 18-Cabaret-07-STL.jpg

5a65c5e75c2e086e95dfc7dd136e5afb 20-kult-gahr-store01-web-KB.jpg
5a65c5e75c2e086e95dfc7dd136e5afb 20-kult-gahr-store01-web-KB.jpg

11d2648f5fd74b6bc2edacf466e3632f 30-NYH-grontuio-01-MAD.jpg
11d2648f5fd74b6bc2edacf466e3632f 30-NYH-grontuio-02-MAD.jpg

47072d3913c76e37f98824537abbf9bb 14Soppel.jpg
47072d3913c76e37f98824537abbf9bb Soppel.jpg

edabd5ec52fb9f9b437218dc7da0c05e 14-NYH-nyhrektornih-07-HHH.jpg
edabd5ec52fb9f9b437218dc7da0c05e 14-NYH-nyhrektornih-07-HHH.jpg

f909c574836b149b2c36f02e012f957b Ingeborg-Grindheim-Slinde.jpg
f909c574836b149b2c36f02e012f957b Ingeborg-Grindheim-Slinde.jpg

1baff4959f731723e9808d6be76f8101 snill-gutt-26.jpg
1baff4959f731723e9808d6be76f8101 snill-gutt-26.jpg
1baff4959f731723e9808d6be76f8101 snill-gutt-26.jpg
1baff4959f731723e9808d6be76f8101 Marcus-

480e1c8ecfc2fda74ffc34709693bf81 17-Mag-blindernsomventested-karpediem-02-STL.jpg
480e1c8ecfc2fda74ffc34709693bf81 17-OyeblikkKarpediem-03-STL.jpg

277fb7e34e25fb38c747eef1faa04dbd OL-5124-utsnitt-ren-bakgrunn-farge.jpg
277fb7e34e25fb38c747eef1faa04dbd OL-5124-utsnitt-ren-bakgrunn-farge-2.jpg

7485682c2b01f626c51a3522656494fb 11-NYH-roeisaksen-09-HD.jpg
7485682c2b01f626c51a3522656494fb 12-NYH-roeisaksen-09-HD.jpg

91ec514bcdb0f172b95a52f45399b0b9 09-KUL-stengtbarkhio-03-HD.jpg
91ec514bcdb0f172b95a52f45399b0b9 MG-8572.jpg
91ec514bcdb0f172b95a52f45399b0b9 12-KUL-stengtbarkhio-03-HD.jpg

c59fedd458d07c0affca35a1dbdfef5d 09-KUL-stengtbarkhio-02-HD.jpg
c59fedd458d07c0affca35a1dbdfef5d MG-8579.jpg
c59fedd458d07c0affca35a1dbdfef5d 12-KUL-stengtbarkhio-02-HD.jpg

6b9ad9399dfa81547490ebe83d81011f megsospol.jpg
6b9ad9399dfa81547490ebe83d81011f megsospol.jpg

0f8a24700ecb74391c4aedf235c256ec IMG-1850.jpg
0f8a24700ecb74391c4aedf235c256ec 11-DEB-forskerintervju-LinnCecilieAnkerSorensen11.jpg

49216

ac6280f42edd0fc749692b444e47dc57 25-nyh-dekan-ontologi-Web.jpg
ac6280f42edd0fc749692b444e47dc57 25-nyh-dekan-ontologi-Web.jpg

f8b82c5b76a63b679fba855fc87e8322 10-stu-skatte-money-web-MP.jpg
f8b82c5b76a63b679fba855fc87e8322 10-stu-skatte-money-web-MP.jpg

9daa14762ebedebf38a8a11cc84cb496 17-nyh-bolgsaken-arkiv-Web-BO.jpg
9daa14762ebedebf38a8a11cc84cb496 17-nyh-bolgsaken-arkiv-Web-BO.jpg

72639324de833ba9bcde1042c7141163 26-nyh-Statsbudsjett-Rektor-ArkivAO.jpg
72639324de833ba9bcde1042c7141163 27-minstudietid-Rektor-ArkivAO.jpg

740e56e6991da6ed5008c1fd28b56466 26-debatt-VillaEika-Web-SL.jpg
740e56e6991da6ed5008c1fd28b56466 26-debatt-VillaEika-Web-SL.jpg

55af91249f51e71e8253d19d05770dd7 25-KUL-minstudietid-05-NWL.jpg
55af91249f51e71e8253d19d05770dd7 25-KUL-minstudietid-05-NWL.jpg

14f07a8e799110a404deff172ca6f022 25-KUL-minstudietid-04-NWL.jpg
14f07a8e799110a404deff172ca6f022 25-KUL-minstudietid-04-NWL.jpg

2fcc17430c5c3b0fe4f1f62f06e01e23 29kultmalemiaBWWebHBR.jpg
2fcc17430c5c3b0fe4f1f

f3956c98b624f30fa38d0d58d75937fe 14-Vi-spor-02-STL.jpg
f3956c98b624f30fa38d0d58d75937fe 14-Vi-spor-02-STL.jpg

08b7baf759b8cc934714ab5944801254 12-nyh-gris19-SR.jpg
08b7baf759b8cc934714ab5944801254 12-nyh-gris19-SR.jpg

926095ba09ac3b5da3dbf343971e1dc1 25-nyh-uionetts2-web-TSH.jpg
926095ba09ac3b5da3dbf343971e1dc1 25-nyh-uionetts2-web-TSH.jpg

4ebcb7d6b62a968537686ce16af8e4b2 26-KUN-Kunst-pa-HiOA-VIB01.jpg
4ebcb7d6b62a968537686ce16af8e4b2 26-KUN-Kunst-pa-HiOA-VIB01.jpg

b5398a1bb54942b24126b9cf904fa2bc nett-studentparlament-tine-tang-engvik01-AN.jpg
b5398a1bb54942b24126b9cf904fa2bc nett-studentparlament-tine-taang-engvik01-AN.jpg

97130b621f74408e9d694c5d3ab3b9c7 26-nyh-sverigehaelvete3.jpg
97130b621f74408e9d694c5d3ab3b9c7 26-nyh-sverigehaelvete3.jpg

9d5edb70c8280a0fd5e130ecd555ecb9 15-magapekv-06-UK.jpg
9d5edb70c8280a0fd5e130ecd555ecb9 15-magapekv-05-UK.jpg

25960aa97f5e24109c97004ae986e573 16-anm-psyk-7-BO.jpg
25960aa97f5e24109c97004ae986e573 16-anm-psyk-7-BO.jpg

83e2dde379d9a803e2e

65c6228de00cc7098968e53a2d9532d6 VildeBorse-JohnDee003.jpg
65c6228de00cc7098968e53a2d9532d6 05-ANM-johndee-VIB-4.jpg

6053a3f37c2daaede753d1a1f9a9c489 IMG-0857.jpg
6053a3f37c2daaede753d1a1f9a9c489 IMG-0857.jpg
6053a3f37c2daaede753d1a1f9a9c489 IMG-0857.jpg
6053a3f37c2daaede753d1a1f9a9c489 IMG-0857.jpg

baae917a7467caf0df904ef5d519edcf IMG-0882.jpg
baae917a7467caf0df904ef5d519edcf IMG-0882.jpg
baae917a7467caf0df904ef5d519edcf IMG-0882.jpg
baae917a7467caf0df904ef5d519edcf IMG-0882.jpg
baae917a7467caf0df904ef5d519edcf IMG-0882.jpg
baae917a7467caf0df904ef5d519edcf IMG-0882.jpg

968a496e4e0f1bdd292c2255e2a39138 02-URIX-studentgjeld-DK-13.jpg
968a496e4e0f1bdd292c2255e2a39138 02-URIX-studentgjeld-DK-13.jpg

77342e27800185aaec2a41f0536ab6aa IMG-3093.jpg
77342e27800185aaec2a41f0536ab6aa IMG-3093.jpg

1a1a9aa50c1a9467656bfc89bb9effe8 29-NYH-nyhbrannutry-06-EW.jpg
1a1a9aa50c1a9467656bfc89bb9effe8 29-NYH-nyhbrannutry-05-EW.jpg

6c178771685a89740c3322f258bff3f9 28-NYH-nyhstudentle-01-HG.jpg
6c178771

8f34dcdc0bc3858270b61968ddfc9c6a Heftet.jpg
8f34dcdc0bc3858270b61968ddfc9c6a Heftet.jpg
8f34dcdc0bc3858270b61968ddfc9c6a Heftet.jpg

511cc65aa99fae7ae8517e34805dfaa6 22-MAG-magportrettc-04-IS.jpg
511cc65aa99fae7ae8517e34805dfaa6 22-MAG-magportrettc-04-IS.jpg

65fb3429bd77242ae3881416d1e9f7bd 23-NYH-nyhmiljopart-04-PP.jpg
65fb3429bd77242ae3881416d1e9f7bd 23-NYH-nyhmiljopart-04-PP.jpg

dee181904ea345744ccb9fe2ada62067 18-ottersen-01-HE.jpg
dee181904ea345744ccb9fe2ada62067 18-ottersen-01-HE_Vhx2cZ9.jpg

70ed917ab44145580035c1c2706bd2f9 Gjokeredet4.jpg
70ed917ab44145580035c1c2706bd2f9 Gjokeredet4.jpg

9bad6dbd989113480e5e00a8a73e9150 IMG-46582000.jpg
9bad6dbd989113480e5e00a8a73e9150 09-KUL-Islamsatsing-DK-02.jpg

99c12c9342103e68eab9a08ca226616b 06-NYH-HH-20.jpg
99c12c9342103e68eab9a08ca226616b 06-NYH-OSI-MarianneP-HH-11.jpg

a2e79422e2916149b84554d90e2b140a KUL-6-MUSIKK-DAGNY-2-NW.jpg
a2e79422e2916149b84554d90e2b140a 06-KUL-dagnyartist-NW-3.jpg

adb6b7107b3da28f42ae7c3576786abf IMG-1655.j

e997e782058c777d1787582c86a409e9 2e8c146e-f654-449a-bee4-c9edabda1a33.jpg
e997e782058c777d1787582c86a409e9 Ola-Magnussen-Rydje.jpg

2d2297a2222e413e7bd0f387caa39d19 unnamed-3.jpg
2d2297a2222e413e7bd0f387caa39d19 unnamed-3.jpg
2d2297a2222e413e7bd0f387caa39d19 unnamed-3.jpg

29a475e772bc43fe3617db113fb4c25d 04-MAG-magflyktning-07-MKS.jpg
29a475e772bc43fe3617db113fb4c25d 04-MAG-magflyktning-07-MKS.jpg

161d40f0566e838d48b649b466a8d841 04-MAG-magflyktning-06-MKS.jpg
161d40f0566e838d48b649b466a8d841 04-MAG-magflyktning-06-MKS.jpg

ee0631fcd35972a1d3966cbd07d479ee 04-MAG-magflyktning-05-MKS.jpg
ee0631fcd35972a1d3966cbd07d479ee 04-MAG-magflyktning-05-MKS.jpg

d8663e51bcf9990fdaeb15b6f0850f2b 04-MAG-magflyktning-04-MKS.jpg
d8663e51bcf9990fdaeb15b6f0850f2b 04-MAG-magflyktning-04-MKS.jpg

f9a9b968d78f1346bca500fb0af5fb14 04-MAG-magflyktning-03-MKS.jpg
f9a9b968d78f1346bca500fb0af5fb14 04-MAG-magflyktning-03-MKS.jpg

360491fcdb42de0d2318751d0164bb06 04-MAG-magflyktning-02-MKS.jpg
360491fcdb42de0d2

db3ce30fe81573905e35015b7d94bb11 10-KUL-kultbylarmwo-05-RP.jpg
db3ce30fe81573905e35015b7d94bb11 10-KUL-kultbylarmwo-05-RP.jpg

184d277d69b59e4f9b67521d85e09aef Fil-22.01.2016-20.19.48.jpg
184d277d69b59e4f9b67521d85e09aef Fil-22.01.2016-20.19.48.jpg
184d277d69b59e4f9b67521d85e09aef Fil-22.01.2016-20.19.48.jpg
184d277d69b59e4f9b67521d85e09aef Fil-22.01.2016-20.19.48.jpg
184d277d69b59e4f9b67521d85e09aef Fil-22.01.2016-20.19.48.jpg

c29601371b948a493e00d696bfbdcd14 22-KUL-kulturwester-02-SBV.jpg
c29601371b948a493e00d696bfbdcd14 22-KUL-kulturwester-02-SBV.jpg

2682ec20dbe0ce9f31a0b9143aa74c88 10-KUL-kultbylarmwo-01-RP.jpg
2682ec20dbe0ce9f31a0b9143aa74c88 10-KUL-kultbylarmwo-01-RP.jpg

0004a71e4b660e9e2dbc87f697ed4b6d 10-KUL-minstudietid-03-HB.jpg
0004a71e4b660e9e2dbc87f697ed4b6d 10-KUL-minstudietid-03-HB.jpg
0004a71e4b660e9e2dbc87f697ed4b6d 10-KUL-minstudietid-03-HB.jpg

2ed5789b92977475b76050c528da1356 aleksander.jpg
2ed5789b92977475b76050c528da1356 aleksander.jpg

8524b5ca0c473b51c6d5937b

217d4c762146e8ee8cf0b6fd45e08255 11-NYH-roeisaksen-06-HD.jpg
217d4c762146e8ee8cf0b6fd45e08255 12-NYH-roeisaksen-06-HD.jpg

20c7c3ed2008ef9a1b38d2b6151672fe 11-NYH-roeisaksen-05-HD.jpg
20c7c3ed2008ef9a1b38d2b6151672fe 12-NYH-roeisaksen-05-HD.jpg

30ddf3840ea2e05f72779ffc93d9406a 11-NYH-roeisaksen-04-HD.jpg
30ddf3840ea2e05f72779ffc93d9406a 12-NYH-roeisaksen-04-HD.jpg

14984f78ef20e0332780633b1f18358f 11-NYH-roeisaksen-03-HD.jpg
14984f78ef20e0332780633b1f18358f 12-NYH-roeisaksen-03-HD.jpg

6ec257b5e5cac9d9fdb9734d5cadb729 11-NYH-roeisaksen-02-HD.jpg
6ec257b5e5cac9d9fdb9734d5cadb729 12-NYH-roeisaksen-02-HD.jpg

e5343ec39a0d5ed8300e3e18218880a4 MG-8547.jpg
e5343ec39a0d5ed8300e3e18218880a4 12-KUL-studentpraktikant-10-HD.jpg

cdff0134c38db004cf03df9fd9910c1d MG-8543.jpg
cdff0134c38db004cf03df9fd9910c1d 12-KUL-studentpraktikant-09-HD.jpg

9c4ed772f4c7a3845d733d083ae85a1e MG-8475.jpg
9c4ed772f4c7a3845d733d083ae85a1e 12-KUL-studentpraktikant-08-HD.jpg

b88653ea89befbbc95e50d2040478560 MG-8470.jp

f2c9614329bd3dcc0a7936873bd0001a Arif-01.jpg
f2c9614329bd3dcc0a7936873bd0001a Arif-01-HB.jpg

eafce94b235862434801b79d5d25e83a OL-0937.jpg
eafce94b235862434801b79d5d25e83a OYEBLIKKET-HB.jpg

0e404328335194200f2687703476b3be NYH-Karakter-AN.jpg
0e404328335194200f2687703476b3be NYH-Karakter-AN.jpg
0e404328335194200f2687703476b3be NYH-Karakter-AN.jpg
0e404328335194200f2687703476b3be NYH-Karakter-AN.jpg

b24baf6ca5f7d3eb3e1fede291f3bff5 MG-8062-8.LSN.jpg
b24baf6ca5f7d3eb3e1fede291f3bff5 MG-8062-8.LSN.jpg

fee6a6c08637396d93ea99a2ebf3adc9 Rektor-0631.jpg
fee6a6c08637396d93ea99a2ebf3adc9 Rektor-0631.jpg

ca338d871a7928b2affa839ae6103393 WIE-tassen-7.jpg
ca338d871a7928b2affa839ae6103393 WIE-tassen-7.jpg
ca338d871a7928b2affa839ae6103393 WIE-tassen-7.jpg
ca338d871a7928b2affa839ae6103393 WIE-tassen-7.jpg

95d2ba0aa4bb82b80a9c30db7581dcf2 NYH-Lillo4-EDD.jpg
95d2ba0aa4bb82b80a9c30db7581dcf2 NYH-Lillo4-EDD.jpg

e16a5eac9d6f861ce9e23ea68019208e 18425994283-08666b3e66-o.jpg
e16a5eac9d6f861ce9e23ea680

79d6d6004da396d57ea7a378e8f7dccc Universitas-august.jpg
79d6d6004da396d57ea7a378e8f7dccc Universitas-august.jpg

2343c3dae1a61788b5ad2a527c997dc9 Simen-AA-Universitas-fransk-utgave-m-tekst-og-farge-5.jpg
2343c3dae1a61788b5ad2a527c997dc9 Simen-AA-Universitas-fransk-utgave-m-tekst-og-farge-5.jpg

393695f99914f80999b6987e8962e00a 21-KUL-minstudietid-1-RP.jpg
393695f99914f80999b6987e8962e00a 21-KUL-minstudietid-1-RP.jpg
393695f99914f80999b6987e8962e00a 21-KUL-minstudietid-1-RP.jpg

c242c1d60a7af7554221d6f9bc73e985 22-NYH-Intervjuet-AN.jpg
c242c1d60a7af7554221d6f9bc73e985 22-NYH-Intervjuet-AN.jpg

df065e77d69f76f5fc0c99cdffd38c17 22-NYH-Intervjuet-2-AN.jpg
df065e77d69f76f5fc0c99cdffd38c17 22-NYH-Intervjuet-2-AN.jpg

0b3d647706a5302ca9ad3b8114cd0aa2 11-KUL-dommedagpaui-01-AM.jpg
0b3d647706a5302ca9ad3b8114cd0aa2 11-KUL-dommedagpaui-01-AM.jpg
0b3d647706a5302ca9ad3b8114cd0aa2 11-KUL-dommedagpaui-01-AM.jpg

c6b940f4ee2bbb667c4c50cba62ad35c deeyah-khan.jpg
c6b940f4ee2bbb667c4c50cba62ad35c deeyah-

9f8372c8e4e8d198f938c79d95fbc337 24-KUL-vinkurs-02-DK.jpg
9f8372c8e4e8d198f938c79d95fbc337 24-KUL-vinkurs-02-DK.jpg

2f1c8abcd1fd0fbadac0f6cf5edbb1d3 30-nyh-BI-02-HDH.jpg
2f1c8abcd1fd0fbadac0f6cf5edbb1d3 30-nyh-BI-02-HDH.jpg

95f113d93a00ba108ba755c2b8a362a7 24-KUL-Sikh-HMLS-6.jpg
95f113d93a00ba108ba755c2b8a362a7 24-KUL-Sikh-HMLS-6.jpg

046b37d369ce7b005c33b247b85c1235 24-KUL-Sikh-HMLS-4.jpg
046b37d369ce7b005c33b247b85c1235 24-KUL-Sikh-HMLS-4.jpg

ecd9c920a26e06c4e88d487325e34928 NYH-ottersen.jpg
ecd9c920a26e06c4e88d487325e34928 NYH-ottersen.jpg
ecd9c920a26e06c4e88d487325e34928 NYH-ottersen.jpg
ecd9c920a26e06c4e88d487325e34928 NYH-ottersen.jpg
ecd9c920a26e06c4e88d487325e34928 nyh-ottersen-29344394274-o.jpg

cb6224ad4305034cc20e0fcbe6b64166 0ibsen-ungarsk.jpg
cb6224ad4305034cc20e0fcbe6b64166 0ibsen-ungarsk.jpg

d89dc78ec2af5ae9d05a1c4fc1fc5b08 24-NYH-lesesal-BKB-7.jpg
d89dc78ec2af5ae9d05a1c4fc1fc5b08 24-NYH-lesesal-BKB-7.jpg

8a3a95016a30c971e6cf12c231e46676 24-legeflytting-MD.jpg
8a3a9

96418ca4ca77da49f033efc802e3fe98 101016-008.jpg
96418ca4ca77da49f033efc802e3fe98 26-DEB-Forskerintervju-VIB06.jpg

9949a202d9c62a163a6cc5bb03e01362 101016-007.jpg
9949a202d9c62a163a6cc5bb03e01362 26-DEB-Forskerintervju-VIB05.jpg

339a8a2e6806f8fa105331ac79f2c4ad 101016-006.jpg
339a8a2e6806f8fa105331ac79f2c4ad 26-DEB-Forskerintervju-VIB04.jpg

431dc5561d17277475989c148168b695 101016-005.jpg
431dc5561d17277475989c148168b695 26-DEB-Forskerintervju-VIB03.jpg

be203b419b84e683d97e255814e0d768 101016-003.jpg
be203b419b84e683d97e255814e0d768 26-DEB-Forskerintervju-VIB02.jpg

750258ffbcc825be63589fa515e40f0c 101016-002.jpg
750258ffbcc825be63589fa515e40f0c 26-DEB-Forskerintervju-VIB01.jpg

d2e835def55d1cc9affa92db6b39c4f9 101016-047.jpg
d2e835def55d1cc9affa92db6b39c4f9 26-KU-gronnsak-VIB28.jpg

826100c0f27c71b83ffc1c114742c67e 101016-046.jpg
826100c0f27c71b83ffc1c114742c67e 26-KU-gronnsak-VIB27.jpg

ce688fc86dbfa246b93d65cd531b16b9 101016-045.jpg
ce688fc86dbfa246b93d65cd531b16b9 26-KU-gronnsak-

a9d50ee9c9eebbb21abaa0af6d8c2f17 25-amn-spisesteder-XP15.jpg
a9d50ee9c9eebbb21abaa0af6d8c2f17 25-amn-spisesteder-XP15.jpg

36498d1bba12911d180cd3d1eda24db6 vgqdqmf40.jpg
36498d1bba12911d180cd3d1eda24db6 vgqdqmf40.jpg

895414bac84aa0fc100ca4e9d90bfa66 26-KUN-Kunst-pa-HiOA-VIB16.jpg
895414bac84aa0fc100ca4e9d90bfa66 26-KUN-Kunst-pa-HiOA-VIB16.jpg

9129b9d309e390cf3075f16126726c15 2000.jpg
9129b9d309e390cf3075f16126726c15 BC193662000.jpg

337d341fa858bcc26e00eab216373e1d last-ned-2.jpg
337d341fa858bcc26e00eab216373e1d last-ned-2-2.jpg

4bc3d05da4d28f69dc40f102badabd4c jenny-hvla.jpg
4bc3d05da4d28f69dc40f102badabd4c jenny-hvla.jpg
4bc3d05da4d28f69dc40f102badabd4c jenny-hvla.jpg

7058e9bf16daa29757303322ab5bd57a HiOA-info.jpg
7058e9bf16daa29757303322ab5bd57a hioa.jpg

2d928861b8030f10de4c0ac68adbab39 27-anm-lego-XP-4.jpg
2d928861b8030f10de4c0ac68adbab39 27-anm-lego-XP-4.jpg
2d928861b8030f10de4c0ac68adbab39 27-anm-lego-XP-4.jpg

6d7ef08a72110db73a8773b6f52e194a nattverd-gamle-tassere.jpg
6d7e

54b0633d0c9974d21da38314df241638 28-KUL-NamrahSaleem-RP01.jpg
54b0633d0c9974d21da38314df241638 MG-6626.jpg

c4ab6bbf63836d28652d68019c7f4cd3 vet-601.jpg
c4ab6bbf63836d28652d68019c7f4cd3 vet-HB-601.jpg

5ab879e0cee4135c446571c5b5f19cbe vet-593.jpg
5ab879e0cee4135c446571c5b5f19cbe vet-HB-593.jpg

554c4c85fe90e50de636f3b8d1aeffff vet-584.jpg
554c4c85fe90e50de636f3b8d1aeffff vet-HB-584.jpg

2602e1f696f47e24a31f481cc334da63 vet-576.jpg
2602e1f696f47e24a31f481cc334da63 vet-HB-576.jpg

f8e5bed259142c67b11a6c9b44a5c56f vet-529.jpg
f8e5bed259142c67b11a6c9b44a5c56f vet-HB-529.jpg

1be331279a6055dbd52353124fe6ce71 west-avis-372.jpg
1be331279a6055dbd52353124fe6ce71 west-avis-HB-372.jpg

dfe2b8511d8bd4f841bc3ed0237e150c west-avis-227.jpg
dfe2b8511d8bd4f841bc3ed0237e150c west-avis-HB-227.jpg

71b36b8923e54abb9df4000e6f17786b vet-482.jpg
71b36b8923e54abb9df4000e6f17786b vet-HB-482.jpg

70e1408a69653921286db6a636434d30 vet-214.jpg
70e1408a69653921286db6a636434d30 vet-HB-214.jpg

21e263c00a08dca1d804e5

19cd5252464ab4d3456d3cade3728f4e Trump-Rally-11.jpg
19cd5252464ab4d3456d3cade3728f4e Trump-Rally-11.jpg

1acf4848096e187dcbcba6e8f39ed191 Trump-Rally-15-MATTIS.jpg
1acf4848096e187dcbcba6e8f39ed191 Trump-Rally-15-MATTIS.jpg

d31448fff1d90b90a9b6257bbb09c41e Trump-Rally-13-1.jpg
d31448fff1d90b90a9b6257bbb09c41e Trump-Rally-13-1.jpg

efca3533e97b5fb661cad63f809caa5e tidslinje3.jpg
efca3533e97b5fb661cad63f809caa5e tidslinje3.jpg
efca3533e97b5fb661cad63f809caa5e tidslinje3.jpg

fe67c8c15260a7d573f86bb52ea36f1f 29-NYH-datelesesal-AN-3.jpg
fe67c8c15260a7d573f86bb52ea36f1f 29-NYH-datelesesal-AN-3.jpg

cd9b305b5b6607d7919472e73af62f19 29-NYH-datelesesal-AN-2.jpg
cd9b305b5b6607d7919472e73af62f19 29-NYH-datelesesal-AN-2.jpg

d441f84a1ccb8cb01041e3c86d488031 29-NYH-datelesesal-AN-1.jpg
d441f84a1ccb8cb01041e3c86d488031 29-NYH-datelesesal-AN-1.jpg

f0ce59d4744110f1d2c6a00993e08aa8 Marianne-Andenaes-foto-Skjalg-Bohmer-Vold-trykk.jpg
f0ce59d4744110f1d2c6a00993e08aa8 Marianne-Andenaes-foto-Skjalg-Bohme

5afd13584a75c621e4ed96052416c7de Kultur-menneskerett-HB-95.jpg
5afd13584a75c621e4ed96052416c7de 32-KUL-menneskerett-HB-03.jpg

70e7bba251592add9065872cd5241d62 Kultur-menneskerett-HB-78.jpg
70e7bba251592add9065872cd5241d62 32-KUL-menneskerett-HB-01.jpg

c5f455ea158b2a889aba36c7b241074d Kultur-menneskerett-HB-176.jpg
c5f455ea158b2a889aba36c7b241074d 32-KUL-menneskerett-HB-06.jpg

74b1f04c29a65fe805f9a3ac331dcd52 Kultur-menneskerett-HB-127.jpg
74b1f04c29a65fe805f9a3ac331dcd52 32-KUL-menneskerett-HB-04.jpg

f06ec8a5ba6af75cf1122899706cefa8 fempagata-SAIH-XP-3.jpg
f06ec8a5ba6af75cf1122899706cefa8 fempagata-SAIH-XP-3.jpg

37cd807795be53bda72b1e862f61ade0 fempagata-SAIH-XP-2.jpg
37cd807795be53bda72b1e862f61ade0 fempagata-SAIH-XP-2.jpg

a773270696040b1ce2e01ffddbd27076 fempagata-SAIH-XP-1.jpg
a773270696040b1ce2e01ffddbd27076 fempagata-SAIH-XP-1.jpg

de68d5ffac64b5dc7a7e94f25e94c3db IngaRiseth-SAIH-XP-2.jpg
de68d5ffac64b5dc7a7e94f25e94c3db IngaRiseth-SAIH-XP-2.jpg

ff576099216a92f8393225ccbeba

07c33adc857ea333581214fb7778a1a0 IMG-45932000.jpg
07c33adc857ea333581214fb7778a1a0 09-KUL-min-studietid-Brundtland-DK-06.jpg

d95614e8074f580d015d7277d9e58afb IMG-45902000.jpg
d95614e8074f580d015d7277d9e58afb 09-KUL-min-studietid-Brundtland-DK-05.jpg

efa3a8d1a5ac879fd5d8a557176e0449 IMG-45482000.jpg
efa3a8d1a5ac879fd5d8a557176e0449 09-KUL-min-studietid-Brundtland-DK-02.jpg

894e50f31a97d9acb51b2f12695159fa IMG-46862000.jpg
894e50f31a97d9acb51b2f12695159fa 09-KUL-Islamsatsing-DK-04.jpg

ce6b18d6accc8b6542e5771e579da1cd 10-MAG-Katolikk-XP-5.jpg
ce6b18d6accc8b6542e5771e579da1cd 10-MAG-Katolikk-XP-5.jpg

f6b1e9475d9d01d0baeb2cc593743798 32-NYH-dns-AN-4.jpg
f6b1e9475d9d01d0baeb2cc593743798 32-NYH-dns-AN-4.jpg

de3b74f120ec5cc62e11630ff1c682b5 Ole-Petter-Ottersen-2.jpg
de3b74f120ec5cc62e11630ff1c682b5 10-MAG-OlePetterOttersen-AS-02.jpg

a1707b70cb507d9efd95429ab74f381f Ole-Petter-Ottersen-1.jpg
a1707b70cb507d9efd95429ab74f381f 10-MAG-OlePetterOttersen-AS-01.jpg

915f87e77aa504b9bda6b9f57f70

In [52]:
import imagehash
img = ImageFile.objects.first()
imh = imagehash.hex_to_hash(img._imagehash)
imh.hash

array([[ True,  True, False, False, False,  True, False, False],
       [False,  True,  True, False,  True, False,  True, False],
       [ True,  True, False, False,  True, False, False,  True],
       [False,  True, False, False,  True,  True, False, False],
       [False,  True, False, False, False,  True, False,  True],
       [False, False, False, False,  True,  True, False, False],
       [ True, False, False,  True,  True, False, False,  True],
       [ True,  True,  True,  True, False, False,  True,  True]], dtype=bool)

In [144]:
# Do search
def closest_image(image_file, haystack):
    
    imh = imagehash.hex_to_hash(image_file._imagehash)
    def keyfunc(item): 
        value = imh - item[1]
        # print(value, end=' ')
        return value
        
    best = sorted(haystack, key=keyfunc)[:10]
    return [(keyfunc(item), ImageFile.objects.get(pk=item[0])) for item in best]

img = ProfileImage.objects.order_by('?').first()
matches = closest_image(img, data)
print('\n'.join(f'{val:<2} {img.pk:<5} {img}' for val, img in matches))
display(list(zip(*matches))[1])

0  33534 Joakim-P-Berg.jpg
15 35551 Morten-Schwencke.jpg
15 35966 Ida-Wammer.jpg
15 4108  09-tore-oksholen.jpg
16 41264 Eirik-Bryhn-Jacobsen.jpg
17 41112 Vebjorn-Wold.jpg
17 35978 13170013-10156925034270442-502688036-O.jpg
17 1207  09-BADSTUE.jpg
17 32396 Studenthagen-08.jpg
18 11142 17-visporPortrett.jpg


In [150]:
ImageFile.objects.pending().count()
ImageFile.objects.count()

27919

## Merge duplicates

In [47]:
def assert_equal(objects, attrs=()):
    master, *others = objects
    for other in others:
        for attr in attrs:
            master_attr = getattr(master, attr)
            other_attr = getattr(other, attr)
            if master_attr != other_attr:
                raise ValueError(f'{master}.{attr}={master_attr}\n{other}.{attr}={other_attr}')
    
def merge_imghash_dupes():
    images = ImageFile.objects.exclude(_imagehash='')
    dupes = duplicates(images, '_imagehash', ['-full_width','-_size','created'])
    c = len(dupes)
    for qs in dupes:
        items = list(qs)
        #try: 
        #    assert_equal(items, attrs=['original', '_imagehash'])
        #except ValueError as e:
        #    print(e)
        #    continue
        merge_instances(*items)
        print(c)
        c -= 1
        for item in items:
            print(f'{item._imagehash} {item.size} {item.md5} {item.full_width} {item}')

merge_imghash_dupes()
            
            

121
692131313327cb8d 118062 70960096b0203cd38a8a6b1d3395bb1e 1500 04-KUL-bareegil-SN-4.jpg
692131313327cb8d 117250 a06f087ce4619867035bc0201a97db68 1500 ARA6564.jpg
120
a48472b47262220c 193433 5d92ce158af8ac0f2faace772a7676ae 1500 04-KUL-bareegil-SN-1.jpg
a48472b47262220c 192620 1436eabbf9464e3e025074075d302500 1500 ARA6525.jpg
119
a9d953c929626373 428540 e01122d4cff3d397aa39bcc4a8b3ebef 1500 05-DEB-forskerintervju-EBJ-9.jpg
a9d953c929626373 427284 16c0dc7afa4eb15c167725123a58e5ef 1500 DSCF0298-copy.jpg
118
2ccde6a3b4c7b631 147374 7996719498ef20975cadc66bd8ba6987 1500 05-DEB-forskerintervju-EBJ-7.jpg
2ccde6a3b4c7b631 146117 0bd4c6f0a332486faaf9449a848abbad 1500 DSCF0301.jpg
117
e8d9d3c9297263fb 416410 4a4e646c7cade894366c6a0e27fae318 1500 05-DEB-forskerintervju-EBJ-6.jpg
e8d9d3c9297263fb 415146 cc0fe9acebf77928a198ab8fcba05290 1500 DSCF0298.jpg
116
5a2cece373365afb 232635 e8b1ade817e8d606cc7528c85e36cb62 1500 05-DEB-forskerintervju-EBJ-2.jpg
5a2cece373365afb 231371 3c059447851ae9d6b57a

73
4f0d867e1b9b0607 171286 3567d6c5a76f55abff99fc8b180d4f7b 1500 JH77029.jpg
4f0d867e1b9b0607 166140 3b584d44b683d91e6c503a4f6f9d848f 1500 07-KUL-skitur-GO-18.jpg
72
63b243238bedec16 299718 b5256c1c734e0d2f0b41e5c98e4dd204 1500 07-KUL-skitur-GO-16.jpg
63b243238bedec16 298541 fad0a056146efcd95f2430f66a1f801d 1500 JH76951.jpg
71
242c3aaba3d52b13 232743 94a189a47e3c4ab62418cec2a07d2625 1500 07-KUL-skitur-GO-12.jpg
242c3aaba3d52b13 231566 6b9f5ef71fb969fd28f6e90084d27172 1500 JH76914.jpg
70
360f172f064f2533 128094 70331f0d9b62dfec93807ef3364d506b 1500 JH77180.jpg
360f172f064f2533 122947 751cfb813346c489979a2d71602f5bb4 1500 07-KUL-skitur-GO-20.jpg
69
8d4c5d5c6614a4a6 278335 faa74c7c6339479a15b8af2d0f8bc0be 1500 JH76841.jpg
8d4c5d5c6614a4a6 273188 12da16625495dbd4ff1e7e4552cf8bf4 1500 07-KUL-skitur-GO-06.jpg
68
c71931b3f1ada95e 176025 d3c7f2446afa73ec3052c4502fa1853d 1500 07-NYH-toksikologi-AN-06.jpg
c71931b3f1ada95e 174731 341d7d203bba923a9793c2ff2b106b7a 1500 07-NYH-Toksikologi-AN-6.jpg
6

27
43d9d9b9a56d3a26 45806 8a01737c8cb2b63c9185808c79a0a6cf 1024 Illustrasjon-kronikk-Karlsoen-foto-kjetil-blom.jpg
43d9d9b9a56d3a26 44982 69eed8d4e3158b473227169571e908ff 1024 ole-p-ottersen-02.jpg
26
afa6a56d4dc66265 735229 0dcb6885642c65e5295aa486d3651e1d 2048 netturix09.jpg
afa6a56d4dc66265 175039 d97b461c471e2dac651c9aa33a1ac90f 1500 netturix09.jpg
25
4f6727676fcfc795 51482 9f0471ea69392afa62b5bb125ddbc4d3 1024 30-MAG-repdialektfo-11-HE.jpg
4f6727676fcfc795 51180 c5939fdac959667f376a84cc02028ef6 1024 30-MAG-repdialektfo-06-HE.jpg
24
f4dad2614a634969 85426 082d05943c3059a4b2a90e10d52857d4 900 21-arbeidsmengde-paa-vethoeyskolen-01-RRJ.jpg
f4dad2614a634969 85305 5277df35be31bcfe2fc0e829c0681cfd 900 32-veterinAErhoeyskolen-Kristoffer-Forbergarkivfoto-01-RRJ.jpg
23
aaa262b270691a40 49313 34e2f9fecfd5d9412b11de5167339a21 838 31-adnotam-Rugaas.jpg
aaa262b270691a40 74260 57fdf961c97e3dd0bc01c30b12b7a1f5 503 14-adnotam-Rugaas-Web.jpg
22
040b3a32191e86c6 1394852 b26afc254919dd8c8b62b3bb34125

In [1]:
#finne personer som kan merges

from fuzzywuzzy import process
from django.db.models import Count
contributors = Contributor.objects.annotate(
        num_bylines=Count('byline')
    ).order_by('-num_bylines')

#one_hit_wonders = contributors.filter(num_bylines=1)
one_hit_wonders = []
with open('one_hit_wonders.txt') as fp:
    for line in fp.readlines():
        id = int(line.split()[0])
        one_hit_wonders.append(id)
        
    

In [3]:
nn = Contributor.objects.get(display_name="Nomen Nescio")
for bl in nn.byline_set.all():
    bylines = bl.story.byline_set.all()
    for byline in bylines:
        print(byline) 


@bl: Av: Nomen Nescio, KS)
@bl: Av: Nomen Nescio, LR)
@bl: Av: Nomen Nescio, HR)
@bl: Av: Nomen Nescio, NH)
@bl: Av: Nomen Nescio, PVRH)
@bl: Av: Nomen Nescio, IKL)
@bl: Av: Nomen Nescio, HR)
@bl: Av: Nomen Nescio, BHK)
@bl: Av: Nomen Nescio, Kjetil Strømme)
@bl: Av: Nomen Nescio, IKL)
@bl: Av: Nomen Nescio, BHK)
@bl: Av: Nomen Nescio, HR)
@bl: Av: Nomen Nescio, IKL)
@bl: Av: Nomen Nescio, HR)
@bl: Av: Nomen Nescio, BHK)
@bl: Tekst: Kjetil Strømme)
@bl: Foto: Berit Andreassen)
@bl: Av: Nomen Nescio, @maging: – Kan jeg skrive på russisk? undrer Nils Lid Hjort, professor i matematikk. Student Lars Sydnes sier ingen ting, han er allerede godt i gang med sin ode til kvinnen)
@bl: Av: Nomen Nescio, PVRH)
@bl: Av: Nomen Nescio, TL)
@bl: Av: Nomen Nescio, BHK)
@bl: Av: Nomen Nescio, HR)
@bl: Av: Nomen Nescio, Manus og regi: Bjarne Henning Kvåle. Kamera: Khai Nguyen)
@bl: Av: Nomen Nescio, PVRH)
@bl: Av: Nomen Nescio, HR)
@bl: Av: Nomen Nescio, PHP)
@bl: Av: Nomen Nescio, BHK)
@bl: Av: Nomen N

@bl: Av: Nomen Nescio, SEH)
@bl: Av: Nomen Nescio, AJ)
@bl: Av: Nomen Nescio, SEH)
@bl: Av: Nomen Nescio, AJ)
@bl: Av: Nomen Nescio, DSH)
@bl: Av: Nomen Nescio, KRN)
@bl: Av: Nomen Nescio, DSH)
@bl: Av: Nomen Nescio, KRN)
@bl: Av: Nomen Nescio, SB)
@bl: Av: Nomen Nescio, AJ)
@bl: Av: Nomen Nescio, SB)
@bl: Av: Nomen Nescio, AJ)
@bl: Av: Nomen Nescio, by: Naboens tre naboer (ved en av dem))
@bl: Av: Nomen Nescio, by: Leder i Studentparlamentet i Oslo Ingrid Stranger–Thorsen)
@bl: Av: Nomen Nescio, ES)
@bl: Av: Nomen Nescio, DSH)
@bl: Av: Nomen Nescio, KRN)
@bl: Av: Nomen Nescio, DSH)
@bl: Av: Nomen Nescio, KRN)
@bl: Av: Nomen Nescio, by: Lilly Susanne Stensland ( Foto: Balkaran Singh)
@bl: Av: Nomen Nescio, AJ)
@bl: Av: Nomen Nescio, IMJ)
@bl: Av: Nomen Nescio, IK)
@bl: Av: Nomen Nescio, IMJ)
@bl: Av: Nomen Nescio, IK)
@bl: Av: Nomen Nescio, by: Naboens tre naboer (ved en av dem))
@bl: Av: Nomen Nescio, by: Professor Emeritus Esther Bøk skriver for Universitas om vitenskap som forbigås 

@bl: Av: Nomen Nescio, CE)
@bl: Av: Nomen Nescio, by: Finn H. Eriksen (En av dem. Hemmelig))
@bl: Av: Nomen Nescio, text and styling: Camilla Svendsen Skriung Foto: Åshild Bekke Eidem Modeller: Dina Thomassen)
@bl: Av: Lance Morgas)
@bl: Av: Nomen Nescio, AGI)
@bl: Av: Nomen Nescio, HH)
@bl: Av: Nomen Nescio, AGI)
@bl: Av: Nomen Nescio, HH)
@bl: Av: Nomen Nescio, AJL)
@bl: Av: Nomen Nescio, ÅBE)
@bl: Av: Nomen Nescio, AJL)
@bl: Av: Nomen Nescio, ÅBE)
@bl: Av: Nomen Nescio, AJL)
@bl: Av: Nomen Nescio, ÅBE)
@bl: Av: Nomen Nescio, AJL)
@bl: Av: Nomen Nescio, ÅBE)
@bl: Av: Nomen Nescio, AGI)
@bl: Av: Nomen Nescio, HH)
@bl: Av: Nomen Nescio, MNP)
@bl: Av: Nomen Nescio, CE)
@bl: Av: Nomen Nescio, AGI)
@bl: Av: Nomen Nescio, HH)
@bl: Av: Nomen Nescio, MNP)
@bl: Av: Nomen Nescio, CE)
@bl: Av: Nomen Nescio, AGI)
@bl: Av: Nomen Nescio, HH)
@bl: Av: Nomen Nescio, MNP)
@bl: Av: Nomen Nescio, CE)
@bl: Av: Nomen Nescio, AGI)
@bl: Av: Nomen Nescio, HH)
@bl: Av: Nomen Nescio, MNP)
@bl: Av: Nomen Nesci

@bl: Av: Lektorprogrammets Programutvalg, ved leiar Ane Krogsæter Aarre)
@bl: Av: Nomen Nescio, by: Henrik Asheim er leder i Unge Høyre og gjesteskribent i Universitas)
@bl: Av: Sine Astad, leder av studentforeningen ved Handelshøyskolen BI i Oslo)
@bl: Av: Nomen Nescio, SBIO)
@bl: Tekst: Halvor Kongshavn, overbibliotekar ved Humsam-biblioteket)
@bl: Av: Nomen Nescio, by: Bibliotek for humaniora og samfunnsvitenskap: Halvor Kongshavn, overbibliotekar ved Humsam-biblioteket)
@bl: Av: Nomen Nescio, UB)
@bl: Tekst: Halvor Kongshavn, overbibliotekar ved Humsam-biblioteket)
@bl: Av: Nomen Nescio, by: Bibliotek for humaniora og samfunnsvitenskap: Halvor Kongshavn, overbibliotekar ved Humsam-biblioteket)
@bl: Av: Nomen Nescio, UB)
@bl: Av: Nomen Nescio, by: Iver Aastebøl er leder i Rød Ungdom og gjesteskribent i Universitas)
@bl: Av: Nomen Nescio, by: Velferdstingets Arbeidsutvalg ved leder Mari Berdal Djupvik)
@bl: Av: Nomen Nescio, by: Aksel Braanen Sterri er leder i Studentparlamentet ved 

In [None]:
for id in one_hit_wonders[:10]:
    person = Contributor.objects.get(pk=id)
    story = person.byline_set.first().story
    #print(story)
    print(story.id)
    print(person)
    print(json.loads(story.legacy_html_source)[0]['fields']['byline'])
    print(story.legacy_prodsys_source)

In [None]:
import json
list(story.bylines.all())

In [None]:
everyone = Contributor.objects.values_list('display_name', flat=True)
for contributor in contributors[:100]:
    best = process.extract(contributor.display_name, everyone, limit=2)
    print(best)
    

In [74]:
imgs= ImageFile.objects.filter(source_file__icontains="unspe")
display(list(imgs))


In [23]:
url = imgs[0].source_file.url

In [24]:
import requests

In [None]:
import requests
import concurrent.futures

URLS = [im.source_file.url for im in ImageFile.objects.all()]

URLS = [im.source_file.url for im in ImageFile.objects.all()]
def get_http_status(url, timeout=10):
    return requests.head(url, timeout = 5).status_code

# We can use a with statement to ensure threads are cleaned up promptly
broken_urls = []
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Start the load operations and mark each future with its URL
    future_to_url = {executor.submit(get_http_status, url): url for url in URLS}
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            status_code = future.result()
            if status_code == 404:
                broken_urls.append(url)
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))


In [4]:
from pathlib import Path
import imagehash
import PIL
import requests

In [5]:
broken = ImageFile.objects.filter(cropping_method=0)

In [6]:
def path_to_imghash(path, size = 11):
    img = PIL.Image.open(str(path)).convert('L').resize((size, size))
    return imagehash.dhash(img)

In [7]:
files = []
replacements = list(Path('/var/staging/replace/').glob('*/*.*'))
c = len(replacements)
for img in replacements:
    rep = {'image': img, 'hash': path_to_imghash(img)}
    files.append(rep)
    print(c, img)
    c = c-1

36 /var/staging/replace/22/30-NYH-arbeiderpartiet-AN.jpg
35 /var/staging/replace/6/unnamed.jpg
34 /var/staging/replace/2/unnamed.jpg
33 /var/staging/replace/35/28-uriks-usa-dk-3.jpg
32 /var/staging/replace/25/graver-1.jpg
31 /var/staging/replace/21/netturix08.jpg
30 /var/staging/replace/18/29-NYH-Gerdlivvalla-AN-1.jpg
29 /var/staging/replace/29/sj-018228.jpg
28 /var/staging/replace/8/unnamed.jpg
27 /var/staging/replace/4/unnamed.jpg
26 /var/staging/replace/20/netturix04.jpg
25 /var/staging/replace/33/02-URIX-studengjeld-DK-10.jpg
24 /var/staging/replace/26/netturix05.jpg
23 /var/staging/replace/9/unnamed.jpg
22 /var/staging/replace/15/unnamed.jpg
21 /var/staging/replace/5/unnamed.jpg
20 /var/staging/replace/13/unnamed.jpg
19 /var/staging/replace/14/unnamed.jpg
18 /var/staging/replace/10/unnamed.jpg
17 /var/staging/replace/17/02-URIX-studengjeld-DK-8.jpg
16 /var/staging/replace/27/nattverd-gamle-tassere.jpg
15 /var/staging/replace/31/12-KUL-doktor-SGS.jpg
14 /var/staging/replace/3/unnam

In [8]:
# Make searchspace
broken = ImageFile.objects.filter(cropping_method=0).exclude(_imagehash__in=['err', ''])
vals = broken.values( '_imagehash', 'id' )
haystack = [(v['id'], imagehash.hex_to_hash(v['_imagehash'])) for v in vals]
len(broken)

427

In [10]:
from django.core.files import File

def replace_source_file(image_file, path):
    with open(str(path), 'rb') as source:
        content = File(source)
        image_file.original.save(path.name, content, False)
        image_file.crop_method = ImageFile.CROP_PENDING
        image_file.save()

def closest_image(_imagehash, n=1, cutoff=10, haystack=haystack):
    
    candidates = [(_imagehash - h, pk, h) for pk, h in haystack if _imagehash - h < cutoff]
    return [(d, ImageFile.objects.get(pk=pk)) for d, pk, h in sorted(candidates)[:n]]

wats = []


for f in files[3:]: 
    imghash = f['hash']
    path = f['image']
    data = closest_image(imghash, n=1, cutoff=15)
    if not data:
        matches = broken.filter(source_file__endswith=path.name)
        if matches.count() == 1:
            imgfile = matches[0]
            print(f'{imghash-imgfile.imagehash}   {imgfile} {imgfile.pk} {path})')
            replace_source_file(imgfile, path)
        else:
            print('wat')
            
    else:
        diff, imgfile = data[0]
        print(f'{diff} {imgfile} {imgfile.pk} {path})')
        replace_source_file(imgfile, path)
    


4 28-uriks-usa-dk-3jpg-30660124926-o.jpg 39121 /var/staging/replace/35/28-uriks-usa-dk-3.jpg)


  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))


wat
13 netturix08.jpg 40074 /var/staging/replace/21/netturix08.jpg)


  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))


16   29-NYH-Gerdlivvalla-AN-1.jpg 39026 /var/staging/replace/18/29-NYH-Gerdlivvalla-AN-1.jpg)
4 sj-018162.jpg 42040 /var/staging/replace/29/sj-018228.jpg)
wat
wat
7 netturix04.jpg 40069 /var/staging/replace/20/netturix04.jpg)


  "Skipping tag %s" % (size, len(data), tag))


wat
6 netturix05.jpg 40076 /var/staging/replace/26/netturix05.jpg)


  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))


wat
wat
wat
wat
wat
wat
8 netturix04.jpg 40069 /var/staging/replace/17/02-URIX-studengjeld-DK-8.jpg)
18   nattverd-gamle-tassere.jpg 38913 /var/staging/replace/27/nattverd-gamle-tassere.jpg)
5 unnamed-3.jpg 42405 /var/staging/replace/31/12-KUL-doktor-SGS.jpg)


  "Skipping tag %s" % (size, len(data), tag))


wat
14 Nyhet-delmal-HB-87.jpg 39652 /var/staging/replace/34/32-NYH-petrine-HB-04.jpg)
6 netturix02.jpg 40071 /var/staging/replace/23/netturix02.jpg)
11 netturix06.jpg 40077 /var/staging/replace/24/netturix06.jpg)


  "Skipping tag %s" % (size, len(data), tag))


30   30-NYH-cannabis-02.jpg 39278 /var/staging/replace/28/30-NYH-cannabis-02.jpg)
3 F291-Nylig-historikk-Cover-300dpi.jpg 39153 /var/staging/replace/30/F291-Nylig-historikk-Cover-300dpi.jpg)
7 netturix07.jpg 40073 /var/staging/replace/16/netturix07.jpg)
wat
8 27056939861-166dd81f68-o.jpg 39239 /var/staging/replace/32/27056939861-166dd81f68-o.jpg)


  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))


wat
12 21-NYH-sexisitisk2-AGS.jpg 37053 /var/staging/replace/19/21-NYH-sexisitisk-2-AGS.jpg)
wat
wat
wat


In [50]:
from IPython.display import HTML
def display(images):
    html = ''
    for image in images:
        title = f'src: {image.original}, hash: {image._imagehash}, md5: {image._md5}'
        html += f'<img title="{title}" style="display: inline-block; margin: 5px" src="{image.preview.url}" />'
    return HTML(html)

In [11]:
for img in broken:
    status = requests.head(img.original.url).status_code
    if status == 200:
        img.cropping_method = 1
        img.save()
        print(img.original)

2017/15/Urortfinalen-DarligVane-AC-5.jpg
2017/15/unspecified-3.jpg
2017/15/DSC05628.jpg
2017/15/DSC05709.jpg
2017/15/Syria.2BEn-2Bstor-2Bkrig-2Bi-2Ben-2Bliten-2Bverden.jpg
2017/15/skjermdumpchristiana.jpg
2017/15/NYH-6-BAROMETER-MAREN-NW-3.jpg
2017/15/07-NYH-toalett-AN-4.jpg
2017/15/Screenshot-2017-02-12-15-45-29.png
2017/15/16810669-10158392959105093-1687119375-o.png
2017/15/16775963-10158392966445093-1790151043-o.jpg
2017/15/Simen-Flotvik-Mathisen.jpg
2017/8/KUL-7-yYVIN-NW-2-1.jpg
2017/15/Bryan34.jpg
2017/15/unspecified-2.jpg
2017/15/Thomas-Dybdahl-TheGreatPlains-Booklet.jpg
2017/15/20161003-174532-3.jpg
2004/21/21-subtonic.jpg
2010/07/07-nyh-villaeika-nav02-KB.jpg
2016/10/129761899-N06.jpg
2016/30/F291-Nylig-historikk-Cover-300dpi.jpg
2017/15/Capture.png
2017/15/MikeFurstenberg15-Foto-UnniIrmelinKvam-SiO.jpg
2017/15/netturix08.jpg
2017/15/Andreas-Oftedal.jpg
2017/15/AVE2111.jpg
2010/17/17-nyh-doktorgrad02-web-EM.jpg
2013/0.jpg
2017/15/IMG-0857.jpg
2017/15/IMG-9055.jpg
2017/15/IMG-08

In [19]:
broken = ImageFile.objects.filter(cropping_method=0).order_by('-created')
with open('stillmissing.txt', 'wt') as fp:
    for img in broken.order_by('-created'):
        fp.write(f'{img.pk} {img._md5} {img._imagehash} {img.original}\n')
        

In [6]:
ImageFile.objects.pending().count()

0

In [21]:
!cat stillmissing.txt

42763 017ef36b6727cd9e703f6dd4b07a125b a1a2b29068b6b2f8 2017/14/unspecified-1.jpg
42682 6541dd7413ca392771e5c269c653783f 8abbb898d191b5a4 2017/14/knaus.jpg
42652 c5b6eb3cbb4a2892034c295aa95f3578 79c98bcdc586b4b0 2017/14/9788202480677.jpg
42647 dbdaa40d28173071c62146bc5bf1d132 0abaa3a6ac698ba3 2017/14/18336884-10210753378383815-1316556153-n.jpg
42580 128e789f01d6ba711427c09e30188840 bb718b9773b55825 2017/13/kaffe-2.jpg
42505 9238b8657072b013ceccc001e4a64637 04b39aaab469d9c5 2017/13/mAEre-20vaere-20mine-20dotre-HOY.jpg
42405 7dd92b1d84e87b5498df7612a1eeb536 2f8e2e4fafa5e667 2017/15/12-KUL-doktor-SGS.jpg
42404 33b74d8307b05dc48965ff26fddd0ff9 512f2dafacaf975b 2017/12/unnamed.jpg
42399 dd1492e56eafc896ba34b4443fd2d417 939797b799bb1a60 2017/12/hester.jpg
42295 414df40141b6f34d348859956427099b 2d6c652c6c242497 2017/12/WesterACT-AS-14.jpg
42268 812e3b8092177c3bf2ce2e56a6fda213 e60b1a0c66632735 byline-photo/Stian-Skarheim-Magelssen.jpg
42243 c6d4a0b923d3b3efcfa4022b07330a0c 4d14951d