In [1]:
# Number of Stint rows currently in the database.
# NOTE(review): Stint is not imported in this cell — presumably the shell
# pre-loads the models; confirm before running on a fresh kernel.
Stint.objects.count()

114

In [13]:
from django.core.exceptions import ObjectDoesNotExist
from utils.merge_model_objects import merge_instances

# Attach each Contributor to the user returned by get_user(). When that user
# already has a different contributor attached, both records are handed to
# merge_instances.
for person in Contributor.objects.all():
    user = person.get_user()
    if not user:
        continue
    try:
        contributor = user.contributor
    except ObjectDoesNotExist:
        # The user has no contributor yet: claim it for this person.
        person.user = user
        person.save()
        continue
    print(f'{user.username:<30} has person   {contributor}')
    if contributor != person:
        merge_instances(contributor, person)
                


HannaSkotheim                  has person   Hanna Skotheim
jspreston                      has person   Joakim Stene Preston
madsran                        has person   Mads Randen
ellaregine                     has person   Regine Stokstad
sondremyhre                    has person   Sondre Myhre
mathigr                        has person   Mathias Gravdehaug
xueqipang                      has person   Xueqi Pang
haavard24                      has person   Håvard Røsæg
kennethkandolfhaug             has person   Kenneth Haug
gardoterholm                   has person   Gard Oterholm
urix                           has person   Emilie Solberg
hannahberg                     has person   Hannah Berg
mariapettrem                   has person   Maria Pettrém
juliebrundtland                has person   Julie Brundtland Løvseth
folkejarnbert                  has person   Arvid Folke Järnbert
henrikgiaever                  has person   Henrik Giæver
racheleantonsen                has person   Rach

In [None]:
import re, pprint
from datetime import date
from apps.issues.models import PrintIssue
from apps.contributors.models import Stint
from django.db.models.query import QuerySet
import logging
# Notebook-local logger named 'jup', with its level lowered to DEBUG so that
# the logger.debug calls in the cells below pass the level check.
logger = logging.getLogger('jup')
logger.setLevel('DEBUG')

def find_staff(text, titles=(r'daglig leder', r'\w*redaktør', r'\w*leder', r'\w*sjef')):
    """Extract editorial staff from a colophon in text extracted from a pdf.

    Looks for ``<title>: <name>`` entries where the title starts a line and
    matches one of the regular expression fragments in ``titles``
    (case-insensitively).

    Returns a dict mapping the lower-cased, stripped title to the stripped
    name. When a title occurs several times, the last occurrence wins.
    """
    # The pdf text might contain non printing characters, such as backspace.
    cleaned = text.replace('\x08', '')
    alternatives = '|'.join(titles)
    entry_re = re.compile(
        r'^(?P<title>' + f'({alternatives})' + r'):\s*(?P<name>.+)$',
        flags=re.MULTILINE | re.IGNORECASE,
    )
    staff = {}
    for match in entry_re.finditer(cleaned):
        staff[match.group('title').lower().strip()] = match.group('name').strip()
    return staff


def get_content(print_issue: PrintIssue, first_page=2, last_page=999) -> str:
    """Return the text content of a pdf issue for the given page range.

    Any exception raised while reading is logged with its traceback and an
    empty string is returned instead.
    """
    text = ''
    try:
        text = print_issue.get_page_text_content(first_page, last_page)
    except Exception:
        logger.exception('Error reading pdf:')
    return text

def find_positions(print_issue: PrintIssue) -> dict:
    """Return the title -> name mapping that find_staff extracts from the issue text."""
    return find_staff(get_content(print_issue))

def add_to_stint(title:str, name:str, date:date) -> Stint:
    """Make sure a Stint for this person and position covers ``date``.

    The contributor is looked up by ``display_name``; when there is no exact
    match, ``Contributor.get_or_create(name)`` is used instead and a warning
    is logged. The Position is fetched or created from ``title``. The matching
    Stint is fetched (or created spanning only ``date``), widened so that
    ``date`` lies within it, saved and returned.
    """
    try:
        contributor = Contributor.objects.get(display_name=name)
    except Contributor.DoesNotExist:
        contributor, was_created = Contributor.get_or_create(name)
        outcome = "created" if was_created else "found"
        logger.warning(f'could not find "{name}" => {outcome} "{contributor}"')

    position = Position.objects.get_or_create(title=title)[0]
    stint = Stint.objects.get_or_create(
        position=position,
        contributor=contributor,
        defaults={'start_date': date, 'end_date': date},
    )[0]

    # Stretch the period in whichever direction is needed to include `date`.
    latest_end = max(stint.end_date, date)
    stint.end_date = latest_end
    earliest_start = min(stint.start_date, date)
    stint.start_date = earliest_start
    stint.save()
    return stint


def find_stints_from_pdfs(qs:QuerySet=None, dry_run=False):
    """Register stints for the staff found in the text of pdf issues.

    ``qs`` is the set of PrintIssue objects to scan; all of them when omitted.
    For each issue the detected title/name pairs are logged at DEBUG level
    and, unless ``dry_run`` is true, passed to ``add_to_stint`` together with
    the publication date of the issue.
    """
    issues = PrintIssue.objects.all() if qs is None else qs
    for print_issue in issues:
        positions = find_positions(print_issue)
        published = print_issue.issue.publication_date
        header = f'{published} {print_issue}\n'
        listing = '\n'.join(f'{title}: {name}' for title, name in positions.items())
        logger.debug(header + listing)
        if not dry_run:
            for title, name in positions.items():
                add_to_stint(title, name, published)

# Scan every PrintIssue (no queryset given). With dry_run=False the stints
# found are written to the database, not just logged.
find_stints_from_pdfs(dry_run=False)

In [None]:
# Smoke test for the logging setup: emit one DEBUG message to check that it shows up.
logger.setLevel('DEBUG')
logger.debug('hi')

In [None]:
# Normalise byline credits per section: 'by' becomes 'text' in the three
# sections below, while in 'Debatt' 'text' becomes 'by'.
for section_title in ('Kultur', 'Anmeldelser', 'Nyheter'):
    Byline.objects.filter(story__story_type__section__title=section_title, credit='by').update(credit='text')
Byline.objects.filter(story__story_type__section__title='Debatt', credit='text').update(credit='by')

In [None]:
def add_stints_from_bylines(position, credit, exclude=('Debatt',), byline_cutoff=1):
    """Add stints based on byline credits.

    For every Contributor, collect the bylines whose credit contains
    ``credit``, leaving out unpublished stories and stories in the sections
    named in ``exclude``. Contributors with more than ``byline_cutoff`` such
    bylines get a new Stint in ``position`` running from the date of their
    first byline to the date of their last one; the contributor's stints in
    that position are then passed through ``join_stints``.

    Parameters:
        position: the Position the created stints are attached to.
        credit: substring matched against the byline credit.
        exclude: iterable of section titles whose stories are ignored.
        byline_cutoff: contributors need more bylines than this to get a stint.
    """
    for person in Contributor.objects.all():
        bylines = (
            person.byline_set
            .filter(credit__contains=credit)
            .exclude(story__publication_date=None)  # unpublished
            .exclude(story__story_type__section__title__in=exclude)  # no opinion pieces
            .order_by('story__publication_date')
        )
        # Count once; the number is needed for both the cutoff and the log line.
        byline_count = bylines.count()
        if byline_count <= byline_cutoff:
            continue  # ignore people with few bylines
        first_byline = bylines.first().story.publication_date.date()
        last_byline = bylines.last().story.publication_date.date()
        Stint.objects.create(
            start_date=first_byline,
            end_date=last_byline,
            contributor=person,
            position=position,
        )
        # A fresh stint is created on every run; join_stints folds it into
        # any stint it overlaps so repeated runs do not pile up duplicates.
        stint = join_stints(Stint.objects.filter(contributor=person, position=position))
        logger.debug(f'{stint} {byline_count}')
      
from utils.merge_model_objects import merge_instances 

def join_stints(stints):
    """Merge overlapping stints and return the last surviving one.

    ``stints`` is a queryset of stints (anything offering ``order_by`` works).
    They are walked by ascending ``start_date``; a stint that starts on or
    before the day the previous surviving stint ends is folded into it: the
    survivor's period is widened and saved, and both objects are handed to
    ``merge_instances``, whose return value becomes the new survivor.

    Returns the last surviving stint, or None when ``stints`` is empty.
    """
    last = None
    for stint in stints.order_by('start_date'):
        # Periods are treated as inclusive date ranges (a single-day stint has
        # start_date == end_date), so sharing a boundary day counts as an
        # overlap: compare with >=, not >.
        if last is not None and last.end_date >= stint.start_date:
            last.start_date = min(stint.start_date, last.start_date)
            last.end_date = max(stint.end_date, last.end_date)
            last.save()
            last = merge_instances(last, stint)
        else:
            last = stint
    return last
    
logger.setLevel('DEBUG')
# (position title, byline credit) pairs: one pass over all contributors each.
for title, credit in (
    ('journalist', 'text'),
    ('fotograf', 'photo'),
    ('oversetter', 'translation'),
    ('illustratør', 'illustration'),
):
    position, _ = Position.objects.get_or_create(title=title)
    add_stints_from_bylines(position, credit)
        
    

In [None]:
from django.utils import timezone

def update_active(inactive_limit=timezone.timedelta(days=150)):
    """Recompute and store the status of every Contributor.

    Rules, checked in this order:

    * no published bylines: UNKNOWN (the contributor is printed for review)
    * every published byline is in the 'Debatt' section: EXTERNAL
    * has at least one stint: RETIRED when the most recently started stint
      ended more than ``inactive_limit`` ago, otherwise ACTIVE
    * anything else: UNKNOWN

    The status is only saved when it differs from the stored one. One summary
    line (name, status, total byline count) is printed per contributor.

    Parameters:
        inactive_limit: timedelta after the end of the latest stint beyond
            which a contributor counts as retired.
    """
    today = timezone.now().date()
    for contributor in Contributor.objects.all():
        all_bylines = contributor.byline_set.exclude(story__publication_date=None)
        debatt_bylines = all_bylines.filter(story__story_type__section__title='Debatt')
        byline_count = all_bylines.count()
        status = Contributor.UNKNOWN
        if byline_count == 0:
            print(contributor)
        elif byline_count == debatt_bylines.count():
            status = Contributor.EXTERNAL
        elif contributor.stint_set.count():
            latest = contributor.stint_set.latest('start_date')
            end_date = latest.end_date
            # NOTE(review): a stint without an end date is treated as still
            # running, i.e. ACTIVE — confirm this is the intended meaning.
            # An end date in the future gives a negative delta: also ACTIVE.
            if end_date and today - end_date > inactive_limit:
                status = Contributor.RETIRED
            else:
                status = Contributor.ACTIVE
        if status != contributor.status:
            contributor.status = status
            contributor.save(update_fields=['status'])

        print(f'{str(contributor):<40} {contributor.get_status_display():<10} {contributor.byline_set.count():>5}')

update_active()
# One queryset per status of interest; `unknowns` is inspected again further down.
unknowns, active, retired = (
    Contributor.objects.filter(status=status)
    for status in (Contributor.UNKNOWN, Contributor.ACTIVE, Contributor.RETIRED)
)
print(unknowns.count(), active.count(), retired.count())

In [None]:
# Scratch check of timedelta comparison: is a one-day delta smaller than the
# zero delta? (Assumes timezone.timedelta is datetime.timedelta as re-exported
# by django.utils.timezone — TODO confirm; if so the result is False.)
delta = timezone.timedelta(1)
delta < timezone.timedelta()

In [None]:
# List every contributor whose status is still UNKNOWN together with their
# stints, separated by a blank line. Relies on `unknowns` from the status
# summary cell.
for person in unknowns:
    print(person)
    for stint in person.stint_set.all():
        print(stint)
    print()
    # Disabled: per-byline dump, kept for manual inspection.
    #for byline in person.byline_set.all():
    #    print(byline, byline.story.publication_date, byline.story.section)

In [None]:
# Remove unused story types

for st in StoryType.objects.all():
    if st.story_set.count():
        continue  # at least one story still uses this type
    print(st)
    st.delete()
        
# Delete sections that have no story types; print every section followed by
# its story types and how many stories each one has.
for sec in Section.objects.all():
    story_types = sec.storytype_set.all()
    if not story_types.count():
        sec.delete()
    # The section is printed even when it was just deleted.
    print(sec)
    for story_type in story_types:
        story_total = story_type.story_set.count()
        print('   ', story_type, story_total)