In [1]:
import torch

import pyprob
from pyprob import Model
import pyprob.distributions as dists

from calendar import monthrange
import datetime



In [187]:
class DateParser(Model):
    """Generative model that writes a random calendar date in a random format.

    Each call to ``forward`` samples a four-digit year, a month, a day that is
    valid for that month and year, and three formatting choices (year width,
    month style, day padding). The result is returned as a
    ``day/month/year`` string such as ``28/09/1921`` or ``5/MAY/54``.
    """

    # Three-letter month names for the abbreviated month format, keyed by month number.
    _MONTH_ABBREVIATIONS = {1: 'JAN', 2: 'FEB', 3: 'MAR', 4: 'APR', 5: 'MAY', 6: 'JUN',
                            7: 'JUL', 8: 'AUG', 9: 'SEP', 10: 'OCT', 11: 'NOV', 12: 'DEC'}

    def __init__(self):
        # NOTE(review): "Unkown" is misspelled; left untouched because it is a
        # runtime string that may identify the model elsewhere -- confirm before renaming.
        super().__init__(name="Date with Unkown Format")

    def forward(self):
        """Sample one date and return it formatted as ``day/month/year``.

        Returns:
            str: The sampled date, with the day, month and year each rendered
            in an independently sampled format and joined by ``/``.
        """
        # Sample the year digit by digit. Every digit ranges over 0-9, so the
        # year lies in 0000-9999; the first two digits are weighted so that
        # 19xx and 20xx are the most likely centuries.
        year_1 = int(pyprob.sample(dists.Categorical(torch.tensor(
            [0.05, 0.4, 0.4, 0.03, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02]
        ))).item())
        if year_1 == 1:
            # 1xxx: favour 19xx (and, to a lesser extent, 18xx)
            second_digit_probs = [0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0.5]
        elif year_1 == 2:
            # 2xxx: favour 20xx (and, to a lesser extent, 21xx)
            second_digit_probs = [0.5, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05]
        else:
            second_digit_probs = [0.1]*10
        year_2 = int(pyprob.sample(dists.Categorical(torch.tensor(second_digit_probs))).item())
        year_3 = int(pyprob.sample(dists.Categorical(torch.tensor([0.1]*10))).item())
        year_4 = int(pyprob.sample(dists.Categorical(torch.tensor([0.1]*10))).item())
        year_digits = f"{year_1}{year_2}{year_3}{year_4}"   # always exactly four characters
        year = int(year_digits)

        # sample month and day given the year
        month = int(pyprob.sample(dists.Categorical(torch.tensor([1/12]*12))).item()) + 1
        # monthrange() goes through datetime.date, whose smallest year is 1, so
        # a sampled year of 0 must not be passed to it directly. Year 2000 sits
        # at the same point of the 400-year Gregorian cycle (both are leap
        # years), so it yields the same month lengths.
        calendar_year = year if year >= 1 else 2000
        num_days = monthrange(calendar_year, month)[1]             # number of days in this month
        day_probs = [1/num_days]*num_days + [0.]*(31-num_days)     # probs of which day it is (in fixed length vector)
        day = int(pyprob.sample(dists.Categorical(torch.tensor(day_probs))).item()) + 1

        # sample format used to write day, month and year
        yy = int(pyprob.sample(dists.Categorical(torch.tensor([0.5, 0.5]))).item())  # 1: yy, 0: yyyy
        m = int(pyprob.sample(dists.Categorical(torch.tensor([0.33, 0.33, 0.33]))).item())   # 0: m, 1: mm, 2: e.g. 'JAN'
        d = int(pyprob.sample(dists.Categorical(torch.tensor([0.5, 0.5]))).item())   # 1: d, 0: dd

        # Render each component into its own string so the integer values
        # above are never rebound to a different type.
        if d:
            day_text = str(day)
        else:  # dd format
            day_text = str(day).zfill(2)

        if m == 0:
            month_text = str(month)
        elif m == 1:
            month_text = str(month).zfill(2)
        else:
            month_text = self._MONTH_ABBREVIATIONS[month]

        if yy:
            year_text = year_digits[-2:]
        else:  # yyyy
            year_text = year_digits

        # TODO: sample the order of day, month and year; for now the order is
        # always day/month/year.
        return f"{day_text}/{month_text}/{year_text}"


In [185]:
# Scratch cell: builds a uniform 12-way categorical, the same construction
# DateParser.forward uses for the month. `a` is not referenced by the other
# visible cells.
a = dists.Categorical(torch.tensor([1/12]*12))

In [188]:
# Draw 20 traces from the model's prior and print each generated date string
# on its own line.
prior_dates = DateParser().prior_distribution(20).values_numpy()
for date_string in prior_dates:
    print(date_string)

Time spent  | Time remain.| Progress             | Trace | Traces/sec
0d:00:00:00 | 0d:00:00:00 | #################### | 20/20 | 260.27       
28/09/1921
20/JUN/2526
05/DEC/77
24/11/2863
24/8/90
12/7/2086
11/6/62
21/NOV/34
12/11/87
10/08/7461
25/APR/71
24/8/49
28/4/2050
27/12/0555
10/SEP/28
27/JAN/1954
02/4/1610
5/06/80
5/MAY/54
03/10/96


In [170]:
# Scratch check of the two-digit-year expression used in DateParser.forward:
# zfill(2) leaves a 3-character string unchanged and [-2:] keeps its last two
# characters.
'234'.zfill(2)[-2:]

'34'