In [2]:
pip install faker

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from faker import Faker
from datetime import datetime
from typing import List
import random
import time

In [4]:
# Creating Faker instance and specifying it to polish locale
fake = Faker(locale="pl_PL")

In [5]:
# Implementation of generate_ssns method, it generates given amount of ssns
def generate_ssns(index):
    ssn_array = []
    for _ in range(index):
        ssn_array.append(fake.ssn())

    ssn_series = pd.Series(ssn_array)

    return ssn_series

In [6]:
# Implementing two methods copied from faker documentation, I will use them to implement generate_unique_ssns method
def calculate_month(birth_date: datetime) -> int:
    year = int(birth_date.strftime("%Y"))
    month = int(birth_date.strftime("%m")) + ((int(year / 100) - 14) % 5) * 20
    return month

In [7]:
def checksum(digits: List[int]) -> int:
    weights_for_check_digit = [9, 7, 3, 1, 9, 7, 3, 1, 9, 7]
    check_digit = 0

    for i in range(0, 10):
        check_digit += weights_for_check_digit[i] * digits[i]

    check_digit %= 10

    return check_digit

In [8]:
# Implementation of generate_unique_ssns method, I inspired it on ssn provider from polish locale in faker module
# I assume that you can enter only "male" or "female" as gender values
def generate_unique_ssns(index, sex, birth_start, birth_end):
    ssn_array = []
    start = datetime.strptime(birth_start, "%Y-%m-%d")
    end = datetime.strptime(birth_end, "%Y-%m-%d")

    for i in range(index):
        birth_date = fake.date_between(start_date=start, end_date=end)

        year_without_century = int(birth_date.strftime("%y"))
        month = calculate_month(birth_date)
        day = int(birth_date.strftime("%d"))

        pesel_digits = [
            int(year_without_century / 10),
            year_without_century % 10,
            int(month / 10),
            month % 10,
            int(day / 10),
            day % 10,
        ]

        for _ in range(3):
            pesel_digits.append(random.randint(0, 9))

        if sex == "male":
            pesel_digits.append(random.choice([1, 3, 5, 7, 9]))
        else:
            pesel_digits.append(random.choice([0, 2, 4, 6, 8]))

        pesel_digits.append(checksum(pesel_digits))

        ssn_str = "".join(str(digit) for digit in pesel_digits)
        ssn_array.append(ssn_str)

    ssn_series = pd.Series(ssn_array)

    return ssn_series

In [9]:
# Implementation of calls of generate_ssns method, with their execution times
start = time.time()
print(generate_ssns(1000))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0      78051772500
1      12252815112
2      07211072435
3      11272892927
4      76072526458
          ...     
995    98011465190
996    10300548690
997    81022750942
998    85121525472
999    08252063310
Length: 1000, dtype: object
Time elapsed: 0.054852962493896484 seconds


In [10]:
start = time.time()
print(generate_ssns(10000))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0       03272414830
1       15272919444
2       13312417921
3       98072332882
4       16320941237
           ...     
9995    06262939324
9996    04231938426
9997    03270536981
9998    06262483878
9999    95072865314
Length: 10000, dtype: object
Time elapsed: 0.4587724208831787 seconds


In [11]:
start = time.time()
print(generate_ssns(100000))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0        13291185499
1        21301006788
2        11280194671
3        84090905960
4        12232417008
            ...     
99995    72112423761
99996    99090648821
99997    74041580911
99998    00302388987
99999    95030669619
Length: 100000, dtype: object
Time elapsed: 4.215328693389893 seconds


In [12]:
# Implementation of calls of generate_unique_ssns method for males born from 1990-01-01 to 1990-01-19, with their execution times
start = time.time()
print(generate_unique_ssns(1000, "male", "1990-01-01", "1990-01-19"))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0      90011148954
1      90011096158
2      90010966450
3      90011714496
4      90011156832
          ...     
995    90010187390
996    90011068670
997    90010356158
998    90010720557
999    90011687455
Length: 1000, dtype: object
Time elapsed: 0.12267184257507324 seconds


In [13]:
start = time.time()
print(generate_unique_ssns(10000, "male", "1990-01-01", "1990-01-19"))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0       90010787897
1       90011854938
2       90011667657
3       90011264298
4       90011746275
           ...     
9995    90010171551
9996    90010963679
9997    90010162078
9998    90010779452
9999    90011722633
Length: 10000, dtype: object
Time elapsed: 1.3380661010742188 seconds


In [14]:
start = time.time()
print(generate_unique_ssns(100000, "male", "1990-01-01", "1990-01-19"))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0        90010838036
1        90011458835
2        90011143577
3        90011049590
4        90010544195
            ...     
99995    90011757330
99996    90010348157
99997    90011745151
99998    90010517359
99999    90011740170
Length: 100000, dtype: object
Time elapsed: 13.411237239837646 seconds


In [15]:
# Implementation of calls of generate_unique_ssns method for females born from 1990-01-01 to 1990-01-19, with their execution times
start = time.time()
print(generate_unique_ssns(1000, "female", "1990-01-01", "1990-01-19"))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0      90011332980
1      90010452823
2      90011483082
3      90011550902
4      90010140001
          ...     
995    90010465249
996    90010829201
997    90011323186
998    90011608827
999    90010174523
Length: 1000, dtype: object
Time elapsed: 0.13962531089782715 seconds


In [16]:
start = time.time()
print(generate_unique_ssns(10000, "female", "1990-01-01", "1990-01-19"))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0       90010933489
1       90011322185
2       90010480806
3       90011441286
4       90011642007
           ...     
9995    90011165209
9996    90010114525
9997    90010815440
9998    90011418345
9999    90010470421
Length: 10000, dtype: object
Time elapsed: 1.2759861946105957 seconds


In [17]:
start = time.time()
print(generate_unique_ssns(100000, "female", "1990-01-01", "1990-01-19"))
end = time.time()
print("Time elapsed: "+str(end-start)+" seconds")

0        90011209486
1        90011506460
2        90011002386
3        90011720204
4        90011334647
            ...     
99995    90010687302
99996    90011135149
99997    90011505506
99998    90010508883
99999    90011224081
Length: 100000, dtype: object
Time elapsed: 13.103292465209961 seconds


In [18]:
# Implementation of validate_ssn method, again I inspired it on ssn provider from polish locale in faker module
def validate_ssn(ssn, sex, birth_date):
    birth_date = datetime.strptime(birth_date, "%Y-%m-%d")

    year_without_century = int(birth_date.strftime("%y"))
    month = calculate_month(birth_date)
    day = int(birth_date.strftime("%d"))

    pesel_digits = [
        int(year_without_century / 10),
        year_without_century % 10,
        int(month / 10),
        month % 10,
        int(day / 10),
        day % 10,
    ]

    ssn_list = [int(x) for x in str(ssn)]
    for j in [6, 7, 8]:
        pesel_digits.append(ssn_list[j])

    if sex == "male" and (ssn_list[9] % 2) == 1:
        pesel_digits.append(ssn_list[9])
    elif sex == "female" and (ssn_list[9] % 2) == 0:
        pesel_digits.append(ssn_list[9])
    else:
        print("Provided SSN is not valid! :C")
        return None

    pesel_digits.append(ssn_list[10])

    ssn_to_validate = "".join(str(digit) for digit in pesel_digits)

    if ssn_to_validate == ssn:
        print("Provided SSN is valid! :D")
    else:
        print("Provided SSN is not valid! :C")

In [19]:
# Testing validate_ssn method on some ssns I took from the calls of generate_unique_ssns method
validate_ssn("90010742166", "female", "1990-01-07")

Provided SSN is valid! :D


In [20]:
validate_ssn("90010742166", "male", "1990-01-07")

Provided SSN is not valid! :C


In [21]:
validate_ssn("90010359632", "female", "1990-01-03")

Provided SSN is not valid! :C


In [22]:
validate_ssn("90010359632", "male", "1990-01-03")

Provided SSN is valid! :D


In [23]:
validate_ssn("90010742166", "female", "1990-01-10")

Provided SSN is not valid! :C


In [25]:
validate_ssn("90010359632", "male", "1990-01-11")

Provided SSN is not valid! :C
