In [12]:
import io
from typing import List

import pandas as pd
import checker

In [98]:
def is_valid_egn(egn):
    """Return True if ``egn`` is a 10-digit EGN with a correct check digit.

    Only the checksum is verified; the date-of-birth part of the number is
    not inspected.

    Args:
        egn: A string of digit characters or a sequence of ints / digit
            strings (one element per digit).

    Returns:
        bool: True when ``egn`` has exactly ten decimal digits and the last
        one equals the weighted checksum of the first nine. Malformed input
        (wrong length, non-digit elements, non-iterable values) yields False
        instead of raising.
    """
    weights = (2, 4, 8, 5, 10, 9, 7, 3, 6)
    try:
        digits = [int(d) for d in egn]
    except (TypeError, ValueError):
        # Not iterable, or contains something that is not a digit.
        return False
    # Without the length check an over-long value would be validated using
    # only its first nine digits and its last one.
    if len(digits) != 10 or any(not 0 <= d <= 9 for d in digits):
        return False
    # A remainder of 10 maps to check digit 0, hence the trailing ``% 10``.
    calculated_check_digit = sum(d * w for d, w in zip(digits, weights)) % 11 % 10
    return digits[-1] == calculated_check_digit


def perform_checks(xs: pd.DataFrame) -> List[str]:
    """Validate a table of page number / name / EGN / address / date rows.

    Args:
        xs: Frame whose column names, compared case-insensitively and in
            order, must be 'номер на страница', 'име', 'егн', 'адрес', 'дата'.

    Returns:
        A list of error codes. ``['INVALID_COLUMN_NAMES']`` is returned on its
        own when the header is wrong; otherwise the list holds any of
        ``'INVALID_PAGE_NUMBER'``, ``'INVALID_EGN'`` and ``'INVALID_ADDRESS'``
        and is empty when every check passes.
    """
    # Check column names.
    expected_columns = ['номер на страница', 'име', 'егн', 'адрес', 'дата']
    if [str(column).lower() for column in xs.columns] != expected_columns:
        return ['INVALID_COLUMN_NAMES']

    # The header comparison ignores case, so pick the columns by position:
    # an exact-case label lookup would raise KeyError for a header that
    # passed the check above but is spelled with different capitalisation.
    page_numbers = xs.iloc[:, 0]
    egns = xs.iloc[:, 2]
    addresses = xs.iloc[:, 3]

    errors = []

    # Check if all address pages are integers.
    try:
        page_numbers.astype(int)
    except (TypeError, ValueError):
        errors.append('INVALID_PAGE_NUMBER')

    # Check if all EGNs are valid.
    # An integer-typed column has lost any leading zeros, so restore them
    # before validating; other dtypes are taken literally.
    pad_leading_zeros = pd.api.types.is_integer_dtype(egns)

    def _egn_ok(value) -> bool:
        text = str(value)
        if pad_leading_zeros:
            text = text.zfill(10)
        # Reject malformed values here so that a bad cell is reported as
        # INVALID_EGN instead of raising while converting characters to int.
        return (
            len(text) == 10
            and text.isascii()
            and text.isdigit()
            and is_valid_egn(text)
        )

    if not egns.map(_egn_ok).all():
        errors.append('INVALID_EGN')

    # Check if all addresses contain at least a single digit.
    # Non-string cells (e.g. a missing value) count as invalid addresses.
    def _has_digit(value) -> bool:
        return isinstance(value, str) and any(ch.isdigit() for ch in value)

    if not addresses.map(_has_digit).all():
        errors.append('INVALID_ADDRESS')
    return errors

In [65]:
def f(name):
    """Open ``name`` in binary mode and return ``checker.read_table``'s result for it."""
    with open(name, 'rb') as handle:
        table = checker.read_table(name, handle)
    return table
# Load the sample file and run every check on it; an empty list means that
# no check reported an error.
xs = f('/tmp/one.csv')
perform_checks(xs)

[]

In [37]:
# Probe: casting a non-numeric column with ``astype(int)`` raises ValueError,
# which is the signal the page-number check in ``perform_checks`` relies on.
try:
    xs['Адрес'].astype(int)
except ValueError:
    print('yep')
# xs['Номер на страница'].astype(int)

yep


In [71]:
# Stack five copies of the loaded frame to get a slightly larger test table.
ys = pd.concat([xs] * 5)

In [84]:
# NOTE: the ``string`` module has no ``isdigit`` attribute, so the lookup
# below raises AttributeError; the digit test used elsewhere in this notebook
# is the ``str.isdigit`` method.
import string
string.isdigit

AttributeError: module 'string' has no attribute 'isdigit'

In [94]:
# Work on a copy, then overwrite row 2, column 3 (the address column in the
# table displayed by the next cell) with a value that contains no digit.
ys = ys.copy()
ys.iloc[2, 3] = 'kur'

In [95]:
# Display the modified frame.
ys

Unnamed: 0,Номер на страница,Име,ЕГН,Адрес,Дата
0,1,Панчо Владигеров,9602122829,"гр. София, ул. Майска роза №69",2023-02-12
0,1,Панчо Владигеров,9602122829,"гр. София, ул. Майска роза №69",2023-02-12
0,1,Панчо Владигеров,9602122829,kur,2023-02-12
0,1,Панчо Владигеров,9602122829,"гр. София, ул. Майска роза №69",2023-02-12
0,1,Панчо Владигеров,9602122829,"гр. София, ул. Майска роза №69",2023-02-12


In [97]:
# Address check on the modified frame: True only if every address contains
# at least one digit character.
ys['Адрес'].map(lambda x: any(map(str.isdigit, x))).all()

False

In [79]:
# EGN check on the repeated rows: each value is split into a list of digits
# and passed to ``is_valid_egn``; True only if every row validates.
ys['ЕГН'].map(lambda x: is_valid_egn(list(map(int, str(x))))).all()

True

In [127]:
# Read the semicolon-separated file with explicit column labels and keep the
# distinct values of column 'x'.
xs = pd.read_csv(
    '/tmp/xxx.csv',
    sep=';',
    names=list('abcdxefgh'),
)['x'].unique()

In [144]:
# Wrap the array of unique values in a Series and lowercase every entry.
xs = pd.Series(xs).str.lower()

In [150]:
# Remove the 'гр.' and 'с.' markers (presumably the town / village prefixes).
# NOTE(review): ``str.replace`` removes them anywhere in the string, not only
# at the start, and leaves any surrounding whitespace in place.
xs = xs.map(lambda s: s.replace('гр.', '').replace('с.', ''))

In [157]:
# Deduplicate again: removing the markers can make previously distinct
# values equal.
xs = pd.Series(xs.unique())

In [161]:
# Sort the values.
xs = xs.sort_values()

In [171]:
# Drop every entry that contains one of the markers 'кв.', 'к.к.' or 'ж. к.'
# (presumably abbreviations for places that are not settlements — confirm).
xs = xs[xs.map(lambda s: not any(n in s for n in ['кв.', 'к.к.', 'ж. к.']))]

In [175]:
# Deduplicate the remaining values, wrap them in a fresh Series and sort it.
xs = pd.Series(xs.unique()).sort_values()

In [186]:
# Drop every remaining entry that still contains a dot.
xs = xs[~xs.map(lambda s: '.' in s)]

In [194]:
# Deduplicate the remaining values, wrap them in a fresh Series and sort it.
xs = pd.Series(xs.unique()).sort_values()

In [200]:
# Name the Series; the export cell selects the column by this name.
xs.name = 'Settlements'

In [201]:
# Write the values as a single 'Settlements' column, without the index.
xs.to_csv('/tmp/settlements.csv', columns=['Settlements'], index=False)

In [202]:
# Read the exported file back to inspect it.
# NOTE(review): the recorded output shows a lowercase `settlements` header
# while the export cell writes 'Settlements' — the output may be stale;
# re-run to confirm.
pd.read_csv('/tmp/settlements.csv')

Unnamed: 0,settlements
0,абланица
1,абрит
2,аврамово
3,аврен
4,агатово
...,...
3577,ясно поле
3578,ястреб
3579,ястребино
3580,ястребово
