# IMO checksum
* IMO number checker
    - For example, for IMO 9074729: (9×7) + (0×6) + (7×5) + (4×4) + (7×3) + (2×2) = 139
* IMO number generator
* IMO amount of IMO numbers possible between two endpoints
* 2024-02-03

# Import

## Import packages

In [1]:
import pandas as pd
from random import randint

## Import test data

In [None]:
imo_numbers = [
    9224764,
    9839179,
    9839143,
    2131212,
    213213,
    7677124,
    6809082,
    4660961,
    12,
    9226281,
    "",
]
df = pd.DataFrame(imo_numbers, columns=["imo_number"])

## Import functions

In [None]:
def imo_checker_binary(string: str) -> str:
    """
    validates a single IMO number
    args: a string, a possible single IMO number
    returns: binary True/False
    """
    try:
        # cast a string as an integer, if impossible return false, see except clause
        string_test = int(string)
        string = str(string)

        # extract checksum digit from IMO number
        checksum = str(
            (int(string[-7]) * 7)
            + (int(string[-6]) * 6)
            + (int(string[-5]) * 5)
            + (int(string[-4]) * 4)
            + (int(string[-3]) * 3)
            + (int(string[-2]) * 2)
        )

        # check if last digit of checksum is equal to last digit of IMO number (return true/false)
        if checksum[-1] == string[-1]:
            return True
        else:
            return False

    except (ValueError, IndexError) as e:
        return False


def imo_checker(string: str) -> str:
    """
    checks if a string is an IMO number and returns valid IMO number
    args: a char possible IMO number, char length 7
    returns: a valid imo number or none
    """
    string = str(string)

    # extract checksum digit from IMO number
    checksum = str(
        (int(string[-7]) * 7)
        + (int(string[-6]) * 6)
        + (int(string[-5]) * 5)
        + (int(string[-4]) * 4)
        + (int(string[-3]) * 3)
        + (int(string[-2]) * 2)
    )

    # check if last digit of checksum is equal to last digit of IMO number (return IMO number)
    if checksum[-1] == string[-1]:
        return string


def gen_imo_number(amount_imo_numbers: int) -> list:
    """
    generates valid synthetic IMO numbers
    args: number of IMO numbers
    returns: list of valid IMO numbers
    """

    # initiate list
    valid_imo_number_list = []

    # while loop to create desired amount of IMO numbers
    while len(valid_imo_number_list) < amount_imo_numbers:
        random_integer = randint(5000000, 9999999)
        valid_imo_number = imo_checker(random_integer)
        if valid_imo_number is not None:
            valid_imo_number_list.append(valid_imo_number)
    return valid_imo_number_list


def convert_number_to_arabic(string):
    try:
        return convert_numbers.english_to_arabic(string)
    except ValueError as e:
        return np.nan


def convert_arabic_to_number(string):
    try:
        return convert_numbers.arabic_to_english(string)
    except ValueError as e:
        return np.nan

# Validate IMO numbers

In [None]:
df["test"] = df["imo_number"].apply(imo_checker_binary)
df

Unnamed: 0,imo_number,test
0,9224764.0,True
1,9839179.0,True
2,9839143.0,True
3,2131212.0,False
4,213213.0,False
5,7677124.0,False
6,6809082.0,True
7,4660961.0,False
8,12.0,False
9,9226281.0,True


# Generate IMO numbers

In [None]:
%%time
gen_imo_numbers = gen_imo_number(10000)
df_gen = pd.DataFrame(gen_imo_numbers, columns=["imo_number"])
df_gen.head(20)

CPU times: total: 141 ms
Wall time: 146 ms


Unnamed: 0,imo_number
0,5652066
1,8527420
2,8021919
3,7245348
4,9598816
5,7365356
6,7430943
7,8882260
8,5516511
9,8219827


# How many IMO numbers are valid options

In [None]:
%%time

list_of_numbers = list(range(5000000, 10000000))
print(len(list_of_numbers))

df_options = pd.DataFrame(list_of_numbers, columns=["imo_number"])
df_options["test"] = df_options["imo_number"].apply(imo_checker_binary)

df_options[(df_options["test"] == True)].shape

5000000
CPU times: total: 5.48 s
Wall time: 5.61 s


(500000, 2)

In [None]:
df_true = df_options[(df_options["test"] == True)]

In [None]:
df_true["test"] = df_true["imo_number"].astype(str).str[5:7]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_true['test'] = df_true['imo_number'].astype(str).str[5:7]


In [9]:
df_true.head(10)

Unnamed: 0,imo_number,test
5,5000005,5
17,5000017,17
29,5000029,29
31,5000031,31
43,5000043,43
55,5000055,55
67,5000067,67
79,5000079,79
81,5000081,81
93,5000093,93


In [10]:
# df_true['test'].unique()