In [None]:
# The code below was written by Chairul Rozikin @ Populix

In [None]:
import os
from datetime import datetime

import boto3
import polars as pl
from dotenv import load_dotenv
# import pandas as pd (alternative for Polars)


In [None]:
# Load variables from a .env file into the process environment, then read the
# AWS region and credentials from it. Each value is None when its variable is
# not set.
# NOTE(review): the credential variables are named COGNITO_* but are passed to
# the Pinpoint client below -- confirm these keys are authorised for Pinpoint.
load_dotenv()
aws_region = os.environ.get("AWS_REGION")
access_key = os.environ.get("COGNITO_ACCESS_KEY")
secret_key = os.environ.get("COGNITO_SECRET_KEY")

In [None]:
# Boto3 docs can be checked in here: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/pinpoint/client/phone_number_validate.html

# Amazon Pinpoint client used by validate_phone_number(); the region and
# credentials are the values read from the environment.
pinpoint = boto3.client(
    "pinpoint",
    region_name=aws_region,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)

In [None]:
def format_phone_number(original_number):
    """Convert a phone number string to international form with the "+62" prefix.

    Args:
        original_number: Phone number as a string, e.g. "08123456789",
            "8123456789" or "+628123456789". Surrounding whitespace is ignored.

    Returns:
        The number prefixed with "+62". A single leading "0" (national trunk
        prefix) is dropped before the prefix is added. A number that already
        starts with "+" is treated as already international and returned
        unchanged, so formatting is safe to apply more than once.
    """
    number = original_number.strip()

    # Already carries a country code; prepending "+62" again would corrupt it
    # (e.g. "+62+62812...").
    if number.startswith("+"):
        return number

    if number.startswith("0"):
        return "+62" + number[1:]
    return "+62" + number


def validate_phone_number(formatted_number):
    """Ask Amazon Pinpoint whether a phone number is valid.

    Sends the number to the module-level ``pinpoint`` client with the ISO
    country code fixed to "ID" and inspects the ``PhoneType`` field of the
    response.

    Args:
        formatted_number: The phone number to validate; it is rendered to a
            string before being sent.

    Returns:
        "INVALID" when the reported ``PhoneType`` is "INVALID", otherwise
        "VALID".
    """
    # General docs of Amazon Pinpoint Validation Phone Number can be checked in here: https://docs.aws.amazon.com/pinpoint/latest/developerguide/validate-phone-numbers.html
    # API Response and Status can be checked in here: https://docs.aws.amazon.com/pinpoint/latest/apireference/phone-number-validate.html
    request = {
        "IsoCountryCode": "ID",
        "PhoneNumber": f"{formatted_number}",
    }
    result = pinpoint.phone_number_validate(NumberValidateRequest=request)
    phone_type = result["NumberValidateResponse"]["PhoneType"]
    return "INVALID" if phone_type == "INVALID" else "VALID"


def process_phone_number(number):
    """Format a phone number and run it through validation.

    Args:
        number: The raw phone number; it is converted with ``str()`` first.

    Returns:
        The formatted phone number as returned by ``format_phone_number``.
    """
    intl_number = format_phone_number(str(number))
    # NOTE(review): the validation result is discarded here; only the
    # formatted number is returned to the caller.
    validate_phone_number(intl_number)
    return intl_number


def check_time_for_anomaly(input_time, anomaly_start_hour=23, anomaly_end_hour=5):
    """Classify a timestamp as "Anomaly" or "Normal" based on its hour of day.

    Args:
        input_time: A ``datetime`` object, or a string in either
            "%Y-%m-%d %H:%M:%S" or "%I:%M %p" format.
        anomaly_start_hour: First hour (inclusive, 0-23) of the anomaly
            window. Defaults to 23.
        anomaly_end_hour: Hour (exclusive, 0-24) at which the anomaly window
            ends. Defaults to 5.

    Returns:
        "Anomaly" when the hour falls inside the window, otherwise "Normal".
        When the start hour is greater than the end hour the window wraps
        around midnight (the default covers 23:00-04:59). When both are equal
        the window is empty.

    Raises:
        ValueError: If a string matches neither supported format, or the
            input is neither a string nor a ``datetime``.
    """
    if isinstance(input_time, str):
        # Try each supported format in order; the first that parses wins.
        for fmt in ("%Y-%m-%d %H:%M:%S", "%I:%M %p"):
            try:
                input_time = datetime.strptime(input_time, fmt)
                break
            except ValueError:
                continue
        else:
            raise ValueError("The input string must match a valid datetime format.")

    if not isinstance(input_time, datetime):
        raise ValueError("The input must be a valid datetime object or a string representing a datetime.")

    hour = input_time.hour

    if anomaly_start_hour <= anomaly_end_hour:
        # Window lies within a single day.
        in_window = anomaly_start_hour <= hour < anomaly_end_hour
    else:
        # Window wraps past midnight, e.g. 23 -> 5.
        in_window = hour >= anomaly_start_hour or hour < anomaly_end_hour

    return "Anomaly" if in_window else "Normal"

In [None]:
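# Alternative if using Pandas.
# NOTE: the date format below is month-first ('%m/%d/%y'), whereas the Polars cell parses day-first ("%d/%m/%y"); confirm which one matches the CSV before using this.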
# df = pd.read_csv("sample_qc_offline.csv")
# df['recruitment_date'] = pd.to_datetime(df['recruitment_date'], format='%m/%d/%y %I:%M %p')
# df['phone_number'] = df['phone_number'].astype(str)
# df['formatted_phone_number'] = df['phone_number'].apply(format_phone_number)
# df['status'] = df['formatted_phone_number'].apply(validate_phone_number)
# df

In [None]:
# Load the raw QC sample.
df = pl.read_csv("sample_qc_offline.csv")

# Build the processed frame step by step; each with_columns() sees the output
# of the previous one, so the order of the steps matters.
processed_df = (
    df
    # Parse the recruitment timestamp and make sure phone numbers are strings.
    .with_columns(
        pl.col("recruitment_date").str.to_datetime("%d/%m/%y %I:%M %p"),
        pl.col("phone_number").cast(pl.String),
    )
    # Overwrite phone_number with its "+62"-prefixed form.
    .with_columns(
        pl.col("phone_number").map_elements(format_phone_number, return_dtype=pl.String)
    )
    # Validate every formatted number through validate_phone_number().
    .with_columns(
        pl.col("phone_number")
        .map_elements(validate_phone_number, return_dtype=pl.String)
        .alias("validation_status")
    )
    # Label each recruitment time as "Anomaly" or "Normal".
    .with_columns(
        pl.col("recruitment_date")
        .map_elements(check_time_for_anomaly, return_dtype=pl.String)
        .alias("time_status")
    )
)
processed_df

In [None]:
# Flag rows whose phone number shares everything except its last 5 characters
# with at least one other row. Despite its name, "sequence_number" is a
# boolean column (the result of is_duplicated()).
processed_df = (
    processed_df
    # Temporary column: the phone number without its last 5 characters.
    .with_columns(pl.col("phone_number").str.head(-5).alias("prefix"))
    .with_columns(pl.col("prefix").is_duplicated().alias("sequence_number"))
    # Drop temporary columns
    .drop("prefix")
)

processed_df