# Checking quality of phone numbers

In [53]:
'''
This script can be used to check whether a given set of
phone numbers is possible and/or valid for a particular country
based on libphonenumber (Google's common Java, C++ and JavaScript library for 
parsing, formatting, and validating international phone numbers.)
This demo notebook uses a python third-party port available from:
https://github.com/google/libphonenumber

The country selected is Egypt with country code "EG"
'''

'\nThis script can be used to check whether a given set of\nphone numbers is possible and/or valid for a particular country\nbased on libphonenumber (Google\'s common Java, C++ and JavaScript library for \nparsing, formatting, and validating international phone numbers.)\nThis demo notebook uses a python third-party port available from:\nhttps://github.com/google/libphonenumber\n\nThe country selected is Egypt with country code "EG"\n'

## 1. Notebook setup

#### Import libraries needed

In [3]:
import numpy as np
import pandas as pd
import pickle
import datetime as datetime
import pyodbc
import urllib
import sqlalchemy
import requests
import time
import re
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 0)
import phonenumbers

#### Import table with phone numbers

In [4]:
# import individual table and filter for focal points only
# dfi = pd.read_sql_query("SELECT {} FROM dbo.Filteredprogres_individual".format(icol), db_connection)
# dfi = dfi[dfi.progres_relationshiptofpname=="Focal Point"]

# Stand-in for the real individual table: six hand-written progres ids,
# each paired with a phone number, assembled in a single constructor call.
dfi = pd.DataFrame(
    {
        "progres_id": ["1", "2", "3", "4", "5", "6"],
        "progres_primaryphonenumber": [
            "111800938",
            "20111800938",
            "1125078356",
            "201125078356",
            "114973124",
            "152835755",
        ],
    }
)


In [5]:
# Display the sample table (six rows) to confirm its contents before checking the numbers
dfi

Unnamed: 0,progres_id,progres_primaryphonenumber
0,1,111800938
1,2,20111800938
2,3,1125078356
3,4,201125078356
4,5,114973124
5,6,152835755


## 2. Clean phone numbers as needed 

##### The cells below are optional cleaning templates; which ones to apply depends on the kinds of irregularities found in the phone number field

In [6]:
## If multiple phone numbers are listed, separated by various symbols:

# dfi["split_phone"] = dfi["progres_primaryphonenumber"].apply(lambda x: x.split('*')[0] if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.split('/')[0] if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.split(',')[0] if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.split(';')[0] if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.strip('\n') if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.strip('\t') if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.split(' - ')[0] if x!=None and len(x.split(' - ')[0])>6 else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.split('-')[0] if x!=None and len(x.split('-')[0])>6 else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.replace('-','') if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.replace('.','') if x!=None else x)
# dfi["split_phone"] = dfi["split_phone"].apply(lambda x: x.replace(' ','') if x!=None else x)
# dfi["split_phone_length"]= dfi["split_phone"].apply(lambda x: len(x) if x!=None else None)

In [7]:
## Further cleaning as needed

# dfi["cleaned_phone"] = dfi["split_phone"].apply(lambda x: x if x==None else( 
#                                                           x[4:] if x[:4]=="+200" else(
#                                                            x[3:] if x[:3]=="+20" else(
#                                                               x[1:] if x[:1]=="0" else x))))
# dfi["cleaned_phone"] = dfi["cleaned_phone"].apply(lambda x: x.strip('\n') if x!=None else x)
# dfi["cleaned_phone"] = dfi["cleaned_phone"].apply(lambda x: x.strip('\t') if x!=None else x)
# dfi["cleaned_phone"] = dfi["cleaned_phone"].apply(lambda x: x.strip('`') if x!=None else x)
# dfi["cleaned_phone"] = dfi["cleaned_phone"].apply(lambda x: x.split('@')[0] if x!=None else x)
# dfi["cleaned_phone"] = dfi["cleaned_phone"].apply(lambda x: x.strip('`') if x!=None else x)
# dfi["cleaned_phone"] = dfi["cleaned_phone"].apply(lambda x: re.sub(r'[!@#$(),\n"%^*?\:;~`\D]', '', x) if x!=None else None)
# # re.sub(r'[!@#$(),\n"%^*?\:;~`\D]', '', '32423abdD+,;-/.343السويس')
# dfi["cleaned_phone_length"] = dfi["cleaned_phone"].apply(lambda x: len(x) if x!=None else x)
# dfi.cleaned_phone_length.value_counts(dropna=False)

In [8]:
# Since we are using a hand-made sample table, none of the optional (commented-out)
# cleaning steps above are applied: the raw numbers are copied unchanged into the
# "cleaned_phone" column that is checked below.
dfi["cleaned_phone"] = dfi["progres_primaryphonenumber"]

#### Function to check phone numbers

In [9]:
def check_phone_number_possible_valid(dataframe, field_to_check, country_code):
    '''
    Report, for every row of "dataframe", whether the phone number stored in
    "field_to_check" is possible and/or valid according to the phonenumbers library.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table holding the phone numbers. Its index is reused for the result.
    field_to_check : str
        Name of the column containing the phone numbers. Values are expected to be
        strings; missing values (None / NaN) are allowed.
    country_code : str
        Region code (for example "EG") used to interpret numbers that cannot be
        parsed on their own, i.e. numbers not written in international "+..." format.

    Returns
    -------
    pandas.DataFrame
        One row per input row, indexed like "dataframe", with the columns
        "phone_no" (the input value), "possible", "validity" and "phone_no_length".
        Rows with a missing phone number get None for "possible", "validity" and
        "phone_no_length". Numbers that cannot be parsed even with "country_code"
        are reported as neither possible nor valid (False / False).
    '''
    output_columns = ["phone_no", "possible", "validity", "phone_no_length"]
    records = []

    # Iterate over the values of the *passed-in* table (not a notebook global), and
    # collect plain dicts so the result frame is built once instead of being
    # re-concatenated on every iteration.
    for raw_number in dataframe[field_to_check].tolist():
        if pd.isna(raw_number):
            # Nothing to check: keep the row so the output aligns with the input.
            records.append({"phone_no": raw_number, "possible": None,
                            "validity": None, "phone_no_length": None})
            continue

        try:
            # First try without a default region; this only works when the number
            # itself carries its country code in international format.
            parsed = phonenumbers.parse(raw_number, None)
        except phonenumbers.NumberParseException:
            try:
                # Fall back to interpreting the number as a national number of
                # the requested country.
                parsed = phonenumbers.parse(raw_number, country_code)
            except phonenumbers.NumberParseException:
                # Not parseable at all (e.g. letters only): flag it instead of
                # aborting the check for every remaining row.
                parsed = None

        if parsed is None:
            possible, valid = False, False
        else:
            possible = phonenumbers.is_possible_number(parsed)
            valid = phonenumbers.is_valid_number(parsed)

        records.append({"phone_no": raw_number, "possible": possible,
                        "validity": valid, "phone_no_length": len(raw_number)})

    return pd.DataFrame(records, index=dataframe.index, columns=output_columns)

#### Call the function

In [10]:
# Check the "cleaned_phone" column of the sample table using "EG" (Egypt) as the country code
phones_possible_valid = check_phone_number_possible_valid(dfi, "cleaned_phone", "EG")

In [11]:
# Show the first six result rows; the sample table has six rows, so this is the full result
phones_possible_valid.head(6)

Unnamed: 0,phone_no,possible,validity,phone_no_length
0,111800938,True,False,9
1,20111800938,True,False,11
2,1125078356,True,True,10
3,201125078356,True,True,12
4,114973124,True,False,9
5,152835755,True,True,9
