In [3]:
#import required libraries
import math
import numpy as np
import pandas as pd
import re
import pyphonetics
import phonetics
import pydata_google_auth
import Monkey_Type_Detection as mtd
from arcgis.gis import GIS
from getpass import getpass
import skimpy
import wikipedia as wiki
import requests
from arcgis.geocoding import batch_geocode, Geocoder, get_geocoders, batch_geocode, geocode
from google_trans_new import google_translator
import detect_delimiter


def shopify_data_preprocess(df):
    """Normalise the Shopify shipping-address columns of ``df``.

    Both address lines are lower-cased (missing values become ``''``) and
    joined with a comma into one address string; the city is lower-cased and
    the columns are renamed to the shared naming scheme.

    Note: ``df`` is modified in place (a column is added and columns are
    renamed).

    Returns:
        A frame holding only ``address_string``, ``drop_location_city`` and
        ``drop_location_pincode``.
    """
    line_1 = 'shipping_address_address1'
    line_2 = 'shipping_address_address2'

    # Lower-case each address line; anything missing ends up as an empty string.
    for line_col in (line_1, line_2):
        df[line_col] = df[line_col].str.lower().fillna('')

    # Both lines are now non-null, so every row gets a combined address.
    df['whole_shipping_address'] = df[line_1] + ',' + df[line_2]

    # Missing cities stay missing; only real values are lower-cased.
    df['shipping_address_city'] = df['shipping_address_city'].str.lower()

    standard_names = {
        'whole_shipping_address': 'address_string',
        'shipping_address_city': 'drop_location_city',
        'shipping_address_zip': 'drop_location_pincode',
    }
    df.rename(columns=standard_names, inplace=True)

    return df[['address_string', 'drop_location_city', 'drop_location_pincode']]

def woocom_data_preprocess(df):
    """Normalise the WooCommerce address columns of ``df``.

    The drop address is lower-cased into ``address_string`` (missing values
    become ``''``) and ``drop_location_pin`` is renamed to
    ``drop_location_pincode``.

    The previous body operated on the module-level ``woocom_data`` frame
    instead of the ``df`` argument, so it only worked when that global existed
    and happened to be the frame passed in.

    Note: ``df`` is modified in place (a column is added, one is renamed).

    Returns:
        A frame holding only ``address_string``, ``drop_location_city`` and
        ``drop_location_pincode``.
    """
    # Lower-case the address; null addresses become the empty string.
    df['address_string'] = df['drop_location_address'].str.lower().fillna('')
    df.rename(columns={'drop_location_pin': 'drop_location_pincode'}, inplace=True)
    return df[['address_string', 'drop_location_city', 'drop_location_pincode']]

def shipments_data_preprocess(df):
    """Normalise the shipment-history address columns of ``df``.

    Both address lines are lower-cased (missing values become ``''``) and
    joined with a comma into ``address_string``; the city is lower-cased.

    The previous version handled ``drop_location_address_1`` twice and never
    lower-cased or null-filled ``drop_location_address_2``, so any row with a
    missing second line produced a NaN ``address_string``.

    Note: ``df`` is modified in place (a column is added and renamed).

    Returns:
        A frame holding only ``address_string``, ``drop_location_city`` and
        ``drop_location_pincode``.
    """
    line_1 = 'drop_location_address_1'
    line_2 = 'drop_location_address_2'

    for line_col in (line_1, line_2):
        values = df[line_col]
        # Replace nulls first, then stringify, so an all-null (float) column
        # works too and numeric fragments are kept as text.
        df[line_col] = (
            values.astype(object).where(values.notna(), '').astype(str).str.lower()
        )

    df['whole_shipping_address'] = df[line_1] + ',' + df[line_2]

    # Missing cities stay missing; only real values are lower-cased.
    df['drop_location_city'] = df['drop_location_city'].str.lower()

    df.rename(columns={'whole_shipping_address': 'address_string'}, inplace=True)

    return df[['address_string', 'drop_location_city', 'drop_location_pincode']]

def osm_data_preprocess():
    """Load the consolidated OSM frame and normalise its text columns.

    Reads ``open_street_map_data/overall_osm_df.pq``. Every text column is
    lower-cased with missing values replaced by ``''``. ``housenumber`` and
    ``population`` are converted to strings first, then treated the same way.

    The previous version ended with ``import geopu`` (apparently a truncated
    ``geopy``) and ``osm_data.rename(columns={''})`` (a set, not a mapping,
    with the result discarded), so it could not run to completion and
    returned nothing. Those unfinished statements are replaced by a TODO.

    Returns:
        The normalised OSM ``DataFrame``.
    """
    # Layout of the generated OSM file:
    #   name                  - the address / location name
    #   location entity/tag   - hierarchy of location attributes: highway,
    #                           junction, building, amenity, nature of
    #                           location, shop type
    #   lat / long
    osm_data = pd.read_parquet('open_street_map_data/overall_osm_df.pq')

    # Columns that already hold text.
    text_columns = (
        'name', 'building', 'district', 'village', 'town', 'locality', 'city',
        'postal_code', 'highway', 'amenity', 'place', 'state', 'brand',
        'street', 'subdistrict',
    )
    for column in text_columns:
        osm_data[column] = osm_data[column].str.lower().fillna('')

    # Columns that must be converted to text first.
    # population can be used as an input feature and could also be fetched
    # from the Google Maps API.
    for column in ('housenumber', 'population'):
        values = osm_data[column]
        lowered = values.astype(str).str.lower()
        # astype(str) turns nulls into the text 'nan'; restore them as ''.
        osm_data[column] = lowered.where(values.notna(), '')

    # TODO: derive postal codes via geocoding and rename the columns to the
    # shared naming scheme - both steps were left unfinished in the draft.
    return osm_data
    



In [2]:
##consolidate all data to one - use multiprocessing to load data from various sources
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load pickle files that come from a trusted source.
# NOTE(review): 'formatted_open_street_address_db.pkl' is read twice below,
# into open_street_address_db and formatted_open_street_address_db.
open_street_address_db=pd.read_pickle('formatted_open_street_address_db.pkl')
woocom_data=pd.read_pickle('woocom.pkl')
shopify_data=pd.read_pickle('shopify_data.pkl')
formatted_open_street_address_db=pd.read_pickle('formatted_open_street_address_db.pkl')
dataset_promise_merge=pd.read_csv('dataset_promise_merge.csv')
address_shipments_data=pd.read_pickle('address_shipments_data.pkl')

# Snapshot the raw frames before preprocessing. The preprocess helpers rename
# and add columns in place, so these copies keep the original column names.
shopify_data_copy=shopify_data.copy()
woocom_data_copy=woocom_data.copy()
address_shipments_data_copy=address_shipments_data.copy()

# Each helper returns only the address_string / drop_location_city /
# drop_location_pincode columns. The frames passed in are the originals, not
# the copies taken above.
shopify_data_preprocessed=shopify_data_preprocess(shopify_data)
woocom_data_preprocessed=woocom_data_preprocess(woocom_data)
shipments_data_preprocessed=shipments_data_preprocess(address_shipments_data)

In [14]:
# Merge the 315 parsed OSM chunks (osm_df_1.pq ... osm_df_315.pq) into one frame.
# pd.concat takes an iterable of frames as its first argument; the earlier
# pd.concat(overall_osm_df, df) passed the second frame where `axis` is expected.
# Collecting the chunks and concatenating once also avoids re-copying the
# growing frame on every iteration.
osm_chunks=[]
for i in range(315):
    print("i: ",i)
    df=pd.read_parquet('osm_parsed_data_v2/osm_df_'+str(i+1)+'.pq')
    osm_chunks.append(df)
overall_osm_df=pd.concat(osm_chunks)

org_df=pd.read_parquet('overall_osm_df.pq')

i:  0


ValueError: engine must be one of 'pyarrow', 'fastparquet'

In [13]:
# NOTE(review): `shipments_data` is never assigned in the visible cells, which
# is why this cell raises NameError. Presumably `shipments_data_preprocessed`
# or `address_shipments_data` was meant - confirm before relying on this cell.
shipments_data

NameError: name 'shipments_data' is not defined

In [6]:
def split_by_delimiters(text):
    """Split ``text`` into lexical units on any supported delimiter.

    Delimiters are comma, ``!``, ``;``, ``-``, ``:``, ``_``, space, tab and
    newline. Adjacent delimiters yield empty strings in the result. The input
    and the resulting tokens are printed for inspection.

    Returns:
        The list of tokens, including any empty ones.
    """
    delimiter_pattern = re.compile(r',|!|;|-|:|_| |\t|\n')
    lexical_units = delimiter_pattern.split(text)
    # Echo the raw text followed by its tokens.
    for shown in (text, lexical_units):
        print(shown)
    return lexical_units


hi! my na-m:e is ni_k, we	come; to datagy
good
bad
['hi', '', 'my', 'na', 'm', 'e', 'is', 'ni', 'k', '', 'we', 'come', '', 'to', 'datagy', 'good', 'bad']


['hi',
 '',
 'my',
 'na',
 'm',
 'e',
 'is',
 'ni',
 'k',
 '',
 'we',
 'come',
 '',
 'to',
 'datagy',
 'good',
 'bad']

In [None]:
def get_nearest_delim(word, text):
    # NOTE(review): unfinished draft - this function does not parse as written
    # (see the notes on the two incomplete statements below). The intended
    # return value cannot be determined from the code; confirm the design
    # before completing it.
    # Pieces of `text` between commas.
    comma_text_split=text.split(',')
    # Character span of the first occurrence of `word` in `text`
    # (str.find returns -1 when `word` is absent).
    word_start_index=text.find(word)
    word_end_index=word_start_index+len(word)
    # NOTE(review): assigned but never used below.
    word_before_char=[]
    # NOTE(review): incomplete statement - the right-hand side is missing.
    word_before_text=
    # Returns 0 when `word` is not exactly one of the comma-separated pieces.
    if word not in comma_text_split:
        return 0
    # NOTE(review): incomplete branch - `substr_before_text` is never assigned,
    # the brackets are unbalanced and the branch has no body.
    elif substr_before_text==text[(word_start_index-1)]:word_end_index]
    
    
# Per-word comma statistics. The column labels are given as lists: a set has no
# defined order (and recent pandas versions reject a set for `columns`).
word_comma_freq=pd.DataFrame(columns=['word','comma_before_freq','comma_after_freq'])
all_words_comma_freq_track=pd.DataFrame(columns=['word','comma_before_freq','comma_after_freq'])
# Corpus of every distinct address word seen so far.
all_words_list=[]
def insert_comma_by_frequency(text):
    # NOTE(review): unfinished draft - the function ends in an `if` with no
    # body, so it does not parse as written. Nothing is returned.
    text_list=split_by_delimiters(text) #split by all the delimiters - to get individual words
    for word in text_list:
        if word not in all_words_list:
            all_words_list.append(word) #adding the newly encountered word into large address word corpus
            # NOTE(review): these assign a scalar to whole columns of the
            # module-level `word_comma_freq` frame. On an empty frame that adds
            # no row; presumably appending a row for `word` was intended -
            # confirm.
            word_comma_freq['word']=word
            word_comma_freq['comma_before_freq']=0
            word_comma_freq['comma_after_freq']=0
        # NOTE(review): always true here, since `word` is drawn from
        # `text_list`; the body of this branch is missing.
        if word in text_list:


In [None]:
def get_file_ext(file_name):
    """Return the extension of ``file_name`` in lower case, without the dot.

    The previous version returned ``rpartition('.')[1]``, which is the
    separator itself (``'.'``) rather than the extension, and only looked at
    the last five characters, so longer extensions such as ``.parquet`` were
    missed.

    Args:
        file_name: File name or path.

    Returns:
        The extension (e.g. ``'csv'``), or ``''`` when there is none.
    """
    import os  # local import: this module's top-level imports do not include os

    # splitext only looks at the final path component, so dots in directory
    # names are not mistaken for an extension.
    extension = os.path.splitext(file_name)[1]
    return extension[1:].lower()

def read_file(file_name,file_ext):
    """Load ``file_name`` into a DataFrame using the reader for ``file_ext``.

    Supported extensions: ``csv``, ``pq`` (parquet), ``pkl`` (pickle) and
    ``json``. Add further data-source formats to the dispatch table below.

    Args:
        file_name: Path of the file to read (anything the pandas reader accepts).
        file_ext: Extension without the dot, e.g. ``'csv'``.

    Returns:
        The loaded ``DataFrame``.

    Raises:
        ValueError: If ``file_ext`` is not a supported format. ``ValueError``
            is a subclass of the plain ``Exception`` raised previously, and the
            arguments are the same.
    """
    readers = {
        'csv': pd.read_csv,
        'pq': pd.read_parquet,
        # NOTE: unpickling can execute arbitrary code - only read pickle files
        # from trusted sources.
        'pkl': pd.read_pickle,
        'json': pd.read_json,
    }
    reader = readers.get(file_ext) if isinstance(file_ext, str) else None
    if reader is None:
        raise ValueError("Invalid file format...", file_ext)
    return reader(file_name)

def get_postal_pincode_data():
    """Load ``postal_pincode_data.csv`` with its column names cleaned.

    Column names are normalised by ``skimpy.clean_columns``. The previous
    version built the cleaned frame, evaluated its column list and discarded
    both, so callers always got ``None``.

    Returns:
        The postal pincode ``DataFrame`` with cleaned column names.
    """
    from skimpy import clean_columns

    postal_pincode_data = pd.read_csv('postal_pincode_data.csv')
    return clean_columns(postal_pincode_data)

def regex_match(pincode,pattern):
    """Report whether ``re.match(pincode, pattern)`` finds a match.

    Note the argument order: the FIRST argument is passed to ``re.match`` as
    the regular expression and the SECOND as the string to test, despite the
    parameter names.

    Returns:
        ``True`` if the match succeeds at the start of the string, else ``False``.
    """
    return re.match(pincode, pattern) is not None



def check_pincode_correct(pincode, address):
    """Validate ``pincode`` and compare it against ``address``.

    Steps:
      1. Format check: exactly six digits, the first one non-zero.
      2. Existence check: the pincode must occur in ``postal_pincode_data.csv``
         or ``promise_pincode_data.csv``.
      3. The postal rows for the pincode are normalised (nulls -> "None",
         everything lower-cased text) and handed to
         ``pincode_address_match_similarity`` together with ``address``.

    Fixes over the previous version:
      * The format regex contained ``\\s`` inside a raw string, which demands a
        literal backslash, so every well-formed pincode was rejected.
      * The similarity function was called once per column inside the
        normalisation loop instead of once afterwards.
      * The similarity result was never returned.

    Args:
        pincode: Pincode as str or int. An int loses any leading zero and is
            then rejected as too short.
        address: The address string entered with the pincode.

    Returns:
        Whatever ``pincode_address_match_similarity`` returns.

    Raises:
        ValueError: For a malformed or unknown pincode. ``ValueError`` is a
            subclass of the plain ``Exception`` raised previously; the
            messages are unchanged.
    """
    pincode=str(pincode)

    if len(pincode)<6:
        raise ValueError("Invalid pincode - pincode contains less than 6 digits")
    if len(pincode)>6:
        raise ValueError("Pincode cannot have more than 6 digits...")
    if pincode[0]=='0':
        raise ValueError("First digit of pincode cannot be zero...")
    # The length is already known to be six, so no optional space is needed.
    if re.fullmatch(r"[1-9][0-9]{5}", pincode) is None:
        raise ValueError("Pincode Format not valid...")

    # Existence check against the postal and promise databases. Pincodes are
    # compared as strings.
    postal_pincode_df=pd.read_csv('postal_pincode_data.csv')
    postal_pincode_df['pincode']=postal_pincode_df['pincode'].astype(str)
    postal_pincodes=set(postal_pincode_df['pincode'])
    print("len of postal_pincode_list: ",len(postal_pincodes))

    promise_pincode_df=pd.read_csv('promise_pincode_data.csv')
    promise_pincodes=set(promise_pincode_df['pincode'].astype(str))
    print("len of promise_pincode_list: ",len(promise_pincodes))

    known_pincodes=postal_pincodes|promise_pincodes
    print("len of pincode list: ",len(known_pincodes))

    if pincode not in known_pincodes:
        raise ValueError("Pincode not found in database...")

    # The pincode is well-formed and known; next, see whether it matches the
    # entered address. Two ways to check:
    #   - whether the city matches the pincode
    #   - whether the locality or another smaller division is mapped to the
    #     pincode in the postal data
    # Postal data attributes: officename, pincode, officeType, Deliverystatus,
    # divisionname, regionname, circlename, Taluk, Districtname, statename,
    # Telephone, Related Suboffice, Related Headoffice, longitude, latitude
    postal_data_cols=['officename','officeType','divisionname','regionname','circlename','Taluk','Districtname','statename','Related Suboffice','Related Headoffice','longitude','latitude']
    # .copy() so the normalisation below does not write into a slice of
    # postal_pincode_df.
    pincode_expected_address_component_df=postal_pincode_df.loc[postal_pincode_df['pincode']==pincode,postal_data_cols].copy()
    for col in postal_data_cols:
        values=pincode_expected_address_component_df[col]
        # nulls -> "None", then everything to lower-cased text
        pincode_expected_address_component_df[col]=(
            values.astype(object).where(values.notna(),"None").astype(str).str.lower()
        )

    # Compare the expected address components with the entered address once,
    # after all columns have been normalised.
    pincode_address_mismatch=pincode_address_match_similarity(pincode,address,pincode_expected_address_component_df)
    return pincode_address_mismatch










def pincode_address_match_similarity(pincode, address, expected_address):
    """Compare the address given for a pincode with its expected address.

    Placeholder: the comparison is not implemented yet and ``None`` is always
    returned. (The design notes on address typology used to sit here, where
    they became this function's docstring and only body; they now live on
    ``set_typology_feature_for_address``.)

    Args:
        pincode: The pincode being checked.
        address: The address string entered for that pincode.
        expected_address: Address components expected for the pincode.
    """
    # TODO: check the expected address' similarity with the actual address
    # given for a pincode.
    return None


def set_typology_feature_for_address(address):
    """Map an address to its type, e.g. apartment or individual building.

    Placeholder; not implemented yet. Candidate approaches:

    1. Use the Google Maps API to see whether the extracted door number of a
       specific street name corresponds to an apartment or a house. Selenium
       can extract the address details for the search if a geolocation API is
       to be avoided.
    2. Analyse the delivery history for the same address (door number + street
       name). If the total number of deliveries is significantly higher than
       the average, that indicates an apartment or office space; if it is
       significantly lower, it may point to a house.
    """
    pass

#to classify an address - residential or a commercial/industrial area
def set_area_feature_for_address(address):
    """Placeholder for tagging an address with its area type.

    Not implemented yet; always returns ``None``. Candidate approaches:

    1. infer from address corresponding details - using geolocation API/extract
       addition location info - using selenium from maps.google.com - used for
       tagging area type -
    2. analyze - the neighbouring addresses - if they are tagged - by area type
       and infer - area type - based on the nearest address info we have
    """
    return None

def locality_set_for_given_pincode(pincode):
    """Placeholder for looking up the localities mapped to ``pincode``.

    Not implemented yet; always returns ``None``. Design notes:

    - Refer to the ACT db to get the range of localities a pincode is mapped to.
    - Only use delivered-address examples with no address issue and no
      pincode/address mismatch identified.
    - Also use the courier partner's lat/long and the address lat/long (from
      Shopify or a geolocation API mapping, used for training data), keeping
      pairs whose distance is below a threshold.
    """
    return None




#clean data - remove missing, incorrect pincodes,duplicate pincodes
#get pincode, taluk mapping
def geolocation_mapping_fwd(x): #address to lat long mapping
    """Connectivity probe for the planned address -> lat/long mapping.

    Geocoding is not implemented yet and ``x`` is currently unused; the
    function only issues a GET request to https://google.com/ and prints the
    response. Candidate backends: geopy, arcgis, positionstack. The
    positionstack forward endpoint noted in the draft is::

        http://api.positionstack.com/v1/forward
            ? access_key = <key>
            & query = 1600 Pennsylvania Ave NW, Washington DC

    A timeout is now passed to ``requests.get`` so an unresponsive host cannot
    block forever, and the response is returned instead of being dropped.

    Returns:
        The ``requests.Response`` of the probe request.
    """
    import requests

    #try getting location data from google maps
    response = requests.get('https://google.com/', timeout=10)
    print(response)
    return response

def geolocation_mapping_reverse(x): #lat long to address -> address entity mapping
    """Placeholder for reverse geocoding (lat/long -> address entities).

    Not implemented yet; ``x`` is ignored and ``None`` is returned.
    """
    return None




###



def pincode_validation(pincode):
    """Run the pincode format check and return its result.

    The result of ``check_pincode_format`` used to be computed and discarded,
    so callers always received ``None``.

    NOTE(review): ``check_pincode_format`` is not defined in the visible part
    of this file - confirm it exists elsewhere.
    """
    pincode_format_valid=check_pincode_format(pincode)
    return pincode_format_valid

def address_raw_data(data_source_file):
    """Load the raw address data stored in ``data_source_file``.

    The file type is taken from the extension and the matching reader is
    used. Fixes over the previous version: ``get_file_ext`` is a module-level
    function (strings have no ``.get_file_ext()`` method), the loader is
    ``read_file`` (``read_file_name`` does not exist), and the loaded frame is
    now returned.

    Planned address data sources:
      1. Shipment history
      2. Shopify and Woocom orders data - both swift/non-swift fulfilled -
         shipping and billing address, pickup addresses
      3. External data sources:
         a. data gov websites - Indian address data sources (additional
            sources needed, e.g. MapMyIndia)
         b. OSM Indian address data
         c. list of zone - ward, street level info - whenever available for a city
         d. Amazon - lat/long mapping for the delivered addresses (GPS
            coordinates?)
         e. Delhivery - lat/long mapping for the delivered addresses (GPS
            coordinates)

    Args:
        data_source_file: Path of the data file to load.

    Returns:
        The loaded ``DataFrame``.
    """
    #identify the type of file from its extension
    file_ext=get_file_ext(data_source_file)
    address_data=read_file(data_source_file,file_ext)

    if not address_data.empty:
        # TODO: extract the required fields using standard attribute names
        # across all data sources (address_string, pincode, city, state), and
        # use promise data to map pincode -> city, state.
        pass
    return address_data


def isEnglish(s):
    """Return ``True`` when ``s`` contains only ASCII characters.

    An empty string counts as ASCII.
    """
    ascii_only = s.isascii()
    return ascii_only

def get_soundex(name):
    """Return the four-character American Soundex code for ``name``.

    Rules implemented:
      * The first letter is kept as it is (upper-cased).
      * Remaining consonants map to digits: BFPV=1, CGJKQSXZ=2, DT=3, L=4,
        MN=5, R=6.
      * Adjacent letters with the same digit are coded once, including a
        letter that shares its digit with the first letter.
      * Vowels (A, E, I, O, U, Y) separate equal digits; H and W do not.
      * The result is truncated or zero-padded to four characters.

    Differences from the previous implementation: an empty name no longer
    raises ``IndexError``, the first letter's own digit takes part in the
    duplicate check (``Pfister`` -> ``P236``), and H/W no longer act as
    separators (``Ashcraft`` -> ``A261``). Non-letter characters are ignored.

    Args:
        name: The string to encode.

    Returns:
        The Soundex code, or ``''`` if ``name`` contains no ASCII letters.
    """
    digit_groups = (
        ("BFPV", "1"),
        ("CGJKQSXZ", "2"),
        ("DT", "3"),
        ("L", "4"),
        ("MN", "5"),
        ("R", "6"),
    )
    digit_for = {
        letter: digit for letters, digit in digit_groups for letter in letters
    }

    letters = [ch for ch in name.upper() if ch.isascii() and ch.isalpha()]
    if not letters:
        return ""

    soundex = letters[0]
    # Digit of the most recent letter; '' after a vowel, which resets the
    # duplicate check.
    previous_digit = digit_for.get(letters[0], "")
    for ch in letters[1:]:
        if ch in "HW":
            # H and W are skipped without breaking a run of equal digits.
            continue
        digit = digit_for.get(ch, "")
        if digit and digit != previous_digit:
            soundex += digit
        previous_digit = digit

    return soundex[:4].ljust(4, "0")


def text_preprocessing(text):
    """Return a cleaned, lower-cased copy of an address string.

    Non-ASCII characters are dropped, every remaining non-alphanumeric
    character is replaced by a space, and the result is lower-cased. Standard
    stemming and lemmatisation are deliberately avoided because address
    matching needs the exact words.

    The previous version built the cleaned text but never returned it.

    Args:
        text: Raw address text.

    Returns:
        The cleaned text. Runs of special characters become runs of spaces.
    """
    cleaned = ''.join(
        ch if ch.isalnum() else ' '
        for ch in text
        if ch.isascii()
    )
    # TODO: spelling correction - for city, state
    return cleaned.lower()


    #phonetic similarity
def soundex_similarity(text,text_list):
    """Return the first entry of ``text_list`` that sounds like ``text``.

    Matching uses pyphonetics' ``Soundex.sounds_like``. When nothing matches,
    or ``text_list`` is empty, ``text`` is returned unchanged, so a caller can
    detect the miss with a membership test.

    The previous version returned the LAST entry of ``text_list`` when nothing
    matched and failed on an empty list. It also printed every candidate and
    round-tripped the strings through UTF-8, which changed nothing.
    """
    if len(text_list) == 0:
        return text

    from pyphonetics import Soundex
    soundex = Soundex()
    for candidate in text_list:
        if soundex.sounds_like(text, candidate):
            return candidate
    return text
def metaphone_similarity(text,text_list):
    """Return the first entry of ``text_list`` that sounds like ``text``.

    Matching uses pyphonetics' ``Metaphone.sounds_like``. When nothing
    matches, or ``text_list`` is empty, ``text`` is returned unchanged, so a
    caller can detect the miss with a membership test.

    The previous version returned the LAST entry of ``text_list`` when nothing
    matched and failed on an empty list.
    """
    if len(text_list) == 0:
        return text

    from pyphonetics import Metaphone
    metaphone = Metaphone()
    for candidate in text_list:
        if metaphone.sounds_like(text, candidate):
            return candidate
    return text

def refined_soundex_similarity(text,text_list):
    """Return the first entry of ``text_list`` that sounds like ``text``.

    Matching uses pyphonetics' ``RefinedSoundex.sounds_like``. When nothing
    matches, or ``text_list`` is empty, ``text`` is returned unchanged, so a
    caller can detect the miss with a membership test.

    The previous version returned the LAST entry of ``text_list`` when nothing
    matched and failed on an empty list.
    """
    if len(text_list) == 0:
        return text

    from pyphonetics import RefinedSoundex
    refined_soundex = RefinedSoundex()
    for candidate in text_list:
        if refined_soundex.sounds_like(text, candidate):
            return candidate
    return text

def fuzzy_soundex_similarity(text,text_list):
    """Return the first entry of ``text_list`` that sounds like ``text``.

    Matching uses pyphonetics' ``FuzzySoundex.sounds_like``. When nothing
    matches, or ``text_list`` is empty, ``text`` is returned unchanged, so a
    caller can detect the miss with a membership test.

    The previous version returned the LAST entry of ``text_list`` when nothing
    matched and failed on an empty list.
    """
    if len(text_list) == 0:
        return text

    from pyphonetics import FuzzySoundex
    fuzzy_soundex = FuzzySoundex()
    for candidate in text_list:
        if fuzzy_soundex.sounds_like(text, candidate):
            return candidate
    return text

def levenshtein_similarity(text,text_list):
    """Return the first ``text_list`` entry at Levenshtein distance 0 from ``text``.

    The distance comes from pyphonetics'
    ``RefinedSoundex.distance(..., metric='levenshtein')``. When nothing
    matches, or ``text_list`` is empty, ``text`` is returned unchanged, so a
    caller can detect the miss with a membership test.

    The previous version returned the LAST entry of ``text_list`` when nothing
    matched, failed on an empty list, printed every candidate, and bound the
    encoder to a local name that shadowed this function.
    """
    if len(text_list) == 0:
        return text

    from pyphonetics import RefinedSoundex
    encoder = RefinedSoundex()
    for candidate in text_list:
        if encoder.distance(text, candidate, metric='levenshtein') == 0:
            return candidate
    return text


def hamming_similarity(text,text_list):
    """Return the first ``text_list`` entry at Hamming distance 0 from ``text``.

    The distance comes from pyphonetics'
    ``RefinedSoundex.distance(..., metric='hamming')``. When nothing matches,
    or ``text_list`` is empty, ``text`` is returned unchanged, so a caller can
    detect the miss with a membership test.

    The previous version returned the LAST entry of ``text_list`` when nothing
    matched and failed on an empty list.
    """
    if len(text_list) == 0:
        return text

    from pyphonetics import RefinedSoundex
    encoder = RefinedSoundex()
    for candidate in text_list:
        if encoder.distance(text, candidate, metric='hamming') == 0:
            return candidate
    return text
def _resolve_known_name(name, known_names, label):
    """Map ``name`` onto an entry of ``known_names`` via the phonetic matchers.

    The matchers are tried in a fixed order and the first result that is a
    known name wins. If none succeeds, a message is printed and ``name`` is
    returned unchanged.
    """
    known = set(known_names)
    if name in known:
        return name

    matchers = (
        levenshtein_similarity,
        hamming_similarity,
        soundex_similarity,
        metaphone_similarity,
        refined_soundex_similarity,
        fuzzy_soundex_similarity,
    )
    for matcher in matchers:
        candidate = matcher(name, known_names)
        if candidate in known:
            return candidate

    print(f"no exact {label} name match found....")
    return name


def phonetic_similarity_city_and_state(city, state):
    """Map ``city`` and ``state`` onto the names in the promise data.

    Reads ``promise_state_name.pq`` and lower-cases its ``city`` and
    ``stateName`` columns. A name already present there is returned as it is.
    Otherwise the matchers are tried in this order: Levenshtein, Hamming,
    Soundex, Metaphone, Refined Soundex, Fuzzy Soundex.

    Changes from the previous version: lower-casing no longer crashes on
    missing values (``str.lower`` was applied to every cell before nulls were
    filtered), the nested ``if`` pyramid is a loop, and the city chain no
    longer repeats the Levenshtein and Hamming attempts a second time.

    Args:
        city: City name to resolve (expected lower-case).
        state: State name to resolve (expected lower-case).

    Returns:
        ``(city, state)`` after replacement. A name is left unchanged when no
        matcher yields a known name.
    """
    promise_data=pd.read_parquet('promise_state_name.pq')

    # Distinct, lower-cased, non-null names.
    city_list=promise_data['city'].str.lower().dropna().drop_duplicates().tolist()
    state_list=promise_data['stateName'].str.lower().dropna().drop_duplicates().tolist()

    replace_state_name_with=_resolve_known_name(state,state_list,"state")
    replace_city_name_with=_resolve_known_name(city,city_list,"city")
    return replace_city_name_with, replace_state_name_with

# def spell_corrector
def translate_to_english(text):
    """Translate ``text`` into English.

    A fresh ``google_translator`` instance is created per call and asked for
    a translation with English (``'en'``) as the target language.

    Returns:
        Whatever the translator's ``translate`` call returns for ``text``.
    """
    return google_translator().translate(text, lang_tgt='en')
