#CFH - Customer Feedback Hub
## Calling GMB APIs to collect, store and enrich feedback

In [None]:
%pip install azure-storage-blob
%pip install google-api-python-client
%pip install google-auth
%pip install google-auth-oauthlib

In [None]:
import pickle, os, datetime
import pandas as pd
import numpy as np
from azure.storage.blob import BlobServiceClient
from io import BytesIO, StringIO
from IPython.display import display
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import googleapiclient

# Log a banner carrying the notebook start time.
print('-' * 80, 'Starting - ' + str(datetime.datetime.now()), '-' * 80, sep='\n')

### 1 - Batch Param

In [None]:
### LUIS
# LUIS application id, keyed by a two-letter language code.
luis_apps = {
    'fr': '0a5be55e-0c32-482f-abcc-287cb9952f40',
    'en': '11fe55ad-3644-475e-a838-a49ed6a94ded',
    'nl': 'f176f20b-a4fa-470c-af05-ea4e72343412',
}

## Run-specific parameters
account_list = ['accounts/102280495497198834033','accounts/102543083042626102334','accounts/116116273817792355878']  # BE + NL + LU
#account_list = ['accounts/108458503997377848869'] #KE

nb_jours_reprise = 1      # look-back window in days, used to derive date_ref below
get_accounts = True       # toggle: list GMB accounts
get_locations = True      # toggle: list GMB locations
get_reviews = True        # toggle: list GMB reviews
cfh_scope = "cfhb"        # secret scope the credentials are read from
analysis_language = "en"  # target language appended to the translation URL ("en-us" was also tried)

## Generic parameters
### GMB
SCOPES = ['https://www.googleapis.com/auth/business.manage']
discovery_uri = "https://developers.google.com/my-business/samples/mybusiness_google_rest_v4p9.json"
# discovery_uri = "https://mybusiness.googleapis.com/$discovery/rest?version=v4"
discovery_uri_info = "https://mybusinessbusinessinformation.googleapis.com/$discovery/rest?version=v1"
discovery_uri_manag = "https://mybusinessaccountmanagement.googleapis.com/$discovery/rest?version=v1"

gmb_token_file = 'token_cfh.pickle'
### Process flags and variable initialisation
__next_step__ = True  # set to False by a failing step so that later steps are skipped
creds = None
# Read the clock once so the file suffix (date) and the review cut-off (date_ref)
# always refer to the same run instant, even when the cell runs across midnight.
date = datetime.datetime.now()
date_ref = str(date - datetime.timedelta(days=nb_jours_reprise))[:10]  # 'YYYY-MM-DD'
date = date.strftime('%Y%m%d')                                         # 'YYYYMMDD'
### Blob containers and file names
container_param = 'customervoice-param'
container_stock = 'data'
container_gmb_ref = 'gmb-data-ref'
fl_fbacks = "feedbacks/fbacks_"+date+".csv"
fl_original = "original/data_"+date+".csv"
fl_luis = "luis/luis_"+date+".csv"
fl_phrases = "keyphrases/phrases_"+date+".csv"

########################################################################################
# Getting secrets (all read from the secret scope named by cfh_scope)

# Storage accounts
blob_param = dbutils.secrets.get(scope=cfh_scope, key="blob-param-string")
blob_stock = dbutils.secrets.get(scope=cfh_scope, key="blob-stock-string")

# Cognitive services
Key_Text_Translation = dbutils.secrets.get(scope=cfh_scope, key="tt-key")
Key_Text_Analysis = dbutils.secrets.get(scope=cfh_scope, key="ta-key")
Key_LUIS = dbutils.secrets.get(scope=cfh_scope, key="lu-key")

# SQL DB

# Bundle the parameter-storage connection details into one dict.
connect_param = {
    'string': blob_param,
    'container': container_param,
}



### 2 - Getting functions

In [None]:
%run ./gmb_functions

In [None]:
%run ./Azure_AI_functions

### 3 - GMB Auth

In [None]:
print("Start auth at - " + str(datetime.datetime.now()))

# Connect to the blob storage account that holds the pickled GMB auth token.
try:
    blobparam_service_client = BlobServiceClient.from_connection_string(blob_param)
    print("Connexion au Blob PARAMETRAGE : OK")
except Exception as ex:
    __next_step__ = False
    print("Connexion au Azure Blob PARAMETRAGE : KO")
    # Print only the exception type so the connection string cannot leak into the logs.
    print("Exception - " + type(ex).__name__)

# Load the stored credentials and refresh them.
try:
    if creds is None and __next_step__:
        blob_client = blobparam_service_client.get_blob_client(container=container_param,
                                                               blob=gmb_token_file)
        # SECURITY NOTE(review): pickle.loads executes whatever the payload contains.
        # This is only safe while write access to the token blob is restricted.
        creds = pickle.loads(blob_client.download_blob().readall())
        # Refresh credentials that are still valid, and expired ones that carry a
        # refresh token; anything else is left untouched and rejected below.
        if creds and (creds.valid or (creds.expired and creds.refresh_token)):
            creds.refresh(Request())
            print("Credentials Refresh : OK")
except Exception as ex:
    __next_step__ = False
    print("Credentials Refresh : KO")
    print("Exception - " + str(ex))

# Check the credentials and build the connection to GMB.
# __next_step__ is tested first: after an earlier failure creds is still None and
# reading creds.valid would raise AttributeError.
if __next_step__ and creds is not None and creds.valid:
    try:
        # static_discovery=False: see https://github.com/googleapis/google-api-python-client/issues/1225
        service = build('mybusinessaccountmanagement', 'v1', credentials=creds,
                        discoveryServiceUrl=discovery_uri_manag, static_discovery=False)
        print("Connected to GMB - " + str(datetime.datetime.now()))
    except Exception as ex:
        print("error")
        print("Exception - " + str(ex))
        __next_step__ = False
        print("Ending without connection to GMB - " + str(datetime.datetime.now()))
elif __next_step__:
    # Credentials were loaded but are unusable: stop here so later cells do not
    # fail on an undefined `service`.
    __next_step__ = False
    print("Credentials not valid")
    print("Ending without connection to GMB - " + str(datetime.datetime.now()))

In [None]:
# Notebook display cell: show whether the GMB credentials are currently valid.
creds.valid

### 4 - Calling GMB

#### 4A- Listing accounts

In [None]:
# Page through the accounts returned by the account-management service and
# collect them into df_acc.
if get_accounts and __next_step__:
    params = {}
    all_accounts = []
    page_token = None
    while True:
        try:
            if page_token:
                params['pageToken'] = page_token
            current_page = service.accounts().list(**params).execute()
            page_token = current_page.get('nextPageToken')
            # A page without an 'accounts' key is treated as empty rather than
            # being reported as an API error through KeyError.
            all_accounts.extend(current_page.get('accounts', []))

            if not page_token:
                break
        except Exception as error:
            # Best effort: keep whatever was collected before the failure.
            print('An error occurred: %s' % error)
            break

    df_acc = pd.DataFrame(all_accounts)
    print("Accounts extracted - " + str(datetime.datetime.now()))

In [None]:
# discovery_uri_test = "https://mybusinessbusinessinformation.googleapis.com/$discovery/rest?version=v1"
# # discovery_uri_test = "https://mybusinessaccountmanagement.googleapis.com/$discovery/rest?version=v1"
# service = build('mybusinessbusinessinformation', 'v1', credentials=creds, discoveryServiceUrl = discovery_uri_test, static_discovery=False)

In [None]:
# service.accounts().locations().list(parent="accounts/116116273817792355878", readMask="storeCode,name").execute()

#### 4B- Listing places

In [None]:
# List the locations of every account in account_list into df_loc.
if get_locations and __next_step__:
    # The client is built only when this step runs: after a failed auth step
    # creds is unusable and building the client would fail.
    service = build('mybusinessbusinessinformation', 'v1', credentials=creds,
                    discoveryServiceUrl=discovery_uri_info, static_discovery=False)
    location_pages = []
    for acc in account_list:
        params = {'parent': acc, "readMask": "storeCode,name"}
        page_token = None
        while True:
            try:
                if page_token:
                    params['pageToken'] = page_token
                current_page = service.accounts().locations().list(**params).execute()
                page_token = current_page.get('nextPageToken')
                # A page without a 'locations' key is treated as empty instead of
                # aborting the whole pipeline on KeyError.
                current_locations = pd.DataFrame(current_page.get("locations", []))
                if not current_locations.empty:
                    # Prefix the account so `name` is the full path later passed
                    # as `parent` to the reviews call.
                    current_locations["name"] = acc + "/" + current_locations["name"]
                    location_pages.append(current_locations)

                if not page_token:
                    break
            except Exception as error:
                __next_step__ = False
                print('Location extraction : KO')
                print('An error occurred: %s' % error)
                break

    # Concatenate once instead of re-copying the accumulated frame for every page.
    if location_pages:
        df_loc = pd.concat(location_pages)
    else:
        df_loc = pd.DataFrame(columns=['storeCode', 'name'])
    print("Locations extracted - " + str(datetime.datetime.now()))

# Enrich the GMB places with the internal station reference data.
if get_locations and __next_step__:
    blob_client = blobparam_service_client.get_blob_client(container=container_param,
                                                           blob="station_list.csv")
    my_string = str(blob_client.download_blob().readall(), 'latin-1')
    data = StringIO(my_string)
    df_stations = pd.read_csv(data, sep=";")
    df_stations = df_stations[['Store ID','Country','Country name','REGION','MANAGEMENT MODE','Location Name']]

if get_locations:
    try:
        # how='right' keeps every GMB location, even without a matching station row.
        df_stations = df_stations.merge(df_loc[['storeCode','name']],
                                        left_on='Store ID',
                                        right_on='storeCode',
                                        how='right')
        del df_stations['Store ID']
        print('Locations enriched at: ' + str(datetime.datetime.now()))
    except Exception as ex:
        # Also reached when df_loc / df_stations were never created because an
        # earlier step failed (NameError).
        print('Eror in enriching locations, df_loc may not exists')
        print('Exception - ' + str(ex))

In [None]:
# When locations are not refreshed from GMB, reuse the last enriched locations file.
if not get_locations and get_reviews:
    try:
        blobstock_service_client = BlobServiceClient.from_connection_string(blob_stock)
    except Exception as ex:
        __next_step__ = False
        print("Connexion au Azure Blob STOCKAGE : KO")
        # Print only the exception type so the connection string cannot leak into the logs.
        print("Exception - " + type(ex).__name__)
    else:
        print("Connexion au Blob STOCKAGE : OK")
        blob_client = blobstock_service_client.get_blob_client(container=container_gmb_ref,
                                                               blob="station_enrich.csv")
        my_string = str(blob_client.download_blob().readall(), 'utf-8')
        data = StringIO(my_string)
        # station_enrich.csv is written with sep="|" by the storing step, so it must
        # be read back with the same separator (";" yields a single merged column).
        df_stations = pd.read_csv(data, sep="|")
        # The reviews step iterates df_loc['name']; rebuild df_loc from the enriched
        # file since the location listing step was skipped.
        df_loc = df_stations[['storeCode', 'name']]
        print('Location list taken from an existing file at: ' + str(datetime.datetime.now()))

#### 4C- Listing reviews

In [None]:
print("Review extract")
# Collect the reviews of every location in df_loc, then keep those created on or
# after date_ref.
if get_reviews and __next_step__:
    # The client is built only when this step runs: after a failed auth step
    # creds is unusable and building the client would fail.
    service = build('mybusiness', 'v4', credentials=creds,
                    discoveryServiceUrl=discovery_uri, static_discovery=False)
    print("Start extracting reviews - " + str(datetime.datetime.now()))
    all_reviews = []
    for loc in df_loc['name']:
        params = {'parent': loc}
        page_token = None

        while True:
            try:
                if page_token:
                    params['pageToken'] = page_token
                current_page = service.accounts().locations().reviews().list(**params).execute()
                page_token = current_page.get('nextPageToken')
                page_reviews = current_page.get('reviews')
                if not page_reviews:
                    # Location without reviews (or an empty page): nothing more to read.
                    break
                all_reviews.extend(page_reviews)

                # Stop paging once this page holds a review created before the cut-off.
                # NOTE(review): assumes pages come back newest first - confirm.
                create_times = [rev['createTime'] for rev in page_reviews if rev.get('createTime')]
                if not page_token or (create_times and min(create_times)[:10] < date_ref):
                    break

            except Exception as error:
                print('Reviews extraction error')
                print(loc + ' is not ready')
                print('An error occurred: %s' % error)
                __next_step__ = False
                break

    df_rev = pd.DataFrame(all_reviews)
    print("reviews extracted - " + str(datetime.datetime.now()))
    # The columns are absent when no review (or no review with text) was returned;
    # guard so an empty extraction does not crash the cell.
    if 'createTime' in df_rev.columns:
        df_rev = df_rev.loc[df_rev['createTime'] >= date_ref].copy()
    if 'comment' not in df_rev.columns:
        df_rev['comment'] = ''
    df_rev['comment'] = df_rev['comment'].fillna('').apply(linesreturn_delete)
    print("nb of extracted reviews: " + str(len(df_rev)))

### 5 - Storing raw data

In [None]:
print("Start storing at - " + str(datetime.datetime.now()))


def _upload_csv_blob(service_client, container, blob_name, frame, overwrite=False):
    """Upload *frame* as a '|'-separated UTF-8 CSV to *container*/*blob_name*.

    With overwrite=True an existing blob is replaced by the upload itself, so
    the previous content is not removed before the new one is written.
    With overwrite=False the SDK raises if the blob already exists.
    """
    target = service_client.get_blob_client(container=container, blob=blob_name)
    with BytesIO(frame.to_csv(sep="|", index=False).encode('utf-8')) as data:
        target.upload_blob(data, overwrite=overwrite)
    print(blob_name + " -- writen")


# Connect to the storage blob account.
try:
    blobstock_service_client = BlobServiceClient.from_connection_string(blob_stock)
except Exception as ex:
    # Without a storage client nothing can be uploaded: skip the uploads and
    # leave __next_step__ False so the enrichment steps are skipped too.
    __next_step__ = False
    print("Connecting to Blob STOCKAGE : KO")
    # Print only the exception type so the connection string cannot leak into the logs.
    print("Exception - " + type(ex).__name__)
else:
    print("Connecting to Blob STOCKAGE : OK")

    # Rewrite files
    print("Azure Blob storage - upload files")

    # Accounts list (reference file, replaced on every run)
    if get_accounts:
        _upload_csv_blob(blobstock_service_client, container_gmb_ref,
                         "location_accounts.csv", df_acc, overwrite=True)

    if get_locations:
        # Raw locations list (reference file, replaced on every run)
        _upload_csv_blob(blobstock_service_client, container_gmb_ref,
                         "station_raw.csv", df_loc, overwrite=True)
        # Enriched locations list (reference file, replaced on every run)
        _upload_csv_blob(blobstock_service_client, container_gmb_ref,
                         "station_enrich.csv", df_stations, overwrite=True)

    # Raw reviews list: the blob name is dated and, as before, an existing file
    # for the same day is not overwritten (the upload raises instead).
    if get_reviews:
        _upload_csv_blob(blobstock_service_client, container_stock, fl_original, df_rev)

    print("Storing ended at - " + str(datetime.datetime.now()))
    # Re-arm the flag so the enrichment section runs on what was extracted, even
    # if a single location failed during the review extraction.
    __next_step__ = True

### 6 - Transforming and enriching reviews

#### 6A - Preprocessing

In [None]:
# Preprocessing ahead of the cognitive-services analysis: derive the feedback
# columns, attach the station attributes and keep the output columns.
if __next_step__:
    try:
        df_rev['RESPONSERECOMMANDATION'] = df_rev.starRating.apply(rating_transco)
        df_rev['USERNAME'] = 'not GDPR compliant'  # df_rev.reviewer.apply(extract_username)
        df_rev['location'] = df_rev.name.apply(extract_location)
        df_rev['SURVEYDATE'] = df_rev.createTime.apply(to_datetime)
        df_rev['REFUELDATE'] = np.nan
        df_rev['USERID'] = 'not available'
        df_rev['SOURCE'] = 'Google Reviews'

        # Clean the review text, then replace any missing value by ''.
        df_rev['comment'] = df_rev.comment.apply(ggtranslation_delete).fillna('')

        # Attach the station attributes to each review through its location path.
        df_rev = df_rev.merge(df_stations, how='left', left_on='location', right_on='name')

        renamed_columns = {
            'reviewId': 'id',
            'storeCode': 'STATIONID',
            'comment': 'text',
            'Location Name': 'STORENAME',
        }
        output_columns = [
            'id', 'USERID', 'SOURCE', 'STATIONID', 'SURVEYDATE', 'REFUELDATE',
            'RESPONSERECOMMANDATION', 'text', 'USERNAME', 'Country', 'Country name',
            'REGION', 'MANAGEMENT MODE', 'STORENAME',
        ]
        df_rev = df_rev.rename(columns=renamed_columns)[output_columns]
        print('preprocessing OK - %s' % datetime.datetime.now())
    except Exception as err:
        print('preprocessing KO - %s' % datetime.datetime.now())
        print('Exception:')
        print(err)
        __next_step__ = False

#### 6B - Cognitive services parameters

In [None]:
### Text Translation
# The translation endpoint is host + path + target-language query parameter.
Region_Text_Translation = 'westeurope'
Host_Text_Translation = 'https://api.cognitive.microsofttranslator.com'
Path_Text_Translation = '/translate?api-version=3.0'
params = "".join(("&to=", analysis_language))
Url_Text_Translation = "".join((Host_Text_Translation, Path_Text_Translation, params))

### Text Analysis
# The Text Analytics host is region-specific.
Region_Text_Analysis = 'westeurope'
Host_Text_Analysis = 'https://{}.api.cognitive.microsoft.com/text/analytics/v2.0'.format(Region_Text_Analysis)

#### 6C - Enriching Reviews

In [None]:
# Reset first: if this step is skipped or fails, the storing step must not find
# an undefined name or a stale DataFrame left over from a previous run.
df_feedbacks = None
if __next_step__:
    print("start processing reviews - " + str(datetime.datetime.now()))
    try:
        # TheVoice_multi_lines comes from a %run'd notebook; it is given the
        # translation, text-analysis and LUIS settings plus the parameter-storage
        # connection details.
        df_feedbacks = TheVoice_multi_lines(df_rev,
                                            Key_Text_Translation,
                                            Region_Text_Translation,
                                            Url_Text_Translation,
                                            Key_Text_Analysis,
                                            Region_Text_Analysis,
                                            Host_Text_Analysis,
                                            Key_LUIS, luis_apps,
                                            connect_param)
        print("reviews processed - " + str(datetime.datetime.now()))
        df_feedbacks["id_unique"] = df_feedbacks.id
        df_feedbacks['suggested'] = df_feedbacks.suggested.fillna('').apply(linesreturn_delete)
    except Exception as ex:
        print('reviews processing KO - ' + str(datetime.datetime.now()))
        print('Exception:')
        print(ex)
        __next_step__ = False

#### 6D - Storing results

In [None]:
def split_feedback_annotations(feedbacks):
    """Flatten per-review key phrases and LUIS intents into two long tables.

    Reads the 'id_unique', 'keyPhrases' and 'LUIS' columns of *feedbacks*.
    Cells whose string form is 'nan', 'NaN', '' or ' ' are skipped.

    Returns a tuple (df_luis, df_keyphrases):
      * df_luis has columns id_unique, intent, score (one row per intent);
      * df_keyphrases has columns id_unique, key_phrases (one row per phrase).
    Both are empty, with their columns present, when nothing was found.
    """
    empty_markers = ('nan', 'NaN', '', ' ')
    phrase_rows = []
    intent_rows = []
    for id_unique, key_phrases, luis in zip(feedbacks['id_unique'],
                                            feedbacks['keyPhrases'],
                                            feedbacks['LUIS']):
        if str(key_phrases) not in empty_markers:
            phrase_rows.extend((id_unique, phrase) for phrase in key_phrases)
        if str(luis) not in empty_markers:
            # SECURITY NOTE(review): eval executes the text held in the LUIS column.
            # This is only acceptable while that text is produced by our own pipeline;
            # consider ast.literal_eval once the stored format is confirmed to be a
            # plain Python literal.
            for intent in eval(luis):
                intent_rows.append((id_unique, intent['intent'], intent['score']))

    df_luis = pd.DataFrame(intent_rows, columns=['id_unique', 'intent', 'score'])
    df_keyphrases = pd.DataFrame(phrase_rows, columns=['id_unique', 'key_phrases'])
    return df_luis, df_keyphrases


# Store the enriched feedbacks together with their intent and key-phrase tables.
# globals().get avoids a NameError when the enrichment step never produced df_feedbacks.
if isinstance(globals().get('df_feedbacks'), pd.DataFrame):
    # The helper keeps its working variables local, so there is nothing to `del`
    # afterwards (deleting loop variables that were never bound raised NameError
    # when no review carried key phrases or intents).
    df_luis, df_keyphrases = split_feedback_annotations(df_feedbacks)

    try:
        for blob_name, frame in ((fl_fbacks, df_feedbacks),
                                 (fl_luis, df_luis),
                                 (fl_phrases, df_keyphrases)):
            blob_client = blobstock_service_client.get_blob_client(container=container_stock, blob=blob_name)
            with BytesIO(frame.to_csv(sep="|", index=False).encode('utf-8')) as data:
                blob_client.upload_blob(data)
        print("Storing completed - " + str(datetime.datetime.now()))

    except Exception as ex:
        print("Storing failled - " + str(datetime.datetime.now()))
        print('Exception:')
        print(ex)

In [None]:
# Notebook display cell: show the first enriched feedback row.
df_feedbacks.iloc[0]

In [None]:
# Notebook display cell: preview the first rows of the enriched feedbacks.
df_feedbacks.head()

Unnamed: 0,id,USERID,SOURCE,STATIONID,SURVEYDATE,REFUELDATE,RESPONSERECOMMANDATION,text,USERNAME,Country,Country name,REGION,MANAGEMENT MODE,STORENAME,language,translation,analysed_lang,keyPhrases,Sentiment,Priorite,Priorite score,LUIS,suggested,id_unique
0,AbFvOqnH8NPwm0X7RB2Setz22nIDA9E5TQsI88M7gnRXzG...,not available,Google Reviews,NB005652,2021-03-06 06:22:09.122,,4,,not GDPR compliant,BE,Belgium,CODO/DODO Sud,CODO,WALCOURT,,,,,,,,,,AbFvOqnH8NPwm0X7RB2Setz22nIDA9E5TQsI88M7gnRXzG...
1,AbFvOqkyPY8aco5Sr85nLks1r69o3UMez_7LruBjuxbNvq...,not available,Google Reviews,NB004203,2021-03-04 10:08:46.203,,5,,not GDPR compliant,BE,Belgium,CODO/DODO Sud,DODO,MANHAY,,,,,,,,,,AbFvOqkyPY8aco5Sr85nLks1r69o3UMez_7LruBjuxbNvq...
2,AbFvOqmxvAURhwTmNO_37_GTs2LdBVT2Hv5Gurz9QfDUll...,not available,Google Reviews,NB005106,2021-03-06 20:38:36.259,,4,,not GDPR compliant,BE,Belgium,CODO/DODO Sud,DODO,BANDE HUBERTY,,,,,,,,,,AbFvOqmxvAURhwTmNO_37_GTs2LdBVT2Hv5Gurz9QfDUll...
3,AbFvOqkNKD2JKDqoFsjiq41BaD044n9xQ-JbEj_0XkEgTU...,not available,Google Reviews,NB002172,2021-03-07 22:29:17.383,,2,,not GDPR compliant,BE,Belgium,COCO PROX,COCO,RETIE,,,,,,,,,,AbFvOqkNKD2JKDqoFsjiq41BaD044n9xQ-JbEj_0XkEgTU...
4,AbFvOqkgsoBQcmlSqdKXAS1fWiJqrjxRiZruLMnN1KBDdC...,not available,Google Reviews,NB002172,2021-03-06 21:11:54.612,,5,riendelijke bediening,not GDPR compliant,BE,Belgium,COCO PROX,COCO,RETIE,,,,,,,,,,AbFvOqkgsoBQcmlSqdKXAS1fWiJqrjxRiZruLMnN1KBDdC...
