## Download PLA File

In [12]:
import ftplib
import pandas as pd
import urllib.parse
from datetime import date,timedelta
from ftplib import FTP
from io import StringIO
import paramiko
import os
import pkg.invalid_char_clean as invalid_clean
import pkg.kelkoo_link_transfer as kelkoo_transfer
import requests
import xml.etree.ElementTree as ET
from lxml import etree

In [13]:
# Advertisers to process. Each name is also used as the advertiser's local
# working directory and is matched by name in the download and upload loops.
adv_lst = ['samsung-uk','vodafone','homebase']

In [14]:
def gtin_processor(gtin_num):
    """Return ``gtin_num`` as a string, left-padded with zeros to 13 characters.

    A NaN input (a value that is not equal to itself) is reported with
    'Null Value' and returned as its plain string form. Values longer than
    13 characters are reported and returned without padding.
    """
    if gtin_num != gtin_num:
        print('Null Value')
        return str(gtin_num)

    text = str(gtin_num)
    if len(text) > 13:
        print("This number is over 13 digits: " + text)
    # rjust only adds zeros when the text is shorter than 13 characters.
    return text.rjust(13, '0')

In [15]:
# Convert the Google Shopping XML feed to TSV, leaving out the tags that start with 'c:'

# (TSV column name, XPath relative to an <item>) for every exported field, in
# output column order. Only un-prefixed and 'g:' tags are listed, so tags that
# start with 'c:' are left out of the TSV.
_PLA_FIELDS = (
    ('id', './/g:id/text()'),
    ('title', './/title/text()'),
    ('description', './/description/text()'),
    ('link', './/link/text()'),
    ('image_link', './/g:image_link/text()'),
    ('availability', './/g:availability/text()'),
    # Exclude the <g:price> nested in <g:shipping>; otherwise an item without
    # its own price would be given its shipping price.
    ('price', './/g:price[not(ancestor::g:shipping)]/text()'),
    ('google_product_category', './/g:google_product_category/text()'),
    ('product_type', './/g:product_type/text()'),
    ('brand', './/g:brand/text()'),
    ('gtin', './/g:gtin/text()'),
    ('identifier_exists', './/g:identifier_exists/text()'),
    ('condition', './/g:condition/text()'),
    ('age_group', './/g:age_group/text()'),
    ('color', './/g:color/text()'),
    ('gender', './/g:gender/text()'),
    ('material', './/g:material/text()'),
    ('pattern', './/g:pattern/text()'),
    ('size', './/g:size/text()'),
    ('custom_label_0', './/g:custom_label_0/text()'),
    ('custom_label_1', './/g:custom_label_1/text()'),
    ('custom_label_2', './/g:custom_label_2/text()'),
    ('shipping_country', './/g:shipping/g:country/text()'),
    ('shipping_service', './/g:shipping/g:service/text()'),
    ('shipping_price', './/g:shipping/g:price/text()'),
)

# A literal tab or line break inside a value would shift the columns or split
# the row, so each of them is written as a single space instead.
_TSV_UNSAFE_CHARS = str.maketrans({'\t': ' ', '\r': ' ', '\n': ' '})


def _first_text(item, xpath_expr, namespaces):
    """Return the first text node ``xpath_expr`` selects under ``item`` as one TSV-safe cell ("" if none)."""
    matches = item.xpath(xpath_expr, namespaces=namespaces)
    if not matches:
        return ""
    return str(matches[0]).translate(_TSV_UNSAFE_CHARS)


def updated_xml_to_tsv(xml_content, tsv_filename):
    """Write every <item> of an XML product feed to a tab-separated file.

    Parameters
    ----------
    xml_content : str
        The XML document as text; it is UTF-8 encoded before parsing.
    tsv_filename : str
        Path of the TSV file to create (overwritten if present). It is written
        with the 'utf_8_sig' encoding, starting with a header row.

    For each item, the first text node found for each field is written; a
    field with no match becomes an empty cell. Tabs and line breaks inside a
    value are replaced by spaces so that every item stays on one row.
    """
    # Parse the XML content
    root = etree.fromstring(xml_content.encode('utf-8'))

    # Namespace map for the 'g:' prefix used in the XPath expressions
    namespaces = {
        'g': 'http://base.google.com/ns/1.0'
    }

    headers = [column for column, _ in _PLA_FIELDS]

    with open(tsv_filename, 'w', encoding='utf_8_sig') as tsv_file:
        tsv_file.write('\t'.join(headers) + '\n')

        # One output row per <item>, cells in header order
        for item in root.xpath('//item'):
            row = [_first_text(item, xpath_expr, namespaces) for _, xpath_expr in _PLA_FIELDS]
            tsv_file.write('\t'.join(row) + '\n')

# The function has been updated to exclude tags that start with 'c:'
# "Function updated_xml_to_tsv defined successfully."


In [16]:
def _read_ftp_credentials(cred_path):
    """Return (username, password) from a credentials file of two 'label:value' lines."""
    with open(cred_path, 'r') as cred_file:
        cred_lines = cred_file.readlines()
    # Split on the first ':' only, so a value that itself contains ':' stays
    # whole, and drop the line ending from both values.
    username = cred_lines[0].split(':', 1)[1].split('\n')[0]
    password = cred_lines[1].split(':', 1)[1].split('\n')[0]
    return username, password


def _clean_pla_file(adv, raw_file):
    """Run the invalid-character cleaner on ``raw_file``; write its log and the '_cleaned.tsv' copy."""
    fi = invalid_clean.MyClass()
    removal_log = fi.readlogfile(raw_file)

    with open('./log/'+adv+'_'+str(date.today())+'_PLA_invalid_char_removal.log','w',encoding = 'utf_8_sig') as log_file:
        for entry in removal_log:
            log_file.write(entry+'\n')

    # Written as UTF-8 explicitly: the transfer step reads this file back with
    # 'utf_8_sig', which the platform default encoding does not always match.
    with open(raw_file.replace('.tsv','_cleaned.tsv'),'w',encoding='utf-8') as out:
        for line in fi._data_bytes:
            out.write(line.replace('\r',''))


# Feed locations of the advertisers served over HTTP.
# (Vodafone was previously fetched from
#  http://livedata.bigupdata.co.uk/google_pla_vodafone_consumer.txt)
VODAFONE_FEED_URL = 'https://feed-download.bigupdata.co.uk/download/?lnk=0ef2b49a301a46c29ff0acf5a913ad75'
HOMEBASE_FEED_URL = 'https://s2.feedhero.net/output_feeds/gb/homebase_gb/194c1b066786e88bc5f6cc2633623e5b/latest.xml'
# (connect, read) timeouts in seconds, so an unresponsive server cannot hang the cell
HTTP_TIMEOUT = (10, 300)

os.makedirs('log', exist_ok=True)

for i in adv_lst:
    os.makedirs(i, exist_ok=True)

    today = str(date.today())
    path = './'+i+'/'
    if i == 'samsung-uk':
        file = path+'samsung_pla_copy-'+today+'.tsv'
    elif i == 'vodafone':
        file = path+'google_pla_vodafone_consumer_'+today+'.tsv'
    elif i == 'homebase':
        file = path+'google_pla_homebase_consumer_'+today+'.tsv'
    else:
        # No download route is defined for any other advertiser.
        continue

    # check if exists
    if os.path.exists(file):
        print(i+' PLA file is already downloaded')
        if not os.path.exists(file.replace('.tsv','_cleaned.tsv')):
            # An earlier run fetched the feed but stopped before cleaning it.
            _clean_pla_file(i, file)
        continue

    # Download to a temporary name and rename on success, so an interrupted
    # download is never mistaken for today's finished file on the next run.
    partial_file = file+'.part'

    if i == 'samsung-uk':
        username, password = _read_ftp_credentials(i+'-ftp.txt')
        with ftplib.FTP('ftp2.feedonomics.com', username, password) as feed_ftp:
            with open(partial_file, "wb") as f:
                feed_ftp.retrbinary('RETR '+ 'exports/samsung_pla_copy.txt', f.write)
        label = 'samsung_uk'

    elif i == 'vodafone':
        res = requests.get(VODAFONE_FEED_URL, timeout=HTTP_TIMEOUT)
        # Fail here rather than saving an HTTP error page as the PLA file.
        res.raise_for_status()
        with open(partial_file,'w',encoding='utf_8_sig') as f:
            f.write(res.text)
        label = 'vodafone'

    else:  # homebase
        res = requests.get(HOMEBASE_FEED_URL, timeout=HTTP_TIMEOUT)
        res.raise_for_status()
        updated_xml_to_tsv(res.text, partial_file)
        label = 'homebase'

    os.replace(partial_file, file)
    print(label+'_'+today+'_PLA file has been downloaded successfully')

    _clean_pla_file(i, file)
        
#     else:
#         # Load raw PLA file from ftp
#         lst = open(i+'-ftp.txt','r').readlines()
#         username = lst[0].split(':')[1].split('\n')[0]
#         password = lst[1].split(':')[1]

#         if os.path.exists(localpath):
#             print(i+' PLA file is already downloaded')
#             continue
#         # Open a transport
#         host,port = "ftp.admarketplace.net",8022
#         transport = paramiko.Transport((host,port))
#         # Auth    
#         username,password = username,password
#         transport.connect(None,username,password)
#         # Go!    
#         sftp = paramiko.SFTPClient.from_transport(transport)
#         # Download
#         sftp.get(filepath,localpath)
#         print(i+'_'+str(date.today())+'_PLA file has been downloaded successfully')
#         if sftp: sftp.close()
#         if transport: transport.close()

#         fi = invalid_clean.MyClass()
#         lst = fi.readlogfile(localpath)
        
#         with open('./log/'+i+'_'+str(date.today())+'_PLA_invalid_char_removal.log','w',encoding = 'utf_8_sig') as f:
#             for j in lst:
#                 f.writelines(j+'\n')
        
#         out = open(localpath.replace('.csv','_cleaned.csv'),'w')
#         for line in fi._data_bytes: 
#             out.write(line.replace('\r','')) 
#         out.close()

samsung_uk_2024-11-18_PLA file has been downloaded successfully
vodafone_2024-11-18_PLA file has been downloaded successfully
homebase_2024-11-18_PLA file has been downloaded successfully


## Kelkoo PLA Transfer & Upload

### Transfer

In [17]:
def read_file(adv):
    """Load today's cleaned PLA file of an advertiser as an all-string DataFrame.

    Parameters
    ----------
    adv : str
        Advertiser name, which is also the directory (relative to the current
        working directory) that holds its files.

    Returns
    -------
    pandas.DataFrame
        The file contents read with ``dtype=str``, with column names
        lower-cased and an 'unnamed: 0' column removed if present.

    Raises
    ------
    FileNotFoundError
        If the directory has no file whose name contains both today's date
        and 'cleaned'.
    """
    files = os.listdir(adv)
    print(files)

    today = str(date.today())
    path = None
    # If several files match, the last one listed is used.
    for file in files:
        if today in file and 'cleaned' in file:
            path = file
            print(path)

    if path is None:
        raise FileNotFoundError(
            "No cleaned PLA file dated " + today + " found in '" + adv + "'"
        )

    full_path = './'+adv+'/'+path
    if '.tsv' in path or 'txt' in path:
        df = pd.read_csv(full_path,sep='\t',encoding='utf_8_sig', dtype = str)
    else:
        df = pd.read_csv(full_path,encoding='utf_8_sig', dtype = str)

    df.columns = [x.lower() for x in list(df.columns)]
    if 'unnamed: 0' in df.columns:
        # Keyword form: pandas 2.x no longer accepts the axis positionally.
        df = df.drop(columns='unnamed: 0')
    return df

In [18]:
def _first_present(frame, *names):
    """Return the first of ``names`` that is a column of ``frame``, or None when none is."""
    return next((name for name in names if name in frame.columns), None)


# Base URL passed to kelkoo_transfer.gen_deeplink for every advertiser.
root_url = 'https://publisher_name.ampxdirect.com/'

for adv in adv_lst:
    DL_url = []
    DL_mobile_url = []
    df = read_file(adv)

    # Column spellings differ between feeds; pick whichever this feed uses.
    availability_col = 'availability' if 'availability' in df.columns else 'Availability'
    product_url_col = 'product url' if 'product url' in df.columns else 'link'
    mobile_link_col = 'mobile link' if 'mobile link' in df.columns else 'mobile_link'

    # Keep only rows that are not out of stock and that have a product URL.
    # NOTE(review): the comparison is case-sensitive and matches only the exact
    # text 'Out of Stock' - confirm every feed spells it that way.
    df_in_stock = df[df[availability_col] != 'Out of Stock']
    df_in_stock = df_in_stock[~df_in_stock[product_url_col].isnull()].reset_index(drop=True)

    # Gtin handle: pad present values to 13 digits. Missing values are left as
    # NaN so the fillna('NULL') below marks them like every other empty cell,
    # instead of exporting the text 'nan'.
    if adv != 'samsung-uk' and 'gtin' in df_in_stock.columns:
        df_in_stock['gtin'] = [str(gtin_processor(x)) if x == x else x for x in df_in_stock['gtin']]

    # Passed to gen_deeplink as `dic`; it is always empty here.
    dic = {}

    if adv == 'samsung-uk':
        sub3 = str(adv.split('-')[-1]).replace('uk','gb')
        adv_name = str(adv.split('-')[0])
        DL_url = [kelkoo_transfer.gen_deeplink('kelkoo',root_url,cat,cu=url,fbu=url,sub1='googleshopping',sub2=adv_name,sub3=sub3,adv=adv_name,dic=dic) for cat,url in zip(df_in_stock['category'],df_in_stock['product url'])]

    elif adv =='vodafone':
        DL_url = [kelkoo_transfer.gen_deeplink('kelkoo',root_url,cat,cu=url,fbu=url,sub1='googleshopping',sub2=adv,sub3='gb',sub4='desktop',adv=adv,dic=dic) for cat,url in zip(df_in_stock['google_product_category'],df_in_stock['link'])]

    else:
        # Look the column spelling up explicitly instead of retrying inside a
        # bare `except`, which would also hide errors raised by gen_deeplink.
        category_col = _first_present(df_in_stock, 'google_product_category', 'google product category')
        if category_col is None:
            raise KeyError(f"{adv}: feed has no 'google_product_category' / 'google product category' column")

        DL_url = [kelkoo_transfer.gen_deeplink('kelkoo',root_url,cat,cu=url,fbu=url,sub1='googleshopping',sub2=adv,sub3='gb',adv=adv,dic=dic) if url==url else '' for cat,url in zip(df_in_stock[category_col],df_in_stock['link'])]

        if mobile_link_col in df_in_stock.columns:
            # NOTE(review): mobile deeplinks are built with sub3='us' while the
            # desktop ones above use 'gb' - confirm this is intended.
            DL_mobile_url = [kelkoo_transfer.gen_deeplink('kelkoo',root_url,cat,cu=url,fbu=url,sub1='googleshopping',sub2=adv,sub3='us',adv=adv,dic=dic) if url==url else '' for cat,url in zip(df_in_stock[category_col],df_in_stock[mobile_link_col])]
        else:
            print(adv,'has no mobile link column')

    # Replace the original URLs with the generated deeplinks. The lists are in
    # row order of df_in_stock (its index was reset above); assigning them
    # directly also works when no rows are left.
    df_in_stock[product_url_col] = DL_url
    if mobile_link_col in df_in_stock.columns:
        df_in_stock[mobile_link_col] = DL_mobile_url

    # Rows whose deeplink came out empty are dropped; remaining gaps become 'NULL'.
    df_final = df_in_stock[df_in_stock[product_url_col] != ''].reset_index(drop=True)
    df_final = df_final.fillna('NULL')

    # Handle sale price: fall back to the regular price where none is given.
    sale_price_col = 'sale_price' if 'sale_price' in df_final.columns else 'sale price'
    if sale_price_col in df_final.columns:
        df_final[sale_price_col] = [price if sale == 'NULL' else sale for price, sale in zip(df_final['price'], df_final[sale_price_col])]
    else:
        print(f"No sale price column found for {adv}!")

    # Save csv file to the local drive
    path = f'./{adv}/'
    df_final.to_csv(path + f'kelkoo_{adv}_{date.today()}_PLA.csv', encoding='utf_8_sig', float_format=str)
    print(f"{adv} is transferred and saved!\n")


['kelkoo_samsung-uk_2024-10-15_PLA.csv', 'kelkoo_samsung-uk_2024-10-22_PLA.csv', 'kelkoo_samsung-uk_2024-10-28_PLA.csv', 'kelkoo_samsung-uk_2024-11-07_PLA.csv', 'samsung_pla_copy-2024-10-15.tsv', 'samsung_pla_copy-2024-10-15_cleaned.tsv', 'samsung_pla_copy-2024-10-22.tsv', 'samsung_pla_copy-2024-10-22_cleaned.tsv', 'samsung_pla_copy-2024-10-28.tsv', 'samsung_pla_copy-2024-10-28_cleaned.tsv', 'samsung_pla_copy-2024-11-06.tsv', 'samsung_pla_copy-2024-11-06_cleaned.tsv', 'samsung_pla_copy-2024-11-07.tsv', 'samsung_pla_copy-2024-11-07_cleaned.tsv', 'samsung_pla_copy-2024-11-18.tsv', 'samsung_pla_copy-2024-11-18_cleaned.tsv']
samsung_pla_copy-2024-11-18_cleaned.tsv
samsung-uk is transferred and saved!

['Google _ Shopping _ Vodafone.txt', 'google_pla_vodafone_consumer_2024-10-15.tsv', 'google_pla_vodafone_consumer_2024-10-15_cleaned.tsv', 'google_pla_vodafone_consumer_2024-10-22.tsv', 'google_pla_vodafone_consumer_2024-10-22_cleaned.tsv', 'google_pla_vodafone_consumer_2024-10-28.tsv', 'goog

### Upload

In [19]:
# Change the directory to where the text file which stores ftp credentials

# 'kelkoo-ftp.txt' holds two 'label:value' lines: the username, then the password.
with open('kelkoo-ftp.txt','r') as cred_file:
    lst = cred_file.readlines()
# Split on the first ':' only, so a value that itself contains ':' stays whole,
# and drop the line ending from both values (a newline in the password would
# otherwise be sent as part of the login).
username = lst[0].split(':', 1)[1].split('\n')[0]
password = lst[1].split(':', 1)[1].split('\n')[0]
ftp = ftplib.FTP('ftpkelkoo.kelkoo.net', username, password)

In [20]:
# Print the remote directory listing before uploading. '-t' is handed to the
# server's LIST command; presumably it orders entries newest first - TODO confirm.
ftp.dir('-t')

-rw-r--r--    1 12668    502      36831452 Nov 07 15:27 AMP_kelkoo_homebase_US.csv
-rw-r--r--    1 12668    502        971471 Nov 07 15:26 AMP_kelkoo_vodafone_UK.csv
-rw-r--r--    1 12668    502       3502104 Nov 07 15:26 AMP_kelkoo_samsung_UK.csv
-rw-r--r--    1 12668    502      38485125 Aug 15  2023 AMP_kelkoo_CalvinKlein_GB.csv
-rw-r--r--    1 12668    502       1503167 Aug 15  2023 AMP_kelkoo_TheBodyShop_GB.csv
-rw-r--r--    1 12668    502        288053 Jul 27  2023 AMP_kelkoo_verizon_US.csv
-rw-r--r--    1 12668    502        583638 Jul 27  2023 AMP_kelkoo_casper_US.csv
-rw-r--r--    1 12668    502       9579740 Jul 27  2023 AMP_kelkoo_reebok_US.csv
-rw-r--r--    1 12668    502      30843559 May 25  2023 AMP_kelkoo_homebase_UK.csv
-rw-r--r--    1 12668    502      31413179 Feb 07  2023 AMP_kelkoo_underarmour_UK.csv
-rw-r--r--    1 12668    502      35221872 Nov 14  2022 AMP_kelkoo_boots_UK.csv
-rw-r--r--    1 12668    502          5171 Aug 08  2022 AMP_kelkoo_voxi_UK.csv
-rw-r--r

In [21]:
# Upload today's Kelkoo CSV of every advertiser over the open `ftp` connection.
for adv in adv_lst:
    path = './'+adv+'/'
    file_name = path + 'kelkoo_'+adv + '_' + str(date.today())+'_PLA.csv'
    print(file_name)

    # Work out the remote file name first; the upload itself is the same for all.
    if adv == 'vodafone':
        remote_name = 'AMP_kelkoo_'+adv+'_UK.csv'
    elif 'samsung-uk' in adv:
        # The region is the part after the last '-', upper-cased.
        remote_name = 'AMP_kelkoo_samsung_'+str(adv.split('-')[-1]).upper()+'.csv'
    else:
        # NOTE(review): every other advertiser (currently homebase) is stored
        # with a '_US' suffix - confirm that is the name Kelkoo expects.
        remote_name = 'AMP_kelkoo_'+adv+'_US.csv'

    with open(file_name, "rb") as payload:
        ftp.storbinary('STOR ' + remote_name, payload)

    print(adv+'_'+str(date.today())+'_PLA.csv has been uploaded successfully')

./samsung-uk/kelkoo_samsung-uk_2024-11-18_PLA.csv
samsung-uk_2024-11-18_PLA.csv has been uploaded successfully
./vodafone/kelkoo_vodafone_2024-11-18_PLA.csv
vodafone_2024-11-18_PLA.csv has been uploaded successfully
./homebase/kelkoo_homebase_2024-11-18_PLA.csv
homebase_2024-11-18_PLA.csv has been uploaded successfully


In [22]:
# Print the remote directory listing again so the uploaded files' sizes and
# timestamps can be checked. '-t' is handed to the server's LIST command.
ftp.dir('-t')

-rw-r--r--    1 12668    502      36357082 Nov 18 15:13 AMP_kelkoo_homebase_US.csv
-rw-r--r--    1 12668    502        957376 Nov 18 15:12 AMP_kelkoo_vodafone_UK.csv
-rw-r--r--    1 12668    502       3502104 Nov 18 15:12 AMP_kelkoo_samsung_UK.csv
-rw-r--r--    1 12668    502      38485125 Aug 15  2023 AMP_kelkoo_CalvinKlein_GB.csv
-rw-r--r--    1 12668    502       1503167 Aug 15  2023 AMP_kelkoo_TheBodyShop_GB.csv
-rw-r--r--    1 12668    502        288053 Jul 27  2023 AMP_kelkoo_verizon_US.csv
-rw-r--r--    1 12668    502        583638 Jul 27  2023 AMP_kelkoo_casper_US.csv
-rw-r--r--    1 12668    502       9579740 Jul 27  2023 AMP_kelkoo_reebok_US.csv
-rw-r--r--    1 12668    502      30843559 May 25  2023 AMP_kelkoo_homebase_UK.csv
-rw-r--r--    1 12668    502      31413179 Feb 07  2023 AMP_kelkoo_underarmour_UK.csv
-rw-r--r--    1 12668    502      35221872 Nov 14  2022 AMP_kelkoo_boots_UK.csv
-rw-r--r--    1 12668    502          5171 Aug 08  2022 AMP_kelkoo_voxi_UK.csv
-rw-r--r