In [1]:
import xml.etree.ElementTree as ET
import re
# Captures the path segment that directly follows "/games/" in a deep link;
# that segment is what the notebook uses as the product id.
GMG_ID_PATTERN = re.compile(r'/games/([\w.-]+)/')

# Accumulates one record per product id across all regional feeds.
products = dict()

# Currency code -> file name of that currency's feed (read from ../data_gmg/).
# Feed locations, as listed in
# https://community.pcgamingwiki.com/topic/2038-green-man-gaming-feeds/ :
#   https://s3.amazonaws.com/gmg-epilive/US%20Dollar.xml
#   https://s3.amazonaws.com/gmg-epilive/Sterling.xml
#   https://s3.amazonaws.com/gmg-epilive/Euro.xml
regions = dict(
    USD='US Dollar.xml',
    GBP='Sterling.xml',
    EUR='Euro.xml',
)

# Parse every regional feed and merge its products into `products`, keyed by
# the id extracted from each product's deep link. A product that shows up in
# several feeds ends up with one record whose 'regions' list names every feed
# it was found in; the descriptive fields of the most recently read feed win.

# Feed tags whose text is copied verbatim into the record, tag -> record key.
_PLAIN_FIELDS = {
    'product_name': 'name',
    'manufacturer': 'developer',
    'brand': 'publisher',
    'drm': 'drm',
}
# Record keys that are expected to be identical in every regional feed.
_COMPARED_FIELDS = ('name', 'developer', 'publisher', 'drm', 'steam_id')

for region_code, region_file in regions.items():
    region_path = '../data_gmg/' + region_file
    tree = ET.parse(region_path)
    root = tree.getroot()
    for product in root:
        data = {
            'id': None,
            'name': None,
            'developer': None,
            'publisher': None,
            'drm': None,
            'steam_id': None,
            'regions': [region_code],
        }

        for product_info in product:
            tag = product_info.tag
            if tag == 'deep_link':
                # An empty <deep_link/> has text None, which the regex engine
                # rejects with TypeError; treat it as "no id found" instead.
                m = GMG_ID_PATTERN.search(product_info.text or '')
                data['id'] = m.group(1) if m is not None else None
            elif tag == 'steamapp_id':
                # The feed uses '0' to mean "no Steam app".
                data['steam_id'] = product_info.text if product_info.text != '0' else None
            elif tag in _PLAIN_FIELDS:
                data[_PLAIN_FIELDS[tag]] = product_info.text

        if data['id'] is None:
            # Storing these under the key None would merge unrelated products
            # into a single record, so report and skip them instead.
            print('Skipping product without a usable deep_link in the',
                  region_code, 'feed:', data['name'])
            continue

        data_old = products.get(data['id'])
        if data_old is not None:
            mismatched = [key for key in _COMPARED_FIELDS
                          if data[key] != data_old[key]]
            if mismatched:
                print('OwO, something went horribly wrong')
                # Show what actually differs; the Element repr alone is opaque.
                print(data['id'], region_code,
                      {key: (data_old[key], data[key]) for key in mismatched})
            # Keep earlier regions first and never list a region twice, even
            # if the same product id is repeated within one feed.
            data['regions'] = data_old['regions'] + [
                code for code in data['regions']
                if code not in data_old['regions']
            ]
        products[data['id']] = data

In [2]:
import pickle

# Save the merged product dict to disk. The `with` block closes the file as
# soon as the dump finishes, so the data is flushed and no handle is left open
# for the rest of the kernel session.
with open('../data_gmg/gmg_feed_products.p', 'wb') as products_file:
    pickle.dump(products, products_file)

In [3]:
# Histogram: number of regional feeds a product appears in -> product count.
# Every possible count from 1 to len(regions) is seeded with zero so empty
# buckets are still displayed.
region_stats = {count: 0 for count in range(1, len(regions) + 1)}
for product in products.values():
    region_count = len(product['regions'])
    # .get() keeps an unexpected count (e.g. a duplicated region entry) from
    # raising KeyError; it simply gets its own bucket.
    region_stats[region_count] = region_stats.get(region_count, 0) + 1
region_stats

{1: 36, 2: 20, 3: 6208}