In [96]:
import json

with open("products.json") as f:
    products = json.load(f)
products = products["products"]

old_products = {}
with open("products_old.json") as f:
    old = json.load(f)
    for p in old:
        if "salesUnitGtin" not in p:
            continue
        id = p["salesUnitGtin"]
        old_products[id] = p

In [104]:
def map_product(product):
    to_delete = [
        "availabilityNotificationActive",
        "countriesOfOrigin",
        "favorite",
        "frozenProduct",
        "icons",
        "inOrderTemplate",
        "lowestPriceInComparison",
        "crossSellingProducts",
        "myAssortment",
        "netPriceFound",
        "novelty",
        "pikatukkuStock",
        "pikatukkuWebShopProduct",
        "stock",
        "taxPercentage",
        "taxPercentageCatalogPrice",
        "pk",
        "code",
        "priceWithTax",
        "baseUnit",
        "allowedLotSize",
        "comparisonPrice",
        "comparisonUnit",
        "purchasable",
        "campaign",
        "similarProducts",
        "sizeInBaseUnits",
        "sponsored",
        "unit",
        "url",
        "images",
        "name",
        "description",
        "discount",
        "strikeThroughPrice"
    ]
    # from products_old: marketingTexts, names
    try:
        matching_product = old_products[product["ean"]]["synkkaData"]
        if "marketingTexts" in matching_product and len(matching_product["marketingTexts"]) > 0:
            product["marketingTexts"] = matching_product["marketingTexts"]
        if "names" in matching_product:
            product["names"] = matching_product["names"]
    except KeyError:
        pass

    if isinstance(product["price"], dict):
        product["price"] = product["price"]["value"]
    for image in product.get("images", []):
        if image["format"] == "product":
            product["image"] = f'/product_images/{image["savedImage"]}'
            break

    if "categories" in product:
        new_categories = []
        for category in product["categories"]:
            if "dummyCategory" in category and category["dummyCategory"]:
                continue
            c = {}
            c["code"] = category["code"]
            c["name"] = category["name"]
            new_categories.append(c)
        product["categories"] = new_categories

    if "vendorProductCode" in product["vendor"]:
        del product["vendor"]["vendorProductCode"]
    for key in to_delete:
        if key in product:
            del product[key]
    return product


new_products = list(map(map_product, products))
new_products = [p for p in new_products if p is not None]
with open("products_cleaned.json", "w") as f:
    json.dump(new_products, f, indent=2)

In [98]:
new_products

[{'categories': [{'code': '1021',
    'dummyCategory': False,
    'level': 0,
    'name': 'Peeled root vegetables and onions'},
   {'code': '102',
    'dummyCategory': False,
    'level': 0,
    'name': 'Root vegetables'},
   {'code': '10',
    'dummyCategory': False,
    'level': 0,
    'name': 'Fruits and Vegetables'},
   {'code': '0', 'dummyCategory': False, 'level': 0, 'name': 'Front page'},
   {'code': 'salaattipoydan-tuotteet',
    'dummyCategory': False,
    'level': 0,
    'name': 'Products for salad table'},
   {'code': 'sadonkorjuu',
    'dummyCategory': False,
    'level': 0,
    'name': 'Harvest season'},
   {'code': 'satokausikalenteri-joulukuu',
    'dummyCategory': False,
    'level': 0,
    'name': "December's harvest season calendarÂ® "},
   {'code': 'hevi-tarjoukset',
    'dummyCategory': False,
    'level': 0,
    'name': 'December HeVi Deals'}],
  'ean': '6430021093356',
  'price': 8.35,
  'vendor': {'code': '2062', 'name': 'VÃ–RLÃ–N VIHANNES OY'},
  'id': '0fca1f99