# Create Layer Config Backup

This notebook outlines how to run a process to create a remote backup of gfw layers.

Rough process:

- Run this notebook from the `gfw/data` folder
- Wait...
- Check `_metadata.json` files in the `production` and `staging` folders for changes
- If everything looks good, make a PR

First, install the latest version of LMIPy

In [1]:
!pip install LMIPy

from IPython.display import clear_output
clear_output()

print('LMI ready!')

LMI ready!


Next, import relevant modules

In [10]:
import LMIPy as lmi

import os
import json
import shutil

from pprint import pprint
from datetime import datetime
from tqdm import tqdm

First, pull the gfw repo and check that the following path correctly finds the `data/layers` folder, inside which, you should find a `production` and `staging` folder.

In [3]:
envs = ['staging', 'production']

In [4]:
path = './backup/configs'

In [5]:
# Create directory and archive previous datasets
with open(path + '/metadata.json') as f:
    date = json.load(f)[0]['updatedAt']
    
shutil.make_archive(f'./backup/archived/archive_{date}', 'zip', path)

'/Users/vizzuality/Workspace/gfw/data/backup/archived/archive_2019-06-21@10h-06m-39s.zip'

In [6]:
# Check correct folders are found

if not all([folder in os.listdir(path) for folder in envs]):
    print(f'Boo! Incorrect path: {path}')
else:
    print('Good to go!')

Good to go!


Run the following to save, build `.json` files and log changes.

## Update record

In [7]:
%%time
for env in envs:
    
    # Get all old ids
    old_ids = [file.split('.json')[0] for file in os.listdir(path + f'/{env}') if '_metadata' not in file]
    
    old_datasets = []
    files = os.listdir(path + f'/{env}')
    
    # Extract all old datasets
    for file in files:
        if '_metadata' not in file:
            with open(path + f'/{env}/{file}') as f:
                old_datasets.append(json.load(f))
    
    # Now pull all current gfw datasets and save
    col = lmi.Collection(app=['gfw'], env=env)
    col.save(path + f'/{env}')
    
    # Get all new ids
    new_ids = [file.split('.json')[0] for file in os.listdir(path + f'/{env}') if '_metadata' not in file]
    
    # See which are new, and which have been removed
    added = list(set(new_ids) - set(old_ids))
    removed = list(set(old_ids) - set(new_ids))
    changed = []
    
    # COmpare old and new, logging those that have changed
    for old_dataset in old_datasets:
        ds_id = old_dataset['id']
        old_ids.append(ds_id)
        with open(path + f'/{env}/{ds_id}.json') as f:
                new_dataset = json.load(f)
        
        if old_dataset != new_dataset:
            changed.append(ds_id)
    
    # Create metadata json
    with open(path + f'/{env}/_metadata.json', 'w') as f:
        
        meta = {
            'updatedAt': datetime.today().strftime('%Y-%m-%d@%Hh-%Mm-%Ss'),
            'env': env,
            'differences': {
                'changed': changed,
                'added': added,
                'removed': removed
            }
        }
        
        # And save it too!
        json.dump(meta,f)
        
print('Done!')


  0%|          | 0/43 [00:00<?, ?it/s]

Saving to path: ./backup/configs/staging


100%|██████████| 43/43 [02:50<00:00,  3.97s/it]


Save complete!



  0%|          | 0/493 [00:00<?, ?it/s]

Saving to path: ./backup/configs/production


100%|██████████| 493/493 [28:07<00:00,  3.42s/it]

Save complete!
Done!
CPU times: user 23.7 s, sys: 2.05 s, total: 25.7 s
Wall time: 31min 8s





In [14]:
# Generate rich metadata

metadata = []
for env in tqdm(envs):
    with open(path + f'/{env}/_metadata.json') as f:
        metadata.append(json.load(f))
        
for env in tqdm(metadata):
    for change_type, ds_list in env['differences'].items():
        tmp = []
        for dataset in ds_list:
            # generate Dataset entity to get name etc...
            print(dataset)
            tmp.append(str(lmi.Dataset(dataset)))
        env['differences'][change_type] = tmp
        
with open(path + f'/metadata.json', 'w') as f:
        
        # And save it too!
        json.dump(metadata,f)





100%|██████████| 2/2 [00:00<00:00, 1080.03it/s][A




  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A

09557278-4449-41ba-8f56-535c98f7489f
cc7a506e-b2f4-4f3f-8852-a1c5f7d19e8e
6dc39298-7db4-415e-b0f5-531a1f198629
f1b4b202-5a2a-421e-9a12-54888e86e140
ac739f3d-8d36-4ff7-90cf-6243f67a95a7
2831761c-86d8-4a5c-8bba-76c4f5211cc5
bc099e2e-0292-4e22-9445-9d428ecf7a56
0366e6d1-3d1e-4758-9703-f8ca57d1a894
79bc3d81-2326-4513-9d4f-3ed6f5003231
603eace3-6e9e-4c85-ad30-936007b40321
0e251685-9d1f-4641-95f4-d401810a9ad3
77446273-5fd3-4d7a-9811-1b0986772328
e11856c6-9ebd-46ba-a651-5b38f1ccf55a
24363416-2ebc-4e71-9514-d831a1b4de4d
1580b62a-35ac-458b-bbdf-5f658920faf0
1e24c6d9-6735-45ad-b2d3-53419af35eb3
d6a0de48-b00b-48b9-b41d-abac3203f990
1ddacdc5-b05e-4ba0-91e6-9b99c66f70b2
f7b77b94-b48a-4acf-9fcc-5e677648586f
f84f8889-6bb7-494d-b95f-c0b90d798cbf
1b5d6aeb-8ad4-45da-8b46-06c489b2c54b
8959e698-48b9-403b-a9d1-d4429cc3f5a2
9e494e8d-808f-4179-aed8-edf753738997
c461cc5b-2eaf-4a66-8f85-2b5c47514ed4






 50%|█████     | 1/2 [00:23<00:23, 23.71s/it][A[A[A[A

63e88e53-0a88-416e-9532-fa06f703d435
098b33df-6871-4e53-a5ff-b56a7d989f9a
3d170908-043f-49db-b26b-9e9bfaaa40ce
461e6f3f-c03c-40b2-8a40-47d1354c93bf
a20e9c0e-8d7d-422f-90f5-3b9bca355aaf
01e90557-91f1-4da2-a810-a1bdd38e7824
a705fce9-601c-455c-b97b-6237da5cedba
391ca96d-303f-4aef-be4b-9cdb4856832c
044f4af8-be72-4999-b7dd-13434fc4a394
93e67a77-1a31-4d04-a75d-86a4d6e35d54
c7a1d922-e320-4e92-8e4c-11ea33dd6e35
e663eb09-04de-4f39-b871-35c6c2ed10b5
ff289906-aa83-4a89-bba0-562edd8c16c6
428db321-5ebb-4e86-a3df-32c63b6d3c83
9b26177b-1a28-4078-a4b9-8267ac4df669
4145f642-5455-4414-b214-58ad39b83e1e
c36c3108-2581-4b68-852a-c929fc758001
5bc5cd49-706f-409c-b10d-77fdfecb010f
9cd1da2d-ab39-4fd9-9487-beea1d56dbac
134caa0a-21f7-451d-a7fe-30db31a424aa
7cc6ac21-c8ef-4dd8-a181-8967721a15a4
85f82851-e16e-4126-a630-93bb63d4ef42
916022a9-2802-4cc6-a0f2-a77f81dd0c09
9c0dfd21-53dd-40a2-9239-6cf292bd80c0
b67fc529-af07-4443-85a9-24b5cf6f2eae
f56a1761-d6be-40ec-9cd3-df16d3588480
9b9e56fc-270e-486d-8db5-e0a839c9a1a9
3

d6b8df79-252b-4c98-8ed2-8b0477826051
ef8fdb2f-bc5e-4423-b6c7-c5713480212c
5fa6b464-4777-49e8-a663-5907eb668927
39e02031-5214-4ff1-806f-23f4485ab2ff
f442c025-ff77-4891-ab3a-a071fe84dad1
53b6cec6-8b8e-4803-b633-80f163f56fe2
998dd97a-389f-4a02-988f-17b184f507ac
b94be15a-782a-416b-be4f-9c1992fa5843
7332c519-5cc3-4d61-82a8-9e6ef93e7e1c
0edea763-4982-4c2c-a8f2-dc2eb13ae221
9182b2e4-fa72-4c79-b2f1-6acb9dd2bbc1
6e7577cb-8b64-49de-b940-0f3315dbcb73
2e613a18-09d0-4870-b678-7199a9de9c5f
7666154c-77fe-4d02-b8e3-dada7193340f
8537210d-7c96-45c6-a90d-cbdcd762bc18
27fc90d2-35fc-4893-a3cb-87fcd55d8914
633fcf4a-e757-4a6d-89c9-e611bc555ce5
724293e5-8eac-4b7e-b7fb-e190db2633a0
731428c4-fe29-4a51-bee5-857a68784e48
bb7c0ba7-b154-4276-b65a-506f672742d7
6f547d49-f5bc-47d4-a33e-f0e13ba9d1f3
ebbd3c15-41cb-43ad-89a3-ff26e456cebe
8c4b080d-8779-4cb2-a7de-f4d23e2f29d1
8a3af91b-fbc6-4aaf-9f25-5fe6218fadd3
269af948-fc1c-40d7-823b-35bdb75c67ad
ef91cab3-92ba-4bf4-bb3e-edd526529be2
cd5c83e9-d407-491b-a41c-21a72bda9109
d





100%|██████████| 2/2 [04:29<00:00, 134.82s/it][A[A[A[A


In [15]:
pprint(metadata)

[{'differences': {'added': ['Dataset 09557278-4449-41ba-8f56-535c98f7489f '
                            '[Test] Wood fiber concessions (LM v3) (v20200725)',
                            'Dataset cc7a506e-b2f4-4f3f-8852-a1c5f7d19e8e '
                            'Brazil land cover (LM v3) [REMOVE]',
                            'Dataset 6dc39298-7db4-415e-b0f5-531a1f198629 '
                            '[Test]VIIRS Vector Tiles (LM v3)',
                            'Dataset f1b4b202-5a2a-421e-9a12-54888e86e140 USA '
                            'Land Cover (LM v3) [REMOVE]',
                            'Dataset ac739f3d-8d36-4ff7-90cf-6243f67a95a7 '
                            'Gross Carbon Emissions',
                            'Dataset 2831761c-86d8-4a5c-8bba-76c4f5211cc5 Test '
                            'Dataset',
                            'Dataset bc099e2e-0292-4e22-9445-9d428ecf7a56 '
                            'Potential carbon sequestration rate (LM v3)',
                     