<a href="https://colab.research.google.com/github/whrc/ARTS/blob/main/Tutorial/data_formatting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Metadata formatting for the ARTS data set

Heidi Rodenhizer, Yili Yang

Jan 2024

## Dependencies

In [None]:
%%capture
pip install git+https://github.com/whrc/ARTS.git

In [None]:
import uuid
import numpy as np
import pandas as pd
import geopandas as gpd
import warnings
import re
from datetime import datetime
import os
from ARTS import dataformatting
from os.path import dirname
from pathlib import Path

In [None]:
# If you are using Google Colab, this mounts your Google Drive at /content/drive.
# Outside Colab the google.colab package is not installed, so the mount is
# skipped instead of stopping "Run All" with an ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    drive.mount("/content/drive")
else:
    print("google.colab is not available; skipping the Google Drive mount.")

Mounted at /content/drive


## User-Defined Input

Before starting, copy your new shapefile into the input_data directory. It is preferred that your file use EPSG:3413 (WGS 84 / NSIDC Sea Ice Polar Stereographic North) as the coordinate system, but this script will perform the transformation if necessary.

Provide the file paths to the data:

In [None]:
# Root folder of the data set (it contains the input_data/ and python_output/
# folders used in this notebook). Edit this one value to point the notebook at
# your own copy of the data.
dataset_root = "/content/drive/MyDrive/ARTS_dataset/mock_dataset"

# NOTE: despite the "_dir" suffix, each variable below holds the path of a file.

# RTS data set to be processed
your_rts_dataset_dir = (
    dataset_root + "/input_data/rts_dataset_test_polygons_new.geojson"
)

# ARTS main dataset to be appended
ARTS_main_dataset_dir = (
    dataset_root + "/input_data/rts_dataset_test_polygons_current.geojson"
)

# Metadata Description file
metadata_dir = dataset_root + "/Metadata_Format_Summary.csv"

Provide the names of any metadata fields in your new file that are not already in the official RTS Data Set (please check the list to ensure that the field has not been included previously) that you would like to be included in the compiled data set:

In [None]:
# Provide new metadata fields as a list of column names (strings). If there are no new fields, leave the code assigning an empty list.
# If your new file is a shapefile, also provide a list of the abbreviated names
# Example:
# new_fields = ['CustomColumn1', 'CustomColumn2']
# Shapefile example:
# new_fields_abbreviated = ['CstmCl1', 'CstmCl2']
# NOTE(review): new_fields_abbreviated appears only in this comment; no visible
# cell defines or reads it - confirm whether it is still needed.
# new_fields is appended to all_fields and passed to dataformatting.preprocessing.
new_fields = []

Have you already created RTS centroid columns, or would you like them to be created within this script? Provide either True, if the columns do not exist yet, or False, if you have already created them:

In [None]:
# True  -> the centroid columns do not exist yet and should be created by this script
# False -> the centroid columns already exist in your file
# The flag is passed on to dataformatting.preprocessing.
# Example:
# calculate_centroid = False
calculate_centroid = True

Would you like your formatted new data to be output in its own file (in which case you will email the file of new features to us to merge with the compiled data set) or appended to the compiled data set (in which case you will commit your updated file to your forked GitHub repository and create a pull request to add the file to the official GitHub repository)? Your decision here should mostly be based on your comfort with GitHub. If you have no idea what the second half of the first sentence means, please opt for the separate file and email it to us.

In [None]:
# True  -> write the formatted new features to their own file
# False -> append the formatted new features to the compiled ARTS data set
# Example
# separate_file = True
separate_file = True

# Import Metadata Format Summary

In [None]:
metadata_format_summary = pd.read_csv(metadata_dir)


def _fields_by_requirement(requirement):
    """Return the FieldName entries whose Required value equals ``requirement``."""
    is_match = metadata_format_summary["Required"] == requirement
    return metadata_format_summary.loc[is_match, "FieldName"].tolist()


# The Required column is compared against the strings "True", "Generated" and "False".
required_fields = _fields_by_requirement("True")
generated_fields = _fields_by_requirement("Generated")
optional_fields = _fields_by_requirement("False")

# Full column list: documented fields first, user-supplied new fields last.
all_fields = required_fields + generated_fields + optional_fields + new_fields

metadata_format_summary

Unnamed: 0,FieldName,Format,Required,Description
0,CentroidLat,Decimal Degrees,True,"Polygon centroid latitude in EPSG:4326, round ..."
1,CentroidLon,Decimal Degrees,True,"Polygon centroid longitude in EPSG:4326, round..."
2,RegionName,String,True,Name of the geographical region
3,CreatorLab,String,True,Data creator and associated organization
4,BaseMapDate,String,True,Date of base map used for RTS delineation in Y...
5,BaseMapSource,String,True,Name of the satellite sensor used for RTS deli...
6,BaseMapResolution,Number,True,Resolution of the imagery used for RTS delinea...
7,TrainClass,String,True,'Positive’ for genuine RTS and ‘Negative’ for ...
8,LabelType,String,True,"Type of digitisation, e.g. ‘Polygon’, ‘Boundin..."
9,MergedRTS,String,Generated,UIDs of intersecting RTS that merged into one RTS


# Read RTS datasets

In [None]:
# Keep only the documented metadata columns (plus the geometry) of the compiled data set.
_columns_to_keep = required_fields + generated_fields + optional_fields + ["geometry"]
ARTS_main_dataset = gpd.read_file(ARTS_main_dataset_dir)
ARTS_main_dataset = ARTS_main_dataset.filter(items=_columns_to_keep)

# Parse the contribution dates so they are datetimes rather than strings.
ARTS_main_dataset.ContributionDate = pd.to_datetime(ARTS_main_dataset.ContributionDate)

# Check if all required columns are present; the first missing one is reported.
_missing_fields = [
    name for name in required_fields if name not in ARTS_main_dataset.columns
]
if _missing_fields:
    raise ValueError(
        "{field} is missing. Has the RTS data set been modified since download?".format(
            field=repr(_missing_fields[0])
        )
    )

ARTS_main_dataset

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,MergedRTS,StabilizedRTS,ContributionDate,UID,Area,geometry
0,70.01668,68.33918,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,b4bae416-9fde-5d91-920d-731bcf042b2d,7581.395967,"POLYGON ((2007198.307 865988.469, 2007189.916 ..."
1,70.01622,68.33917,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,10f75ab9-2297-5b04-97ad-559b34fa020f,3621.349764,"POLYGON ((2007253.161 866032.001, 2007235.776 ..."
2,70.01648,68.33242,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,ff0d265e-385c-53c2-9c3a-28e885a220d2,1339.292585,"POLYGON ((2007310.378 865857.070, 2007340.692 ..."
3,70.0155,68.3295,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,3482.02968,"POLYGON ((2007453.557 865845.775, 2007456.723 ..."
4,70.01451,68.33296,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,7c64ad8e-07be-5ba5-8f97-19374f809af1,134.941981,"POLYGON ((2007514.965 865926.094, 2007508.132 ..."
5,70.01437,68.33493,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,e36abf57-ffb3-5c6a-be32-d8278f385a73,411.580601,"POLYGON ((2007496.803 865994.362, 2007488.866 ..."


In [None]:
# Load and pre-process the new RTS file. The arguments are the path of the new
# file, the required/optional/new field lists and the centroid flag from the
# user-input cells.
# NOTE(review): judging by the displayed output, the result keeps the required
# fields plus the geometry, with centroid coordinates rounded to 5 decimals -
# confirm against dataformatting.preprocessing.
processed_data = dataformatting.preprocessing(
    your_rts_dataset_dir,
    required_fields,
    optional_fields,
    new_fields,
    calculate_centroid,
)
processed_data


  new_data["CentroidLat"] = new_data.to_crs(4326).centroid.y.round(5)

  new_data["CentroidLon"] = new_data.to_crs(4326).centroid.x.round(5)


Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,geometry
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007199.012 865984.608, 2007188.217 ..."
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007340.152 865838.514, 2007326.959 ..."
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007453.557 865845.775, 2007456.416 ..."
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007492.587 865949.766, 2007492.342 ..."
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007479.747 865990.850, 2007472.484 ..."
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."


# Check Metadata Format of New Data

In [None]:
# Check the metadata formatting of the new data before UIDs are generated.
# In the run recorded here the check printed "Formatting looks good!".
dataformatting.run_formatting_checks(processed_data)

Formatting looks good!


# Generate UIDs

Set the seed for UID generation by concatenating all required metadata columns (except UID) into a single string.

In [None]:
# The return value of seed_gen is not used; the `seed` column is read from
# processed_data on the next line, so the function presumably adds that column
# in place - TODO confirm.
dataformatting.seed_gen(processed_data)
processed_data.seed

0    70.0165568.33926Yamal-GydanRodenhizer2023-05-0...
1    70.0154368.34071Yamal-GydanRodenhizer2023-05-0...
2    70.0165268.33235Yamal-GydanRodenhizer2023-05-0...
3    70.0153168.33115Yamal-GydanRodenhizer2023-05-0...
4    70.0145768.33342Yamal-GydanRodenhizer2023-05-0...
5    70.0144868.33495Yamal-GydanRodenhizer2023-05-0...
6    70.0154368.34071Yamal-GydanRodenhizer2022-05-0...
Name: seed, dtype: object

Generate UIDs

In [None]:
def _uid_from_seed(seed):
    """Return the name-based (version 5) UUID string derived from ``seed``."""
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, seed))


# uuid5 is deterministic: the same seed string always produces the same UID.
processed_data["UID"] = list(map(_uid_from_seed, processed_data.seed))
processed_data.UID

0    697680a4-9707-59fb-aabb-540308cf0705
1    edb47fed-2c5d-59f0-9609-dd230ab25a58
2    d10b5ffe-ff73-57cf-a12b-4b74142f0b98
3    1fae5c95-c99a-5400-ad29-6273ecbbaf94
4    e9627675-6398-5d4f-a3c0-19d0ef5511df
5    f6593433-6229-5324-85cf-e3edccae5420
6    aedeff78-0897-5159-aefd-c5a5885475c8
Name: UID, dtype: object

# Check for Intersections with RTS Data Set

Find intersecting RTS polygons from the official RTS data set and retrieve their UIDs. Also create an empty column, to be populated manually, for the UIDs of polygons that are repeat delineations.

In [None]:
# Compare the new polygons with the compiled ARTS data set. The return value is
# not assigned, so it is only displayed as the cell output.
# NOTE(review): the path of the new file is passed in as well; presumably the
# function uses it to name the file of overlapping polygons that is edited
# manually in the next step - confirm against dataformatting.check_intersections.
dataformatting.check_intersections(
    processed_data, your_rts_dataset_dir, ARTS_main_dataset
)

   CentroidLat  CentroidLon   RegionName  CreatorLab            BaseMapDate  \
0     70.01655     68.33926  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
2     70.01652     68.33235  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
3     70.01531     68.33115  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
4     70.01457     68.33342  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
5     70.01448     68.33495  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   

  BaseMapSource  BaseMapResolution TrainClass LabelType  \
0   WorldView-2                4.0   Positive   Polygon   
2   WorldView-2                4.0   Positive   Polygon   
3   WorldView-2                4.0   Positive   Polygon   
4   WorldView-2                4.0   Positive   Polygon   
5   WorldView-2                4.0   Positive   Polygon   

                                            geometry BaseMapResolutionStr  \
0  POLYGON ((2007199.012 865984.608, 2007188.217 ...                    4   
2  POLYGON ((200

At this point, you will need to manually check all polygons with intersections against the polygons in the official RTS data set in your preferred GIS software and save the output as a geojson file.

When possible/necessary, try to find imagery that matches the date of the intersecting polygons - this may require contacting the lab that did the original delineation.

Your job is to compare each of the polygons listed in the 'Intersections' column with the new RTS feature and manually copy and paste the UIDs from the 'Intersections' column into the 'RepeatRTS', 'StabilizedRTS', 'MergedRTS', or 'AccidentalOverlap' column, based on the relationship between the two polygons.

- Paste the UID into the RepeatRTS column when the new RTS feature is the same RTS feature as the RTS feature in the 'Intersections' column, but was delineated at a different point in time, by a different lab at the same point in time, or from different imagery at the same point in time. The RTS feature is the same when it was the result of the same RTS initiation event.

- Paste the UID into the StabilizedRTS column when the RTS feature in the 'Intersections' column is a stabilized RTS scar as of the date of the imagery used in the new RTS delineations.

- Paste the UID into the MergedRTS column when multiple RTS features in the 'Intersections' column merged to form the new RTS feature.

- Paste the UID into the AccidentalOverlap column when inaccuracies in delineation of separate RTS features lead to overlap (e.g. features that are very close to each other and the polygons barely touch).

When this is done, each of the UIDs in the Intersections column should have been copied into one (and only one) of the 'RepeatRTS', 'StabilizedRTS', 'MergedRTS', or 'AccidentalOverlap' columns.


# Load Manually Edited File and Join to Processed Data

Add the 'RepeatRTS', 'StabilizedRTS', 'MergedRTS', and 'AccidentalOverlap' columns that you just edited back into `processed_data`.

In [None]:
# Provide the path to the manually-edited file (the geojson saved from your GIS
# software after assigning each intersecting UID to a column).
edited_file_path = "/content/drive/MyDrive/ARTS_dataset/mock_dataset/python_output/rts_dataset_test_polygons_new_overlapping_polygons_edited.geojson"

# Combine the processed data with the manually-edited intersection columns.
# NOTE(review): how merge_data matches rows between the two inputs is not
# visible here - confirm against dataformatting.merge_data.
merged_data = dataformatting.merge_data(processed_data, edited_file_path)
merged_data

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,geometry,BaseMapResolutionStr,seed,UID,Intersections,AdjacentPolys,RepeatRTS,MergedRTS,StabilizedRTS,AccidentalOverlap
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007199.012 865984.608, 2007188.217 ...",4,70.0165568.33926Yamal-GydanRodenhizer2023-05-0...,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",4,70.0154368.34071Yamal-GydanRodenhizer2023-05-0...,edb47fed-2c5d-59f0-9609-dd230ab25a58,,,,,,
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007340.152 865838.514, 2007326.959 ...",4,70.0165268.33235Yamal-GydanRodenhizer2023-05-0...,ff0d265e-385c-53c2-9c3a-28e885a220d2,ff0d265e-385c-53c2-9c3a-28e885a220d2,,ff0d265e-385c-53c2-9c3a-28e885a220d2,,,
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007453.557 865845.775, 2007456.416 ...",4,70.0153168.33115Yamal-GydanRodenhizer2023-05-0...,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,,,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007492.587 865949.766, 2007492.342 ...",4,70.0145768.33342Yamal-GydanRodenhizer2023-05-0...,,7c64ad8e-07be-5ba5-8f97-19374f809af1,,,,,7c64ad8e-07be-5ba5-8f97-19374f809af1
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007479.747 865990.850, 2007472.484 ...",4,70.0144868.33495Yamal-GydanRodenhizer2023-05-0...,e36abf57-ffb3-5c6a-be32-d8278f385a73,e36abf57-ffb3-5c6a-be32-d8278f385a73,,e36abf57-ffb3-5c6a-be32-d8278f385a73,,,
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",4,70.0154368.34071Yamal-GydanRodenhizer2022-05-0...,aedeff78-0897-5159-aefd-c5a5885475c8,,,,,,


# Check for Intersections within New RTS Data Set

Intersections within the new data set are assumed to be repeat delineations of the same RTS feature. If this is not true (e.g. if you have delineated an old RTS scar and an active RTS feature on top of it), this code will not assign UIDs properly. In this case, please get in touch with us to determine how to proceed.

In [None]:
# Handle overlaps among the new polygons themselves.
# NOTE(review): judging by the displayed result, this adds the ContributionDate
# and SelfIntersectionIndices columns and gives overlapping new polygons a
# shared UID - confirm against dataformatting.self_intersection.
merged_data_self_intersect = dataformatting.self_intersection(merged_data)
merged_data_self_intersect

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,geometry,...,seed,UID,Intersections,AdjacentPolys,RepeatRTS,MergedRTS,StabilizedRTS,AccidentalOverlap,ContributionDate,SelfIntersectionIndices
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007199.012 865984.608, 2007188.217 ...",...,70.0165568.33926Yamal-GydanRodenhizer2023-05-0...,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,,2024-01-19,
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",...,70.0154368.34071Yamal-GydanRodenhizer2023-05-0...,aedeff78-0897-5159-aefd-c5a5885475c8,,,,,,,2024-01-19,aedeff78-0897-5159-aefd-c5a5885475c8
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007340.152 865838.514, 2007326.959 ...",...,70.0165268.33235Yamal-GydanRodenhizer2023-05-0...,ff0d265e-385c-53c2-9c3a-28e885a220d2,ff0d265e-385c-53c2-9c3a-28e885a220d2,,ff0d265e-385c-53c2-9c3a-28e885a220d2,,,,2024-01-19,
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007453.557 865845.775, 2007456.416 ...",...,70.0153168.33115Yamal-GydanRodenhizer2023-05-0...,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,,,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,,2024-01-19,
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007492.587 865949.766, 2007492.342 ...",...,70.0145768.33342Yamal-GydanRodenhizer2023-05-0...,,7c64ad8e-07be-5ba5-8f97-19374f809af1,,,,,7c64ad8e-07be-5ba5-8f97-19374f809af1,2024-01-19,
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007479.747 865990.850, 2007472.484 ...",...,70.0144868.33495Yamal-GydanRodenhizer2023-05-0...,e36abf57-ffb3-5c6a-be32-d8278f385a73,e36abf57-ffb3-5c6a-be32-d8278f385a73,,e36abf57-ffb3-5c6a-be32-d8278f385a73,,,,2024-01-19,
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",...,70.0154368.34071Yamal-GydanRodenhizer2022-05-0...,aedeff78-0897-5159-aefd-c5a5885475c8,,,,,,,2024-01-19,aedeff78-0897-5159-aefd-c5a5885475c8


# Check Completeness of Intersection Information

In [None]:
# Check the intersection columns before the final column selection. In the run
# recorded here the check printed "Intersection information is complete.".
dataformatting.check_intersection_info(merged_data_self_intersect)

Intersection information is complete.


# Final Column Selection

In [None]:
# Optional fields that get an empty placeholder column; StabilizedRTS and
# MergedRTS are left out of this list.
_skipped_fields = ("StabilizedRTS", "MergedRTS")
_placeholder_fields = [
    name for name in optional_fields if name not in _skipped_fields
]
merged_data_self_intersect = dataformatting.add_empty_columns(
    merged_data_self_intersect, _placeholder_fields
)

# Keep only the documented fields plus the geometry, in that order.
_final_columns = all_fields + ["geometry"]
merged_data_self_intersect = merged_data_self_intersect[_final_columns]

merged_data_self_intersect

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,MergedRTS,StabilizedRTS,ContributionDate,UID,BaseMapID,Area,geometry
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,2024-01-19,,,,"POLYGON ((2007199.012 865984.608, 2007188.217 ..."
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-19,aedeff78-0897-5159-aefd-c5a5885475c8,,,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-19,ff0d265e-385c-53c2-9c3a-28e885a220d2,,,"POLYGON ((2007340.152 865838.514, 2007326.959 ..."
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,2024-01-19,,,,"POLYGON ((2007453.557 865845.775, 2007456.416 ..."
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-19,,,,"POLYGON ((2007492.587 865949.766, 2007492.342 ..."
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-19,e36abf57-ffb3-5c6a-be32-d8278f385a73,,,"POLYGON ((2007479.747 865990.850, 2007472.484 ..."
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-19,aedeff78-0897-5159-aefd-c5a5885475c8,,,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."


In [None]:
# Write the formatted data, either to its own file or appended to the compiled
# data set.
#
# os.path.splitext removes only the final extension, so dots elsewhere in the
# path (a leading "../", a dotted folder name) no longer cut the name short the
# way str.split(".")[0] did.
#
# NOTE(review): joining an absolute path onto Path("..") / "python_output"
# makes pathlib discard the "../python_output" prefix. With absolute dataset
# paths the separate file is therefore written next to the input file, and the
# appended data set replaces the file at ARTS_main_dataset_dir - confirm that
# this is the intended location.
if separate_file:
    output_path = (
        Path("..")
        / "python_output"
        / (os.path.splitext(str(your_rts_dataset_dir))[0] + "_formatted.geojson")
    )
    # Create the target folder if it does not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    merged_data_self_intersect.to_file(output_path)
else:
    ARTS_main_dataset = dataformatting.add_empty_columns(
        ARTS_main_dataset, list(optional_fields)
    )
    # Store the dates as YYYY-MM-DD strings. Converting through pd.to_datetime
    # keeps this step safe to re-run and tolerates missing dates.
    ARTS_main_dataset["ContributionDate"] = pd.to_datetime(
        ARTS_main_dataset["ContributionDate"]
    ).dt.strftime("%Y-%m-%d")

    ARTS_main_dataset = ARTS_main_dataset[all_fields + ["geometry"]]
    updated_data = pd.concat([ARTS_main_dataset, merged_data_self_intersect])
    output_path = Path("..") / "python_output" / ARTS_main_dataset_dir
    output_path.parent.mkdir(parents=True, exist_ok=True)
    updated_data.to_file(output_path)

Now you are ready to submit. The next cell prints the path of the file to submit.

In [None]:
# Print the path of the file to submit. The expression mirrors the one used
# when the file was written; os.path.splitext strips only the final extension,
# so dots elsewhere in the path do not cut the name short.
if separate_file:
    submission_path = (
        Path("..")
        / "python_output"
        / (os.path.splitext(str(your_rts_dataset_dir))[0] + "_formatted.geojson")
    )
else:
    # Join with the path string ARTS_main_dataset_dir; ARTS_main_dataset is the
    # GeoDataFrame itself and cannot be joined onto a Path.
    submission_path = Path("..") / "python_output" / ARTS_main_dataset_dir

print(str(submission_path))

/content/drive/MyDrive/ARTS_dataset/mock_dataset/input_data/rts_dataset_test_polygons_new_formatted.geojson
