# Find shortest reaction
To explore the data, we look for the shortest reaction. The hope is that this reaction is simple to understand and will help us understand the dataset better.

In [None]:
# import requirements
import json

from ord_schema.message_helpers import load_message
from ord_schema.proto import dataset_pb2
from google.protobuf.json_format import MessageToJson
from tqdm import tqdm

from pathlib import Path

# Input: directory holding the dataset shards (searched for "*/*.pb.gz" files).
data_path = Path("ord-data", "data")

# Output: scratch directory for files produced by this notebook; created on
# first run and reused on later runs.
temp_folder = Path("temp")
temp_folder.mkdir(exist_ok=True)

# Scan the dataset shards for an atom-mapped reaction with a short identifier.
#
# Only the FIRST identifier of each reaction is inspected. A reaction is a
# candidate when that identifier has `is_mapped` set and a non-empty `value`;
# the length of `value` is the quantity being minimised.
#
# The scan stops as soon as a value shorter than `short_enough` characters is
# found, so the result is "short enough", not necessarily the global minimum.
# If every shard is read without hitting the threshold, the shortest candidate
# seen is kept. If there are no candidates, `shortest_reaction` stays None.
#
# NOTE(review): an earlier comment here cited 2,274,399 reactions, presumably
# the size of the full dataset (not verified); that would explain the early
# exit.
short_enough = 20
len_reaction = float("inf")  # any real identifier length compares smaller
shortest_reaction = None

# glob() yields paths in a filesystem-dependent order. Sorting makes reruns
# visit shards in the same order, so the early exit picks the same reaction.
all_gz_paths = sorted(data_path.glob("*/*.pb.gz"))
for gz_path in tqdm(all_gz_paths):
    dataset = load_message(
        str(gz_path),
        dataset_pb2.Dataset,
    )

    for reaction in dataset.reactions:
        # Round-trip through JSON to get a plain dict keyed by the proto field
        # names. Only `preserving_proto_field_name` differs from the library
        # defaults; the other keywords are left out on purpose, because
        # `including_default_value_fields` was removed from newer protobuf
        # releases and passing it raises TypeError there.
        rxn_json = json.loads(
            MessageToJson(reaction, preserving_proto_field_name=True)
        )

        identifiers = rxn_json.get("identifiers")
        if not identifiers:
            continue
        first_identifier = identifiers[0]
        if not first_identifier.get("is_mapped", False):
            continue
        # Unset fields are omitted from the JSON, so "value" can be missing;
        # an identifier without a value is not a usable candidate.
        value = first_identifier.get("value")
        if not value:
            continue

        if len(value) < len_reaction:
            shortest_reaction = rxn_json
            len_reaction = len(value)
            print(f"new shortest reaction: {len_reaction}")
            if len_reaction < short_enough:
                # Short enough. I don't have forever
                break
    if len_reaction < short_enough:
        # Propagate the early exit out of the shard loop as well.
        break

In [None]:
# Persist the selected reaction as pretty-printed JSON in the scratch folder.
short_reaction_path = temp_folder / "short_reaction.json"
with short_reaction_path.open("w") as out_file:
    out_file.write(json.dumps(shortest_reaction, indent=4))

In [None]:
# Show the selected reaction in the notebook output, formatted like the file
# written to the scratch folder.
formatted_reaction = json.dumps(shortest_reaction, indent=4)
print(formatted_reaction)