# This notebook does:
* copies every folder under `crop` in the library https://usegalaxy.eu/libraries/folders/F70e3e2cca6d50fdb/page/1 into a history
* combines the files of each folder into one collection
* collapses that collection into a single file
* renames the created file to the name of the folder (the barcode)
* combines all collapsed files into one final collection

In [1]:
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy import dataset_collections as collections
import pandas as pd
from io import StringIO
import os
import requests

In [2]:
# Never hardcode the API key in the notebook: it ends up in version control and in the
# saved cell output. Any key that was committed here must be treated as leaked and
# regenerated in the Galaxy user preferences.
# The key is taken from the environment; if it is not set, it is asked for interactively
# (getpass does not echo the input, so it does not show up in the cell output).
if not os.environ.get('GALAXY_API_KEY'):
    from getpass import getpass
    os.environ['GALAXY_API_KEY'] = getpass('Galaxy API key: ')

env: GALAXY_API_KEY=<redacted>


In [3]:
# Client for the usegalaxy.eu server, authenticated with the API key from the environment.
gi = GalaxyInstance(
    url='https://usegalaxy.eu/',
    key=os.environ['GALAXY_API_KEY'],
)

### Run the fetch, combine, collapse, combine pipeline

In [13]:
# --- settings for this run ---
# Name of the history that receives the copied and collapsed data.
h_name = 'collection_palestine_v2'
# Text fragment used to select library folders: every folder whose path contains it
# is processed, so it must be unique to the folders of interest.
look_for = "/crop/"
# Name of the data library that is searched for folders.
library_name = "Palestine"
# Name of the final collection holding one collapsed file per folder.
output_collection = "palestine_data"

# Reuse the history called h_name when it already exists, otherwise create it.
# More than one history with that name is ambiguous, so that case is rejected.
histories = gi.histories.get_histories(name=h_name)
if histories and len(histories) > 1:
    raise ValueError('Too many histories with that name')

if histories:
    # exactly one match: take it
    history_id = histories[0]["id"]
else:
    # nothing found: start a fresh history
    history_params = gi.histories.create_history(h_name)
    history_id = history_params["id"]

print(f"Storing data in history with ID: {history_id}")

# Get all folders from the library called library_name.
# get_libraries returns a list, so an unknown library name is reported with a clear
# message instead of an IndexError on [0]. If several libraries share the name, the
# first one is used.
libraries = gi.libraries.get_libraries(name = library_name)
if not libraries:
    raise ValueError(f'No library found with the name: {library_name}')
library_id = libraries[0]['id']
folders = gi.libraries.get_folders(library_id = library_id)

# Select the folders whose path contains the look_for text.
# folder_ids maps folder name -> folder id; the folder name (last path component)
# is later used as the name of the collapsed file created for that folder.
folder_ids = {}
for folder in folders:
    folder_path = folder['name']
    if look_for in folder_path:
        folder_id = folder['id']
        folder_name = folder_path.split('/')[-1]
        # Two matching folders with the same last path component would silently
        # overwrite each other here and one of them would never be processed.
        if folder_name in folder_ids:
            raise ValueError(
                f'Folder name {folder_name} is not unique among the folders matching {look_for}'
            )
        folder_ids[folder_name] = folder_id

# Without any match the rest of the pipeline would only build an empty collection.
if not folder_ids:
    raise ValueError(f'No folder path in library {library_name} contains {look_for}')

# Copy every selected folder into the history and collapse its files into one dataset.
# collapsed_collection_ids maps folder name -> id of the collapsed dataset.
collapse_tool_id = "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0"
collapsed_collection_ids = {}
for folder_name, folder_id in folder_ids.items():
    print(f"Processing folder with name: {folder_name}, id:{folder_id}")

    # Copy the complete library folder into the history; the response is iterated
    # below as one entry per copied dataset.
    copied_dataset = gi.histories.copy_dataset(history_id, folder_id, source = "library_folder")

    # Bundle the copied datasets into a list collection named after the folder.
    elements = [
        collections.HistoryDatasetElement(name=dataset["name"], id=dataset["id"])
        for dataset in copied_dataset
    ]
    folder_collection = collections.CollectionDescription(
        name=folder_name,
        type="list",
        elements=elements
    )
    collection_response = gi.histories.create_dataset_collection(
        history_id=history_id,
        collection_description=folder_collection
    )
    collection_id = collection_response["id"]

    # Build the tool form for this history and point its input at the new collection.
    # "hdca" is the source type the tool model showed for collections.
    # NOTE(review): the form is built after the collection exists, presumably so that
    # 'values' already has an entry to overwrite - confirm before moving this call.
    tool_model = gi.tools.build(tool_id=collapse_tool_id, history_id=history_id)
    template_inputs = tool_model["state_inputs"]
    input_slot = template_inputs['input_list']['values'][0]
    input_slot['id'] = collection_id
    input_slot['src'] = "hdca"

    # Run the collapse tool on the collection.
    output = gi.tools.run_tool(
        tool_id=collapse_tool_id,
        history_id=history_id,
        tool_inputs=template_inputs
    )

    # Remember the first output dataset and rename it to the folder name.
    id_of_created_dataset = output["outputs"][0]['id']
    collapsed_collection_ids[folder_name] = id_of_created_dataset
    gi.histories.update_dataset(history_id, id_of_created_dataset, name = folder_name)

# Gather the collapsed datasets (one per folder) into the final list collection,
# using the folder name as the element name.
elements = [
    collections.HistoryDatasetElement(name=dataset_name, id=dataset_id)
    for dataset_name, dataset_id in collapsed_collection_ids.items()
]

final_description = collections.CollectionDescription(
    name=output_collection,
    type="list",
    elements=elements
)
collection_response = gi.histories.create_dataset_collection(
    history_id=history_id,
    collection_description=final_description
)


Processing folder with name: barcode01, id:F8963227bea76f224
Processing folder with name: barcode02, id:F584def5b593281ed
Processing folder with name: barcode03, id:F451b3772c539bfbd
Processing folder with name: barcode04, id:F648bdd4267ba3213
Processing folder with name: barcode05, id:F75a6ad5e53c5e04d
Processing folder with name: barcode06, id:Ffe330413467dea14
Processing folder with name: barcode07, id:Fb2ce532c14359a8b
Processing folder with name: barcode08, id:Fe370648cce1dd14b
Processing folder with name: barcode09, id:Fcd0dff06735c1bbe
Processing folder with name: barcode10, id:F1372e67d2434c48c
Processing folder with name: barcode11, id:Fc87c6570a685d50b
Processing folder with name: barcode12, id:Ff0c01f0d894bf716
Processing folder with name: barcode13, id:Ff505f004a4df4d19
Processing folder with name: barcode14, id:F6286e1a8443b2953
Processing folder with name: barcode15, id:Fad5ea92260a13762
Processing folder with name: barcode16, id:F3380e5303529b049
Processing folder with n