In [3]:
import importlib.util
import os
from pathlib import Path
import re
from views_pipeline_core.managers.model import ModelManager, ModelPathManager
from views_pipeline_core.managers.ensemble import  EnsembleManager, EnsemblePathManager

In [4]:
base_dir = os.getcwd()
target_dir = Path(base_dir+"/models/")
target_dir


PosixPath('/home/sonja/Desktop/views-platform/views-models/models')

In [None]:
#model_manager = ModelManager(model_path=ModelPathManager(configs_dir))
model_manager = ModelManager(model_path=ModelPathManager('/home/sonja/Desktop/views-platform/views-models/models/blank_space'))

In [7]:
# Update repository structure:
def generate_repo_structure(folders, scripts, model_name, root_key="model_dir"):
    """Generate a structured repository tree with correct folder hierarchy and script placement."""

    if root_key not in folders:
        raise ValueError(f"Root key '{root_key}' not found in folders dictionary")

    root_path = Path(folders[root_key])  # Get the main model directory path
    tree = [model_name]  # Start with the model name
    folder_structure = {}

    # Build folder structure and ensure all folders exist in the mapping
    for folder_name, folder_path in folders.items():
        path = Path(folder_path)
        relative_path = path.relative_to(root_path)
        folder_structure[str(relative_path)] = {"name": folder_name, "scripts": []}

    # Assign scripts to the correct folders
    for script_name, script_path in scripts.items():
        script_path_obj = Path(script_path)
        parent_folder = script_path_obj.parent.relative_to(root_path)

        # Ensure the parent folder exists in our dictionary before adding
        parent_folder_str = str(parent_folder)
        if parent_folder_str in folder_structure:
            folder_structure[parent_folder_str]["scripts"].append(script_name)

    # Generate the tree output
    seen_folders = set()

    def build_tree(path, depth=0):
        """Recursive function to format the tree output."""
        indent = "│   " * depth
        path_str = str(path)

        # Ensure we don't print duplicate folders
        if path_str in seen_folders:
            return
        seen_folders.add(path_str)

        tree.append(f"{indent}├── {path.name}")

        # Add scripts belonging to this folder
        if path_str in folder_structure:
            for script in sorted(folder_structure[path_str]["scripts"]):
                tree.append(f"{indent}│   ├── {script}")

        # Recurse into subfolders
        subfolders = [p for p in folder_structure if Path(p).parent == path]
        for subfolder in sorted(subfolders):
            build_tree(Path(subfolder), depth + 1)

    # Build tree from the root folder
    build_tree(Path("."))  # "." represents the root of the model repo

    # Add root-level files (only if they are not assigned elsewhere)
    root_scripts = set(scripts.keys()) - {s for f in folder_structure.values() for s in f["scripts"]}
    root_files = ["requirements.txt", "run.sh"] + sorted(root_scripts)

    for file in root_files:
        tree.append(f"├── {file}")

    return "\n".join(tree)


In [8]:
for subfolder in target_dir.iterdir():
    if subfolder.is_dir():  # Check if it's a directory
        print(f"Model: {subfolder.name}")
        #configs_dir = Path(subfolder.name+"/configs")
        configs_dir = target_dir / subfolder.name / "configs"
        model_manager = ModelManager(model_path=ModelPathManager(configs_dir))
        mpm = ModelPathManager(configs_dir)

        ## Get Meta Info
        model_name = model_manager.configs['name']
        model_name = " ".join(word.capitalize() for word in model_name.split("_"))

        algorithm = model_manager.configs['algorithm']
        if algorithm=='HurdleModel':
            classifier = model_manager.configs['model_clf']
            regressor = model_manager.configs['model_reg']
            algorithm_all = f"{algorithm} (Classifier: {classifier}, Regressor: {regressor})"
        else:
            algorithm_all = algorithm

        target = model_manager.configs['depvar']
        if isinstance(target, list):
            target = ", ".join(target)
        queryset = model_manager.configs['queryset']
        level = model_manager.configs['level']
        try:
            metrics = model_manager.configs['metrics']
        except KeyError:
            metrics = "No information provided"
        if isinstance(metrics, list):
            metrics = ", ".join(metrics)

        ## Get deployment mode 
        deployment = model_manager.configs['deployment_status']

        ## Get queryset description
        queryset_info = mpm.get_queryset()
        description = queryset_info.description
        try:
            description = " ".join(description.split())
        except AttributeError:
            description = 'No description provided'
        name = queryset_info.name

        ## Update old README file - For Bitter Symphony Model 
        scaffold_path = target_dir / "README_scaffold.md"
        readme_path = target_dir / subfolder.name / "README.md"

        # Read old README
        with open(readme_path, "r") as file:
            old_readme_content = file.read()

        # Add created sessioin if it exists

        match = re.search(r"(## Created on.*)", old_readme_content, re.DOTALL)
        if match==None:
            new_string=''
        else:
            created_section = match.group(1).strip()
            insert_position = created_section.find("##")

            # Find where the '##' ends (after '##' and the next space)
            end_of_heading = len("##")  # Skip the '##' part itself
            new_string = created_section[:end_of_heading] + " " + 'Model' + created_section[end_of_heading:]

        # Read scaffold.md content
        with open(scaffold_path, "r") as file:
            content = file.read()


        # Dictionary of placeholders and their replacements
        replacements = {
            "{{MODEL_NAME}}": model_name,
            "{{MODEL_ALGORITHM}}": algorithm_all,
            "{{LEVEL_OF_ANALYSIS}}": level,
            "{{TARGET}}": target,
            "{{FEATURES}}": name, 
            "{{DESCRIPTION}}": description,
            "{{DEPLOYMENT}}": deployment,
            "{{METRICS}}": metrics,
            "{{CREATED_SECTION}}": new_string,
        }


        # Replace placeholders in scaffold content
        for placeholder, value in replacements.items():
            content = content.replace(placeholder, value)


        repo_root = target_dir / subfolder.name
        repo_structure = generate_repo_structure(mpm.get_directories(), mpm.get_scripts(), model_name=model_name)
        formatted_structure = f"```\n{repo_structure}\n```"
        formatted_structure


        updated_readme = content.replace("## Repository Structure",
                f"## Repository Structure\n\n{formatted_structure}",
            )
        
        # Write the updated content to README.md
        with open(readme_path, "w") as file:
            file.write(updated_readme)

Model: chunky_cat
Model: midnight_rain


Directory /home/sonja/Desktop/views-platform/views-models/models/midnight_rain/notebooks does not exist. Continuing...
Directory /home/sonja/Desktop/views-platform/views-models/models/midnight_rain/notebooks does not exist. Continuing...


Model: bad_blood
Model: blank_space
Model: high_hopes
Model: counting_stars
Model: demon_days
Model: old_money
Model: fast_car
Model: popular_monster
Model: green_squirrel
Model: twin_flame
Model: good_riddance
Model: yellow_pikachu
Model: wildest_dream
Model: bittersweet_symphony
Model: caring_fish
Model: ominous_ox
Model: little_lies
Model: purple_alien
Model: lavender_haze
Model: teen_spirit
Model: heavy_rotation
Model: dark_paradise


Directory /home/sonja/Desktop/views-platform/views-models/models/dark_paradise/notebooks does not exist. Continuing...
Directory /home/sonja/Desktop/views-platform/views-models/models/dark_paradise/notebooks does not exist. Continuing...


Model: fluorescent_adolescent
Model: orange_pasta
Model: yellow_submarine
Model: national_anthem
Model: electric_relaxation
Model: plastic_beach
Model: brown_cheese
Model: invisible_string


Directory /home/sonja/Desktop/views-platform/views-models/models/invisible_string/notebooks does not exist. Continuing...
Directory /home/sonja/Desktop/views-platform/views-models/models/invisible_string/notebooks does not exist. Continuing...


Model: car_radio


In [None]:
##### Ensamble Models ###

In [11]:
ens_manager = EnsembleManager(ensemble_path=EnsemblePathManager('/home/sonja/Desktop/views-platform/views-models/ensembles/cruel_summer'))
#ens_manager.configs

In [12]:
base_dir = os.getcwd()
target_ens_dir = Path(base_dir+"/ensembles/")
target_ens_dir

PosixPath('/home/sonja/Desktop/views-platform/views-models/ensembles')

In [13]:
for subfolder in target_ens_dir.iterdir():
    if subfolder.is_dir():  # Check if it's a directory
        print(f"Model: {subfolder.name}")
        #configs_dir = Path(subfolder.name+"/configs")
        configs_dir = target_ens_dir / subfolder.name / "configs"
        ens_manager = EnsembleManager(ensemble_path=EnsemblePathManager(configs_dir))
        epm = EnsemblePathManager(configs_dir)

        ## Get Meta Info
        ens_name = ens_manager.configs['name']
        ens_name = " ".join(word.capitalize() for word in ens_name.split("_"))

        models = ens_manager.configs['models']
        models = ", ".join(models)

        target = ens_manager.configs['depvar']
        if isinstance(target, list):
            target = ", ".join(target)
        level = ens_manager.configs['level']
        try:
            metrics = ens_manager.configs['metrics']
        except KeyError:
            metrics = "No information provided"
        if isinstance(metrics, list):
            metrics = ", ".join(metrics)
        
        aggregation = ens_manager.configs['aggregation']

        ## Get deployment mode 
        deployment = ens_manager.configs['deployment_status']

        ## Update old README file - For Bitter Symphony Model 
        scaffold_path = target_ens_dir / "README_ensemble_scaffold.md"
        readme_path = target_ens_dir / subfolder.name / "README.md"

        # Read old README
        with open(readme_path, "r") as file:
            old_readme_content = file.read()

        # Add created sessioin if it exists

        match = re.search(r"(## Created on.*)", old_readme_content, re.DOTALL)
        if match==None:
            new_string=''
        else:
            created_section = match.group(1).strip()
            insert_position = created_section.find("##")

            # Find where the '##' ends (after '##' and the next space)
            end_of_heading = len("##")  # Skip the '##' part itself
            new_string = created_section[:end_of_heading] + " " + 'Model' + created_section[end_of_heading:]

        # Read scaffold.md content
        with open(scaffold_path, "r") as file:
            content = file.read()


        # Dictionary of placeholders and their replacements
        replacements = {
            "{{ENSEMBLE_NAME}}": ens_name,
            "{{MODELS}}": models,
            "{{LEVEL_OF_ANALYSIS}}": level,
            "{{TARGET}}": target,
            "{{AGGREGATION}}": aggregation,
            "{{DEPLOYMENT}}": deployment,
            "{{METRICS}}": metrics,
            "{{CREATED_SECTION}}": new_string,
        }


        # Replace placeholders in scaffold content
        for placeholder, value in replacements.items():
            content = content.replace(placeholder, value)

        repo_structure = generate_repo_structure(epm.get_directories(), epm.get_scripts(), model_name=ens_name)
        formatted_structure = f"```\n{repo_structure}\n```"
        formatted_structure


        updated_readme = content.replace("## Repository Structure",
                f"## Repository Structure\n\n{formatted_structure}",
            )
        

        # Write the updated content to README.md
        with open(readme_path, "w") as file:
            file.write(updated_readme)

Model: cruel_summer
Model: pink_ponyclub
Model: white_mustang
Model: skinny_love


In [62]:
model_manager.configs

{'steps': [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36],
 'parameters': {'clf': {'n_estimators': 200}, 'reg': {'n_estimators': 200}},
 'name': 'old_money',
 'algorithm': 'HurdleModel',
 'model_clf': 'LGBMClassifier',
 'model_reg': 'LGBMRegressor',
 'metrics': ['RMSLE', 'CRPS'],
 'depvar': 'ln_ged_sb_dep',
 'queryset': 'fatalities003_pgm_escwa_drought',
 'level': 'pgm',
 'creator': 'Xiaolong',
 'deployment_status': 'shadow'}

In [10]:
mpm = ModelPathManager("/home/sonja/Desktop/views-platform/views-models/models/bad_blood/logs")

In [63]:
mpm.get_directories()

{'model_dir': '/home/sonja/Desktop/views-platform/views-models/models/old_money',
 'logging': '/home/sonja/Desktop/views-platform/views-models/models/old_money/logs',
 'artifacts': '/home/sonja/Desktop/views-platform/views-models/models/old_money/artifacts',
 'configs': '/home/sonja/Desktop/views-platform/views-models/models/old_money/configs',
 'data': '/home/sonja/Desktop/views-platform/views-models/models/old_money/data',
 'data_generated': '/home/sonja/Desktop/views-platform/views-models/models/old_money/data/generated',
 'data_processed': '/home/sonja/Desktop/views-platform/views-models/models/old_money/data/processed',
 'reports': '/home/sonja/Desktop/views-platform/views-models/models/old_money/reports',
 'data_raw': '/home/sonja/Desktop/views-platform/views-models/models/old_money/data/raw',
 'notebooks': '/home/sonja/Desktop/views-platform/views-models/models/old_money/notebooks'}

In [67]:
from pathlib import Path

def generate_repo_structure(folders, root_key="model_dir"):
    """Generate a tree-like repository structure, starting from the model directory."""
    
    # Ensure the root directory exists in the dictionary
    if root_key not in folders:
        raise ValueError(f"Root key '{root_key}' not found in folders dictionary")
    
    root_path = Path(folders[root_key])  # Get the main model directory path
    sorted_items = sorted(folders.items(), key=lambda x: x[1])  # Sort paths
    tree = []
    seen_paths = set()

    for folder, path in sorted_items:
        path_obj = Path(path)
        
        # Skip folders that are not within the root directory
        if root_path not in path_obj.parents and root_path != path_obj:
            continue

        # Convert path relative to root_path
        relative_path = path_obj.relative_to(root_path)
        path_parts = relative_path.parts  # Break it down into subfolder components
        
        # Construct each level of the hierarchy
        structure = ""
        for i, part in enumerate(path_parts):
            indent = "│   " * i  # Indentation for hierarchy
            if part not in seen_paths:
                structure += f"{indent}├── {part}\n"
                seen_paths.add(part)

        tree.append(structure.strip())

    # Add the root directory at the top
    return f"{root_key} ({root_path})\n" + "\n".join(tree)


# Example usage
repo_structure = generate_repo_structure(mpm.get_directories())
print(repo_structure)

model_dir (/home/sonja/Desktop/views-platform/views-models/models/old_money)

├── artifacts
├── configs
├── data
│   ├── generated
│   ├── processed
│   ├── raw
├── logs
├── notebooks
├── reports


In [71]:
def generate_repo_structure(folders, model_name, root_key="model_dir"):
    """Generate a tree-like repository structure, starting from the model directory and showing model name."""
    
    # Ensure the root directory exists in the dictionary
    if root_key not in folders:
        raise ValueError(f"Root key '{root_key}' not found in folders dictionary")
    
    root_path = Path(folders[root_key])  # Get the main model directory path
    sorted_items = sorted(folders.items(), key=lambda x: x[1])  # Sort paths
    tree = []
    seen_paths = set()

    for folder, path in sorted_items:
        path_obj = Path(path)
        
        # Skip folders that are not within the root directory
        if root_path not in path_obj.parents and root_path != path_obj:
            continue

        # Convert path relative to root_path
        relative_path = path_obj.relative_to(root_path)
        path_parts = relative_path.parts  # Break it down into subfolder components
        
        # Construct each level of the hierarchy
        structure = ""
        for i, part in enumerate(path_parts):
            indent = "│   " * i  # Indentation for hierarchy
            if part not in seen_paths:
                structure += f"{indent}├── {part}\n"
                seen_paths.add(part)

        tree.append(structure.strip())

    # Add the model name at the top
    return f"{model_name}\n" + "\n".join(tree)


repo_structure = generate_repo_structure(mpm.get_directories(), model_name=model_name)
print(repo_structure)



Old Money

├── artifacts
├── configs
├── data
│   ├── generated
│   ├── processed
│   ├── raw
├── logs
├── notebooks
├── reports


In [64]:
mpm.get_scripts()

{'config_deployment.py': '/home/sonja/Desktop/views-platform/views-models/models/old_money/configs/config_deployment.py',
 'config_hyperparameters.py': '/home/sonja/Desktop/views-platform/views-models/models/old_money/configs/config_hyperparameters.py',
 'config_meta.py': '/home/sonja/Desktop/views-platform/views-models/models/old_money/configs/config_meta.py',
 'main.py': '/home/sonja/Desktop/views-platform/views-models/models/old_money/main.py',
 'README.md': '/home/sonja/Desktop/views-platform/views-models/models/old_money/README.md',
 'config_queryset.py': '/home/sonja/Desktop/views-platform/views-models/models/old_money/configs/config_queryset.py',
 'config_sweep.py': '/home/sonja/Desktop/views-platform/views-models/models/old_money/configs/config_sweep.py'}

In [None]:
def generate_repo_structure(folders, scripts, model_name, root_key="model_dir"):
    """Generate a structured repository tree with correct folder hierarchy and script placement."""

    if root_key not in folders:
        raise ValueError(f"Root key '{root_key}' not found in folders dictionary")

    root_path = Path(folders[root_key])  # Get the main model directory path
    tree = [model_name]  # Start with the model name
    folder_structure = {}

    # Build folder structure and ensure all folders exist in the mapping
    for folder_name, folder_path in folders.items():
        path = Path(folder_path)
        relative_path = path.relative_to(root_path)
        folder_structure[str(relative_path)] = {"name": folder_name, "scripts": []}

    # Assign scripts to the correct folders
    for script_name, script_path in scripts.items():
        script_path_obj = Path(script_path)
        parent_folder = script_path_obj.parent.relative_to(root_path)

        # Ensure the parent folder exists in our dictionary before adding
        parent_folder_str = str(parent_folder)
        if parent_folder_str in folder_structure:
            folder_structure[parent_folder_str]["scripts"].append(script_name)

    # Generate the tree output
    seen_folders = set()

    def build_tree(path, depth=0):
        """Recursive function to format the tree output."""
        indent = "│   " * depth
        path_str = str(path)

        # Ensure we don't print duplicate folders
        if path_str in seen_folders:
            return
        seen_folders.add(path_str)

        tree.append(f"{indent}├── {path.name}")

        # Add scripts belonging to this folder
        if path_str in folder_structure:
            for script in sorted(folder_structure[path_str]["scripts"]):
                tree.append(f"{indent}│   ├── {script}")

        # Recurse into subfolders
        subfolders = [p for p in folder_structure if Path(p).parent == path]
        for subfolder in sorted(subfolders):
            build_tree(Path(subfolder), depth + 1)

    # Build tree from the root folder
    build_tree(Path("."))  # "." represents the root of the model repo

    # Add root-level files (only if they are not assigned elsewhere)
    root_scripts = set(scripts.keys()) - {s for f in folder_structure.values() for s in f["scripts"]}
    root_files = ["requirements.txt", "run.sh"] + sorted(root_scripts)

    for file in root_files:
        tree.append(f"├── {file}")

    return "\n".join(tree)
repo_structure = generate_repo_structure(mpm.get_directories(), mpm.get_scripts(), model_name=model_name)
print(repo_structure)

Old Money
├── 
│   ├── README.md
│   ├── main.py
│   ├── artifacts
│   ├── configs
│   │   ├── config_deployment.py
│   │   ├── config_hyperparameters.py
│   │   ├── config_meta.py
│   │   ├── config_queryset.py
│   │   ├── config_sweep.py
│   ├── data
│   │   ├── generated
│   │   ├── processed
│   │   ├── raw
│   ├── logs
│   ├── notebooks
│   ├── reports
├── requirements.txt
├── run.sh


In [65]:
mpm.view_directories()


Name                	Path                                              
root                	/home/sonja/Desktop/views-platform/views-models   
logging             	/home/sonja/Desktop/views-platform/views-models/models/old_money/logs
artifacts           	/home/sonja/Desktop/views-platform/views-models/models/old_money/artifacts
configs             	/home/sonja/Desktop/views-platform/views-models/models/old_money/configs
data                	/home/sonja/Desktop/views-platform/views-models/models/old_money/data
data_generated      	/home/sonja/Desktop/views-platform/views-models/models/old_money/data/generated
data_processed      	/home/sonja/Desktop/views-platform/views-models/models/old_money/data/processed
reports             	/home/sonja/Desktop/views-platform/views-models/models/old_money/reports
data_raw            	/home/sonja/Desktop/views-platform/views-models/models/old_money/data/raw
notebooks           	/home/sonja/Desktop/views-platform/views-models/models/old_money/notebo

In [16]:
pretend = ModelPathManager("orange_cat", validate=False)

In [17]:
pretend.get_directories()

{'model_dir': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat',
 'logging': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/logs',
 'artifacts': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/artifacts',
 'configs': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/configs',
 'data': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/data',
 'data_generated': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/data/generated',
 'data_processed': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/data/processed',
 'reports': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/reports',
 'data_raw': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/data/raw',
 'notebooks': '/home/sonja/Desktop/views-platform/views-models/models/orange_cat/notebooks'}

In [19]:
mpm.models

PosixPath('/home/sonja/Desktop/views-platform/views-models/models')

In [20]:
mpm.root

PosixPath('/home/sonja/Desktop/views-platform/views-models')

In [22]:
mpm.get_latest_model_artifact_path(run_type="forecasting")

FileNotFoundError: No model artifacts found for run type 'forecasting' in path '/home/sonja/Desktop/views-platform/views-models/models/bad_blood/artifacts'

In [23]:
mpm.get_queryset()

Queryset(name='fatalities003_pgm_natsoc', loa='priogrid_month', themes=['fatalities'], description='Fatalities natural and social geography, pgm level\n\n                                    Predicting ln(fatalities) using natural and social geography features\n\n                                    ', operations=[[RenameOperation(namespace='trf', name='util.rename', arguments=['ln_ged_sb_dep']), TransformOperation(namespace='trf', name='ops.ln', arguments=[]), TransformOperation(namespace='trf', name='missing.replace_na', arguments=[]), DatabaseOperation(namespace='base', name='priogrid_month.ged_sb_best_sum_nokgi', arguments=['values'])], [RenameOperation(namespace='trf', name='util.rename', arguments=['ln_ged_sb']), TransformOperation(namespace='trf', name='missing.fill', arguments=[]), TransformOperation(namespace='trf', name='ops.ln', arguments=[]), DatabaseOperation(namespace='base', name='priogrid_month.ged_sb_best_sum_nokgi', arguments=['values'])], [RenameOperation(namespace='tr