In [2]:
#!pip install openai

In [3]:
from configparser import ConfigParser
from tqdm import tqdm
import datetime
import requests
import openai
import json
import os
import re

In [4]:
# Get API key stored in local cfg file
# How to is available here : https://towardsdatascience.com/keeping-credentials-safe-in-jupyter-notebooks-fbd215a8e311

parser = ConfigParser()
_ = parser.read('ObsidianGPT.cfg')
openai.api_key = parser.get('my_api', 'auth_key')

In [5]:
#Uncomment to show available OPENAI models
for i in openai.Model.list()["data"]:
    print(i["id"])

davinci
gpt-3.5-turbo-16k-0613
text-davinci-001
text-search-curie-query-001
babbage
text-babbage-001
gpt-3.5-turbo-16k
curie-instruct-beta
gpt-4-0314
gpt-4
davinci-similarity
code-davinci-edit-001
text-similarity-curie-001
ada-code-search-text
gpt-3.5-turbo-0613
text-search-ada-query-001
babbage-search-query
ada-similarity
text-curie-001
text-search-ada-doc-001
text-search-babbage-query-001
code-search-ada-code-001
curie-search-document
davinci-002
text-search-davinci-query-001
text-search-curie-doc-001
babbage-search-document
babbage-002
babbage-code-search-text
text-embedding-ada-002
davinci-instruct-beta
davinci-search-query
text-similarity-babbage-001
text-davinci-002
code-search-babbage-text-001
text-davinci-003
text-search-davinci-doc-001
code-search-ada-text-001
gpt-4-0613
ada-search-query
text-similarity-ada-001
ada-code-search-code
whisper-1
text-davinci-edit-001
davinci-search-document
curie-search-query
babbage-similarity
ada
ada-search-document
text-ada-001
text-similarity-

# Functions definition

In [6]:
# Passes a specific prompt to the chosen OpenAI GPT model
def generate_text(prompt, GPTmodel):
    response = openai.ChatCompletion.create(model=GPTmodel,messages=[{"role": "user", "content": prompt}])
    return response.choices[0].message.content.strip()

In [7]:
# Creates a local obsidian vault
def create_obsidian_vault(vault_name, parent_directory):
    '''Creates or Updates and Obsidian vault, in a given directory. Params are vault_name, parent_directory'''
    vault_path = os.path.join(parent_directory, vault_name)
    
    # Create the vault directory
    os.makedirs(vault_path, exist_ok=True)

    # Create default folders within the vault
    os.makedirs(os.path.join(vault_path, "attachments"), exist_ok=True)
    os.makedirs(os.path.join(vault_path, "notes"), exist_ok=True)
    
    return vault_path

In [8]:
# Extract strings with regexp
def regex_string_finder(input_string, pattern):
    '''
    Usage: regex_string_finder("This is a sample.text:with:colons", r'\.(.*?)\:')
    '''
    # Use re.search to find the first match of the pattern
    match = re.search(pattern, input_string)

    # Check if a match is found
    if match:
        # Extract the substring between '.' and ':'
        extracted_string = match.group(1)
        return extracted_string.strip()
    else:
        return "No match found."

In [9]:
def write_variable_to_text_file(variable, file_path):
    # Open the file in write mode ('w')
    with open(file_path, 'w') as file:
        # Write the variable's value to the file
        file.write(variable)
    
    print(f"Variable written to {file_path}")

# Create and/or modify the local Obsidian Vault

In [10]:
today=datetime.datetime.now()
vault_path = create_obsidian_vault(vault_name=f"MyNewVault"+ "-" + str(today.date()), parent_directory=os.getcwd())
vault_path

'/Users/WDescamps/Desktop/code_projects/side_projects/ObsidianGPT/MyNewVault-2023-09-07'

## Prompt created by GPT, asking him to create a prompt to match my input of models

In [11]:
# Quite a bit of trial and error prompting led to the following prompt. Feel free to experiment with it!

In [37]:
# Get the list of models for different kinds of data from the GPT API
models_prompt = """
Provide an exhaustive list of popular machine learning and deep learning models for all types of data, 
grouped by data type and problem type.Provide as your response the dictionnary containing the results. 
"""

In [38]:
%%time
# Get the list of models, for each data type and problem type
models_text = generate_text(models_prompt, GPTmodel="gpt-4-0314") #Points to the latest GPT4 model or gpt-3.5-turbo for faster results vs lower query quality
models_text

CPU times: user 25.9 ms, sys: 11.7 ms, total: 37.6 ms
Wall time: 2min 2s


'{\n  "Structured Data": {\n    "Regression": [\n      "Linear Regression",\n      "Ridge Regression",\n      "Lasso Regression",\n      "Elastic Net",\n      "Support Vector Regression",\n      "Decision Tree Regression",\n      "Random Forest Regression",\n      "AdaBoost Regression",\n      "Gradient Boosting Regression",\n      "XGBoost",\n      "LightGBM",\n      "CatBoost",\n      "Artificial Neural Networks",\n      "Long Short-Term Memory"\n    ],\n    "Classification": [\n      "Logistic Regression",\n      "Linear Discriminant Analysis",\n      "Quadratic Discriminant Analysis",\n      "Support Vector Machines",\n      "Decision Tree Classifier",\n      "Random Forest Classifier",\n      "AdaBoost Classifier",\n      "Gradient Boosting Classifier",\n      "XGBoost Classifier",\n      "LightGBM Classifier",\n      "CatBoost Classifier",\n      "K-Nearest Neighbors",\n      "Naive Bayes Classifier",\n      "Artificial Neural Networks"\n    ],\n    "Clustering": [\n      "K-Mean

In [40]:
output = models_text
lines = output.split("\n")
for line_raw_case in lines:

    line=line_raw_case.lower()
    print(line)

{
  "structured data": {
    "regression": [
      "linear regression",
      "ridge regression",
      "lasso regression",
      "elastic net",
      "support vector regression",
      "decision tree regression",
      "random forest regression",
      "adaboost regression",
      "gradient boosting regression",
      "xgboost",
      "lightgbm",
      "catboost",
      "artificial neural networks",
      "long short-term memory"
    ],
    "classification": [
      "logistic regression",
      "linear discriminant analysis",
      "quadratic discriminant analysis",
      "support vector machines",
      "decision tree classifier",
      "random forest classifier",
      "adaboost classifier",
      "gradient boosting classifier",
      "xgboost classifier",
      "lightgbm classifier",
      "catboost classifier",
      "k-nearest neighbors",
      "naive bayes classifier",
      "artificial neural networks"
    ],
    "clustering": [
      "k-means clustering",
      "dbscan",
     

In [29]:
models={
    "numerical data": {
        "regression": ["linear regression", "ridge regression", "lasso regression", "support vector machines", "decision trees", "random forest", "gradient boosting", "adaboost", "neural network", "k-nearest neighbors"],
        "classification": ["logistic regression", "naive bayes", "support vector machines", "decision trees", "random forest", "gradient boosting", "adaboost", "k-nearest neighbors", "neural network"],
        "clustering": ["k-means", "hierarchical clustering", "dbscan", "spectral clustering", "mean-shift"],
        "dimensionality reduction": ["pca", "t-sne", "lda", "autoencoders", "umap"]
    },
    "categorical data": {
        "regression": ["ordinal regression", "poisson regression", "negative binomial regression"],
        "classification": ["logistic regression", "naive bayes", "support vector machines", "decision trees", "random forest", "gradient boosting", "adaboost", "k-nearest neighbors", "neural network"],
        "clustering": ["k-modes", "latent class analysis", "hierarchical clustering", "dbscan", "spectral clustering"],
        "association": ["apriori", "eclat"]
    },
    "text data": {
        "classification": ["naive bayes", "support vector machines", "decision trees", "random forest", "gradient boosting", "adaboost", "k-nearest neighbors", "neural network", "convolutional neural networks (cnn)", "recurrent neural networks (rnn)", "long short-term memory networks (lstm)", "transformers", "bert"],
        "clustering": ["k-means", "latent dirichlet allocation (lda)", "hierarchical clustering", "dbscan"],
        "information retrieval": ["tf-idf", "latent semantic indexing (lsi)", "topic modeling", "word2vec", "glove", "bert"],
        "sentiment analysis": ["naive bayes", "support vector machines", "lstm", "cnn", "bert", "transformers"]
    },
    "image data": {
        "classification": ["neural network", "convolutional neural networks (cnn)", "transformers", "resnet", "inception", "vgg", "xception", "mobilenet"],
        "object detection": ["r-cnn", "fast r-cnn", "faster r-cnn", "ssd", "yolo"],
        "segmentation": ["u-net", "mask r-cnn", "fcn", "segnet"],
        "image recognition": ["convolutional neural networks (cnn)", "transformers", "resnet", "inception", "vgg", "xception", "mobilenet"],
        "face recognition": ["eigenfaces", "fisherfaces", "local binary patterns histograms (lbph)", "deepface", "facenet", "openface"],
        "image synthesis": ["gan", "dcgan"]
    },
    "time-series data": {
        "regression": ["arima", "sarima", "state space models", "holt-winters", "prophet", "lstm"],
        "classification": ["neural network", "logistic regression", "k-nearest neighbors", "lstm"],
        "forecasting": ["arima", "sarima", "exponential smoothing", "state space models", "holt-winters", "prophet", "lstm"],
        "anomaly detection": ["arima", "sarima", "holt-winters", "prophet", "one-class svm", "isolation forest","autoencoder"]
    },
    "sound data": {
        "classification": ["neural network", "convolutional neural networks (cnn)", "recurrent neural networks (rnn)", "mel frequency cepstral coefficients (mfcc)", "lstm"],
        "speech recognition": ["hidden markov models (hmm)", "deep speech", "wavenet"],
        "sound synthesis": ["wavenet", "gan"],
        "sound segmentation": ["hidden markov models (hmm)", "neural network", "lstm"]
    }
}

['ObsidianGPT.cfg']

In [None]:
popular_ml_models = {
    "structured_data": {
        "classification": {
            "models": ["logistic regression", "decision trees", "random forest", "gradient boosting machines", "support vector machine", "xgboost", "lightgbm", "catboost", "neural networks"],
        },
        "regression": {
            "models": ["linear regression", "decision trees", "random forest", "gradient boosting machines", "support vector regression", "xgboost", "lightgbm", "catboost", "neural networks"],
        },
        "clustering": {
            "models": ["kmeans", "agglomerative clustering", "dbscan", "spectral clustering", "optics", "mean shift",],
        },
        "anomaly_detection": {
            "models": ["isolation forest", "one-class svm", "local outlier factor", "autoencoders"],
        }
    },
    "unstructured_data": {
        "text_data": {
            "classification": {
                "models": ["naive bayes", "svm", "random forest", "xgboost", "rnn", "cnn", "transformers(bert, gpt)", "fasttext"],
            },
            "clustering": {
                "models": ["kmeans", "agglomerative clustering", "dbscan", "latent dirichlet allocation"],
            },
            "sequence_prediction": {
                "models": ["rnn", "lstm", "gru", "transformers"],
            },
            "sentiment_analysis": {
                "models": ["naive bayes", "svm", "cnn", "rnn", "bert", "roberta", "xlnet"],
            },
        },
        "image_data": {
            "classification": {
                "models": ["cnn", "pretrained cnn (vgg16, resnet, inception, xception)", "capsule networks", "autoencoders"],
            },
            "object_detection": {
                "models": ["r-cnn", "fast r-cnn", "faster r-cnn", "ssd", "yolo", "mask r-cnn"],
            },
            "segmentation": {
                "models": ["u-net", "mask r-cnn", "fcn", "deeplab"],
            },
            "image_generation": {
                "models": ["gan", "dcgan", "wgan", "cgan", "stylegan"],
            },
        },
    },
    "time_series_data": {
        "forecasting": {
            "models": ["arima", "sarima", "prophet", "var", "ses", "holt-winters", "rnn", "lstm", "gru"],
        },
        "anomaly_detection": {
            "models": ["arima", "prophet", "isolation forest", "autoencoders", "lstm", "gru"],
        },
    },
    "reinforcement_learning": {
        "models": ["q-learning", "deep q-learning", "policy gradient", "actor-critic", "reinforce", "a3c", "a2c", "ppo", "ddpg", "td3", "sac"],
    },
}

## Clean model list

In [185]:
#save models dict to disk with current date
with open(f'models_{today.date()}.txt', 'w') as convert_file:
     convert_file.write(json.dumps(full_model_list))

In [None]:
# ChatGPT output is too variable, let's try again with our models extracted yesterday

In [18]:
# Load file from disk if necessary
filename = f'models_2023-09-05.txt'
with open(filename, 'r') as read_file:
    full_model_list = json.load(read_file)

In [19]:
full_model_list

[{'name': 'linear regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'polynomial regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'ridge regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'lasso regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'support vector regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'logistic regression',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'k-nearest neighbors',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'support vector machines',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'decision trees',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'random forest',
  'model_type': 'class

## Add data to existing models

In [177]:
print(today.date())

2023-09-05


In [34]:
# A bit of history 
# On the 2023-05-05, the cell below cost 4$ in GPT4 API calls, and took 4 hours to execute
# On the 2023-09-05,ITS BROKEN. So the cell below cost 0.51$ in GPT4 API calls, and took 19 minutes to execute
# On the 2023-09-06, ITS BROKEN, does not create an entry for each model. It bunches them up in one entry under model type. The cell below cost 0.82$ in GPT4 API calls, and took 24 minutes to execute
# On the 2023-09-06, it works again! The cell below cost 3.5$ in GPT4 API calls, and took 1h24 minutes to execute


In [20]:
%%time
models_with_details=full_model_list.copy()
for model in tqdm(models_with_details):
    resource_prompt = f"""
    For the {model['name']} model with {model['data_type']} , provide:
    1. A short description of the model.
    2. A list of the pros and cons of the model.
    3. The three most relevant use cases.
    4. Three great resources with relevant internet links for implementing the model.
    5. The top 5 people with the most expertise relative to this model, with a link to their github or linkedin page
    """
    resources = generate_text(resource_prompt, GPTmodel="gpt-4-0314")
    # If debugging, uncomment line below
    print(resources)
    
    # Specify the file path where you want to create the text file
    file_path = f"{model['name']}_{model['data_type']}.txt"
    
    # Call the function to write the variable to the file
    write_variable_to_text_file(resources, file_path)

  1%|▊                                                        | 1/74 [00:55<1:07:40, 55.63s/it]

1. Description:
Linear regression is a fundamental type of predictive analysis which is used to understand the relationship between two or more variables. The variable we want to predict is called the dependent variable (or sometimes, the outcome, target). The variables we use to predict the value of the dependent variable are called the independent variables (or sometimes, the predictors, inputs, regressors).

2. Pros and Cons:
   Pros:
   - Simple to understand and interpret.
   - Little to no tuning required.
   - Fast to model and predict.
   - Works best with numerical continuous data.
   
   Cons:
   - It assumes linear relationship between independent and dependent variables.
   - It can be adversely affected by outliers.
   - Linear regression may over-simplify real-world problems by assuming a linear relationship among the variables.

3. Use Cases:
   - Predicting sales amount given advertising budgets in different venues (TV, Radio, Newspapers).
   - Predicting house prices g

  3%|█▌                                                       | 2/74 [01:54<1:09:09, 57.63s/it]

1. Description:
The Polynomial Regression model is a type of regression analysis in which the relationship between the independent variable x and the dependent variable y is modeled as an nth degree polynomial. It expands the linear model by adding extra predictors, obtained by raising each of the original predictors to a power. Polynomial regression fits a nonlinear relationship between the value of x and the corresponding conditional mean of y, denoted E(y |x).

2. Pros and Cons:

Pros:
- Polynomial regression models can fit a wider range of data than linear regression, as they can model relationships that change in direction.
- With high-degree polynomials, you can fit nearly any shape dataset.
- They're great for modeling curves and other complex data shapes.

Cons:
- Polynomial regression models can overfit data easily, leading to high-variance models that do not generalize well to future data.
- The addition of too many polynomial terms can greatly enlarge the amount of collinear

  3%|█▌                                                       | 2/74 [02:24<1:26:50, 72.37s/it]


KeyboardInterrupt: 

In [None]:
# Maintenant qu'on a écrit les fichiers sur le disque on peut en extraire plus proprement les infos

In [234]:
model

{'name': 'linear regression',
 'model_type': 'regression models',
 'data_type': 'numerical data',
 'resources': ' Useful Resources:',
 'python_code': '```',
 'description': '1. Description:',
 'pros_cons': ' Pros and Cons:',
 'top5': " Top 5 People:Due to privacy issues, it's not entirely ethical to provide direct links to individual profiles. As such, I don't have direct links to individuals' professional profiles per se. However, these are the top five individuals (Researchers or Professors) related to Factor Analysis:\n\n- Karl Jöreskog: Developer of LISREL, a software package used for structural equation modeling, which includes Factor Analysis.\n- Peter M Bentler: Developer of EQS, a structural equation model software package which includes factor analysis.\n- Robert Cudeck: Has published numerous papers on Factor Analysis.\n- Joop Hox: Wrote several publications on multilevel factor analysis.\n- Fan Zhang: Has written several academic papers on Factor Analysis.\n",
 'use_cases': 

In [233]:
models_with_details[0:1]

[{'name': 'linear regression',
  'model_type': 'regression models',
  'data_type': 'numerical data',
  'resources': ' Useful Resources:',
  'python_code': '```',
  'description': '1. Description:',
  'pros_cons': ' Pros and Cons:',
  'top5': " Top 5 People:Due to privacy issues, it's not entirely ethical to provide direct links to individual profiles. As such, I don't have direct links to individuals' professional profiles per se. However, these are the top five individuals (Researchers or Professors) related to Factor Analysis:\n\n- Karl Jöreskog: Developer of LISREL, a software package used for structural equation modeling, which includes Factor Analysis.\n- Peter M Bentler: Developer of EQS, a structural equation model software package which includes factor analysis.\n- Robert Cudeck: Has published numerous papers on Factor Analysis.\n- Joop Hox: Wrote several publications on multilevel factor analysis.\n- Fan Zhang: Has written several academic papers on Factor Analysis.\n",
  'use

## Save and/or reload the detailed models dictionnary

In [205]:
# first, let's save our dictionnary of enriched models to disk with the current date attached
with open(f'models_with_details_{datetime.datetime.now().date()}.txt', 'w') as convert_file:
     convert_file.write(json.dumps(models_with_details))

In [146]:
# Load file from disk if necessary
filename = f'models_with_details_{datetime.datetime.now().date()}.txt'
with open(filename, 'r') as read_file:
    models_with_details = json.load(read_file)

## Parsing the detailed models 

In [21]:
models_with_details

[{'name': 'linear regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'polynomial regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'ridge regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'lasso regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'support vector regression',
  'model_type': 'regression models',
  'data_type': 'numerical data'},
 {'name': 'logistic regression',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'k-nearest neighbors',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'support vector machines',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'decision trees',
  'model_type': 'classification models',
  'data_type': 'numerical data'},
 {'name': 'random forest',
  'model_type': 'class

In [207]:
models_with_details[0]

{'name': 'linear regression',
 'model_type': 'regression models',
 'data_type': 'numerical data',
 'resources': "1. Short Description:\nA linear regression model is a simple statistical tool used for predicting a quantitative response Y using a single or multiple predictor variable X. It predicts this output by fitting a linear equation to the observed data. The steps to perform the linear regression involve finding coefficients corresponding to the variables that minimize the difference between the predicted and true responses.\n\n2. Pros and Cons:\n\n   Pros:\n   - Simple to understand and interpret\n   - Requires less computational power\n   - Useful for findings relationships between features and output\n   \n   Cons:\n   - Assumes a linear relationship between variables\n   - Prone to overfitting and underfitting\n   - Sensitive to outliers\n\n3. Use Cases:\n   - Predicting house prices based on various attributes such as the number of rooms, location, area, etc.\n   - Predicting 

In [None]:
### Below is a bit too hackery, work in progress to make it more robust

In [208]:
models_with_details[0:1]

[{'name': 'linear regression',
  'model_type': 'regression models',
  'data_type': 'numerical data',
  'resources': "1. Short Description:\nA linear regression model is a simple statistical tool used for predicting a quantitative response Y using a single or multiple predictor variable X. It predicts this output by fitting a linear equation to the observed data. The steps to perform the linear regression involve finding coefficients corresponding to the variables that minimize the difference between the predicted and true responses.\n\n2. Pros and Cons:\n\n   Pros:\n   - Simple to understand and interpret\n   - Requires less computational power\n   - Useful for findings relationships between features and output\n   \n   Cons:\n   - Assumes a linear relationship between variables\n   - Prone to overfitting and underfitting\n   - Sensitive to outliers\n\n3. Use Cases:\n   - Predicting house prices based on various attributes such as the number of rooms, location, area, etc.\n   - Predict

In [226]:
%%time
for model in models_with_details[0:1]:
    print(model)
    output = resources
    parsed_output = {}
    keys = ["description", "pros_cons", "use_cases", "resources", "python_code", "top5"]

    lines = output.split("\n")
    current_key = ""
    
   
    # Print the parsed output
    for key, value in parsed_output.items():
        model[key]=value

{'name': 'linear regression', 'model_type': 'regression models', 'data_type': 'numerical data', 'resources': ' Useful Resources:', 'python_code': '```', 'description': '1. Description:', 'pros_cons': ' Pros and Cons:', 'top5': " Top 5 People:Due to privacy issues, it's not entirely ethical to provide direct links to individual profiles. As such, I don't have direct links to individuals' professional profiles per se. However, these are the top five individuals (Researchers or Professors) related to Factor Analysis:\n\n- Karl Jöreskog: Developer of LISREL, a software package used for structural equation modeling, which includes Factor Analysis.\n- Peter M Bentler: Developer of EQS, a structural equation model software package which includes factor analysis.\n- Robert Cudeck: Has published numerous papers on Factor Analysis.\n- Joop Hox: Wrote several publications on multilevel factor analysis.\n- Fan Zhang: Has written several academic papers on Factor Analysis.\n", 'use_cases': ' Releva

## Write to Obsidian Vault

In [200]:
# Iterate through the models list
for model in models_with_details:
    model_filename = f"{model['name']}.md"
    model_filepath = os.path.join(vault_path, model_filename)
    data_type = model['data_type']
    model_name = model['name']

    # Create the directory for the data type if it does not exist
    data_type_path = os.path.join(vault_path, data_type)
    if not os.path.exists(data_type_path):
        os.makedirs(data_type_path)

    # Create a file for the model and write its information
    file_name = f"{model_name}.md"
    file_path = os.path.join(data_type_path, file_name)
    with open(file_path, "w") as f:
        
        f.write(f"**Model Type:** {model['model_type']}\n")
        f.write(f"**Data Type:** {model['data_type']}\n\n")
        
        # Add additional information about the model if available
        if 'use_cases' in model:
            f.write("## Use Cases :\n\n")
            for use_case in model['use_cases']:
                f.write(use_case +"\n\n")
            f.write("\n")
        
        #f.write(f"**Description**:\n\n{model['description']}\n\n") # Description field is not correct, commented for now
        
        f.write(f"## Python code: \n{model['python_code']}\n\n")
        
        # Add additional information about the model if available
        if 'resources' in model:
            f.write("## Resources\n\n")
            for resource in model['resources']:
                f.write(resource +"\n")
            f.write("\n")

        # Add "See Also" section with links to related models
        f.write(f"**See Also**:\n\n")
        for other_model in models:
            if other_model['name'] != model['name'] and other_model['data_type'] == model['data_type']:
                f.write(f"- [[{other_model['name']}]]\n")


        # Add relevant tags with hierarchy. Strip special chars for clarity
        f.write(f"\n---\n")
        root_tag = model['data_type'].replace(' ', '').lower()
        leaf_tag = model['name'].replace('(', '').replace(')', '').replace(' ', '').lower()
        f.write(f"tags: #{root_tag}, #{root_tag}/{leaf_tag}\n")
