## Experimenting with TypeScript definition generation

**Status**
1. We are running into issues getting the correct schema for properties defined by `Span.attribute_property()`, likely because they are not initialized at the start.
2. Otherwise, this notebook should provide scaffolding for generating TypeScript types.
3. The generated types are quite repetitive - the schema does not capture models extending each other.


To get this working, install [json-schema-to-typescript](https://www.npmjs.com/package/json-schema-to-typescript) using `npm i -g json-schema-to-typescript`.

In [None]:
import sys

# Relative path to a local trulens checkout; adjust this for your own
# directory layout. Later cells reuse it to locate the schema output file.
PATH_TO_TRULENS="../../../../trulens"
# Extend the module search path so the `trulens_eval` imports below resolve
# against that checkout.
sys.path.append(PATH_TO_TRULENS + "/trulens_eval")

import json

import opentelemetry.trace.span as ot_span
import opentelemetry.trace as ot_trace

from trulens_eval.trace import OTSpan
from trulens_eval.trace import WithHashableSpanContext
from trulens_eval.trace.span import Span
from trulens_eval.trace.span import SpanAgent
from trulens_eval.trace.span import SpanEmbedding
from trulens_eval.trace.span import SpanLLM
from trulens_eval.trace.span import SpanMemory
from trulens_eval.trace.span import SpanMethodCall
from trulens_eval.trace.span import SpanOther
from trulens_eval.trace.span import SpanReranker
from trulens_eval.trace.span import SpanRetriever
from trulens_eval.trace.span import SpanRoot
from trulens_eval.trace.span import SpanTask
from trulens_eval.trace.span import SpanTool
from trulens_eval.trace.span import SpanType
from trulens_eval.trace.span import SpanTyped
from trulens_eval.trace.span import SpanUntyped
from trulens_eval.trace.span import TransSpanRecord
from trulens_eval.trace.span import TransSpanRecordAppCall

In [None]:
# ! pip install -U pydantic opentelemetry-api opentelemetry-sdk

In [None]:
from pydantic import TypeAdapter
# Inspect the pydantic core schema built for OpenTelemetry's SpanContext.
# As the last expression in the notebook cell, its value is shown as output.
TypeAdapter(ot_span.SpanContext).core_schema

In [None]:
# Reference: https://github.com/phillipdupuis/pydantic-to-typescript/tree/master

import argparse
import importlib
import inspect
import json
import logging
import os
import shutil
import sys
from importlib.util import module_from_spec, spec_from_file_location
from tempfile import mkdtemp
from types import ModuleType
from typing import Any, Dict, List, Tuple, Type
from uuid import uuid4

from pydantic import BaseModel, Extra, create_model

try:
    from pydantic.generics import GenericModel
except ImportError:
    GenericModel = None

logger = logging.getLogger("pydantic2ts")


def import_module(path: str) -> ModuleType:
    """
    Helper which allows modules to be specified by either dotted path notation or by filepath.

    If `path` exists on disk, the file is loaded under a freshly generated
    unique name; otherwise `path` is handed to `importlib.import_module`.

    If we import by filepath, we must also assign a name to it and add it to sys.modules BEFORE
    calling 'spec.loader.exec_module' because there is code in pydantic which requires that the
    definition exist in sys.modules under that name. Should executing the
    module fail, that registration is removed again so a half-initialised
    module is not left behind in sys.modules.

    Args:
        path: Dotted module path (e.g. "package.module") or path to a file.

    Returns:
        The imported module object.

    Raises:
        Exception: Whatever error the import raised; it is logged and then
            re-raised unchanged.
    """
    try:
        if not os.path.exists(path):
            return importlib.import_module(path)

        name = uuid4().hex
        spec = spec_from_file_location(name, path, submodule_search_locations=[])
        module = module_from_spec(spec)
        sys.modules[name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Roll back the early registration: the module body did not finish
            # executing, so the entry would point at a broken module.
            sys.modules.pop(name, None)
            raise
        return module
    except Exception:
        logger.error(
            "The --module argument must be a module path separated by dots or a valid filepath"
        )
        # Bare raise keeps the original traceback intact.
        raise


def is_submodule(obj, module_name: str) -> bool:
    """
    Tell whether `obj` is a module nested below `module_name`.

    An object qualifies when it is a module whose `__name__` begins with
    `module_name` followed by a dot.
    """
    if not inspect.ismodule(obj):
        return False

    dotted_prefix = f"{module_name}."
    candidate_name = getattr(obj, "__name__", "")
    return candidate_name.startswith(dotted_prefix)


def is_concrete_pydantic_model(obj) -> bool:
    """
    Decide whether `obj` is a concrete pydantic model class.

    Non-classes and `BaseModel` itself are rejected. When `GenericModel` is
    available, its subclasses count only if they are not abstract. Any other
    class counts when it derives from `BaseModel`.
    """
    # Guard clauses: anything that is not a class, or is the base itself.
    if not inspect.isclass(obj) or obj is BaseModel:
        return False

    if GenericModel and issubclass(obj, GenericModel):
        # NOTE: This uses the isabstract function rather than checking for
        # the obsolete __concrete__ attribute.
        return not inspect.isabstract(obj)

    return issubclass(obj, BaseModel)


def extract_pydantic_models(module: ModuleType) -> List[Type[BaseModel]]:
    """
    Collect the pydantic models reachable from `module`.

    The module's own members come first, followed by the models found by
    recursing into each of its submodules.
    """
    parent_name = module.__name__

    def _is_child_module(candidate) -> bool:
        return is_submodule(candidate, parent_name)

    # Models that are direct members of this module.
    found = [
        member
        for _, member in inspect.getmembers(module, is_concrete_pydantic_model)
    ]

    # Then everything reachable through its submodules.
    for _, child in inspect.getmembers(module, _is_child_module):
        found.extend(extract_pydantic_models(child))

    return found
    
def clean_schema(schema: Dict[str, Any]) -> None:
    """
    Tidy a JSON schema in place. Two adjustments are made:

    1) Every entry under "properties" loses its "title".
       Otherwise each property would get its own interface in the
       resulting typescript file (which is a LOT of unnecessary noise).
    2) The placeholder "An enumeration." description, applied to Enums
       which don't have a docstring, is dropped from enum schemas.
    """
    properties = schema.get("properties", {})
    for field_schema in properties.values():
        field_schema.pop("title", None)

    has_placeholder = schema.get("description") == "An enumeration."
    if has_placeholder and "enum" in schema:
        schema.pop("description")


def generate_json_schema(models: List[Type[BaseModel]]) -> Dict[str, Any]:
    """
    Create a top-level '_Master_' model with references to each of the actual models.
    Generate the schema for this model, which will include the schemas for all the
    nested models. Then clean up the schema.

    The schema is produced in 'serialization' mode, and every entry under
    "$defs" is passed through `clean_schema`. No model configuration (such
    as 'extra') is modified here.

    Args:
        models: The pydantic model classes to include. Each becomes a required
            field of '_Master_', keyed by the class's `__name__`, so classes
            sharing a name overwrite one another.

    Returns:
        The JSON schema as a dictionary (not a serialized string), ready to
        be passed to `json.dump`.
    """

    # TODO: replace master model approach with definitions schema
    # https://docs.pydantic.dev/latest/api/pydantic_core_schema/#pydantic_core.core_schema.definitions_schema
    master_model = create_model(
        "_Master_", **{m.__name__: (m, ...) for m in models}
    )

    schema = master_model.model_json_schema(mode='serialization')

    # The nested model schemas live under "$defs"; tidy each one in place.
    for d in schema.get("$defs", {}).values():
        clean_schema(d)

    return schema

In [None]:
# Import the span module by dotted path, then gather the pydantic model
# classes found among its members and those of its submodules.
module = import_module("trulens_eval.trace.span")
models = extract_pydantic_models(module)

In [None]:
# Write the combined JSON schema for the extracted models to schema.json in
# the record viewer's source tree. 'w+' creates the file or truncates an
# existing one.
with open(f'{PATH_TO_TRULENS}/trulens_eval/trulens_eval/react_components/record_viewer/src/schema/schema.json', 'w+') as f:
    json.dump(generate_json_schema(models), f, indent=2)


Now you can run `json2ts -i <path_to_schema> -o <output_path>` to generate the TypeScript types from the schema.