In [None]:
# Notebook-wide setup: stdlib path handling plus the Weave client.
import pathlib
import weave
# NOTE(review): called unconditionally; presumably points Weave at a locally
# running dev frontend — confirm before sharing this notebook.
weave.use_frontend_devmode()
# After this line the bare name `langchain` is Weave's ecosystem module (used
# below as `langchain.<op>(...)`), not the langchain package itself. Later
# `from langchain.x import y` statements do not rebind that name.
from weave.legacy.weave.ecosystem import langchain

In [None]:
# Evaluation questions, grouped by the feedback label attached to them below.
neg_feedback = [
    "Is there a way that I can embed an iframe component in Weights and Biases report?",
    "how do i create the pretty sweeps plot?",
    "I'm getting error 400. What can I do?",
    "How can I move a column in a table to the right?",
    "In Prompts, how can I resize the Trace Timeline to make it bigger or full screen?",
    "I have a question about exporting CSV files from a web panel. Whenever I do this, I always get two extra columns for MAX and MIN values, even if I only have one data curve. Does anyone know how to solve this issue?",
    "artifacts cli command to upload a folder of images",
    "Is there a recommended way to use Launch in an SLURM environment?",
    "How to export a single chart's data using the API?",
    "how can i login with a different wandb user?",
]

pos_feedback = [
    "my logging doesn't seem to include errors when the training crashes, how do I change the logging level for wandb logging?",
    "how can i get the data from my wandb run by querying my logs using python?",
    "how do I fix an error with wandb Table construction from pandas dataframe: TypeError: Data row contained incompatible types",
    "how can i make a heatmap using vega and plot it to wandb? what is the vegaspec?",
    "is there a good way to join 2 tables together programmatically?",
    "I have a question about sweeps. How can you constrain relationship between parameters. For example, I now that if num_layers * hidden_dim is large, I'll run out of GPU memory. So, also I would like to explore some hyperparameter space, there are some combination I know will fail. optuna as a way to do that: you can throw an special exception to cancel a run during a sweep, so that it is not recorded. Is there something similar in W&B, or another way of pruning unwanted combination of hyperparameters?",
    "where can I find my run_id",
    "How do I group runs?",
    "I am using the Hugging Face trainer to train a GPT-2 model. How can I log in wandb the results of the model in each evaluation?",
    "I am looking to finetune LLAMA on my own dataset using OpenAI, can you give me examples on how to do this?",
]

# One {'question', 'feedback'} row per question: all positive rows first,
# then all negative rows, each group in its list order.
all_qs = [
    {'question': text, 'feedback': tag}
    for tag, group in (('positive', pos_feedback), ('negative', neg_feedback))
    for text in group
]

# Save the rows to Weave under the name 'eval_questions'; later cells use the
# returned object through `questions`.
questions = weave.save(all_qs, 'eval_questions')

In [None]:
# Show the `questions` object returned by weave.save(...) as this cell's output.
questions

In [None]:
import os

from langchain.docstore.document import Document
from langchain.text_splitter import (
    MarkdownTextSplitter,
    PythonCodeTextSplitter,
    TokenTextSplitter,
)

# Load the markdown files from a local checkout of our docs repo
# (https://github.com/wandb/docodile/), chunk them, and save the chunks to Weave.

# Location of the checkout. Set the DOCODILE_DIR environment variable to point
# at your own clone; the fallback is the path this notebook was written against.
DOC_DIR = os.environ.get('DOCODILE_DIR', '/Users/shawn/code2/docodile')
DOC_SUFFIX = '.md'

doc_root = pathlib.Path(DOC_DIR)
if not doc_root.is_dir():
    # Globbing a missing directory yields nothing, which would otherwise only
    # show up later as an empty corpus far away from the real cause.
    raise FileNotFoundError(
        f"Docs checkout not found at {DOC_DIR!r}; set DOCODILE_DIR to your docodile clone"
    )

# Store each file as a langchain Document object.
# sorted() keeps document order reproducible; glob order depends on the filesystem.
# NOTE(review): 'path' holds only the file name, so files with the same name in
# different folders cannot be told apart downstream.
raw_docs = []
for file in sorted(doc_root.glob('**/*' + DOC_SUFFIX)):
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    page_text = file.read_text(encoding='utf-8')
    raw_docs.append(Document(page_content=page_text, metadata={'path': file.name}))

# Two splitting passes, in the same order as before: markdown-aware, then token-based.
markdown_chunks = MarkdownTextSplitter().split_documents(raw_docs)
token_chunks = TokenTextSplitter().split_documents(markdown_chunks)

# `docs` is bound exactly once, to what weave.save returns; later cells rely on that.
docs = weave.save(token_chunks, 'wandb-docs')
docs

In [None]:
# Vector store with langchain
from langchain.vectorstores import VectorStore, FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

# Build a FAISS store over the documents using OpenAI embeddings.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(
    documents=weave.use(docs),  # weave.use presumably materializes the saved `docs` — confirm
    embedding=embeddings,
)

# Last expression of the cell: whatever weave.save returns is displayed.
weave.save(vector_store)

In [None]:
# Vector store with Weave

# Things you can do with the resulting node:
#   - .similarity_search(<query>)
#     - Known issue: this currently crashes because Row.Group tries to render and fails.
#       Workaround: switch the panel to Table, then change the column to
#       row.__getattr__('page_content').
#   - .document_embeddings
#     - Gets the embeddings out of FAISS and also performs FAISS' k-means with 20 clusters.
#     - Switch the panel to projection.plot to view them.
# TODO:
#   - give control over k for k-means

# Same import as in the notebook's first cell; re-running it is harmless.
from weave.legacy.weave.ecosystem import langchain

# `docs` is passed as-is here (no weave.use), unlike the plain-langchain FAISS cell.
vector_store_node = langchain.faiss_from_documents(docs, langchain.openai_embeddings())
# Last expression of the cell: display the node.
vector_store_node

In [None]:
# With langchain
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Retrieval QA chain: gpt-3.5-turbo at temperature 0.7, with the FAISS
# `vector_store` from the earlier cell acting as the retriever.
model_gpt_35_temp07 = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.7),
    chain_type='stuff',
    retriever=vector_store.as_retriever(),
)

# `model` is bound once, to the object weave.save returns. The raw langchain
# chain stays reachable as `model_gpt_35_temp07`.
model = weave.save(model_gpt_35_temp07, 'mymodel')

# Example call (left disabled):
#model.run('hello')

In [None]:
# With weave
from weave.legacy.weave.ecosystem import langchain

# Chat model first: model name plus 0.7 (presumably the temperature, mirroring
# the plain-langchain cell — confirm against the op signature).
chat_model_node = langchain.chat_openai('gpt-3.5-turbo', 0.7)

# Then the QA op over the Weave vector store node, using the 'stuff' chain type.
qa = langchain.retrieval_qa_from_chain_type(chat_model_node, 'stuff', vector_store_node)

# Last expression of the cell: the result of .run is displayed.
qa.run('hello')

In [None]:
# Imported here so this cell does not depend on another cell's imports.
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA


def _make_retrieval_qa(temperature):
    """Build a 'stuff' RetrievalQA chain over `vector_store` using gpt-3.5-turbo.

    Args:
        temperature: value forwarded to ChatOpenAI's `temperature` argument.

    Returns:
        The chain object returned by RetrievalQA.from_chain_type.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=temperature),
        chain_type='stuff',
        # Reads the notebook-level `vector_store` at call time.
        retriever=vector_store.as_retriever(),
    )


# Two chains that differ only in temperature.
model_02 = _make_retrieval_qa(0.2)
model_07 = _make_retrieval_qa(0.7)

In [None]:
# Two with LC and weave.save

# Save both langchain chains together under the name 'docbot-models';
# `models` ends up bound to what weave.save returns.
models = weave.save([model_02, model_07], 'docbot-models')

# Last expression of the cell: the result of .run on the saved list is displayed.
models.run('hello')

In [None]:
# You can map over questions
# Table over questions.limit(3) with two columns: the question text, and
# the result of models.run(...) called on that question text.
weave.legacy.weave.panels.Table(questions.limit(3), columns=[
    lambda q: q['question'],
    lambda q: models.run(q['question'])])

In [None]:
# Interactive evaluation and exploration dashboard
#
# The Board is built from `vars` (named values) and `panels` (views, each with a layout).
# Lambdas in both take parameters named after `vars` keys or after a panel `id`
# (e.g. `lambda embeddings, documents: ...`). Presumably Board resolves them by
# name, so treat the parameter names as wiring — TODO confirm.

weave.legacy.weave.panels.Board(
    vars={
        'documents': docs,
        # NOTE(review): no lambda in this Board takes `questions` as a parameter;
        # the table panel below uses the notebook-level `questions` instead.
        'questions': questions.limit(2),
        'embeddings': langchain.openai_embeddings(),
        'vector_store': lambda embeddings, documents: langchain.faiss_from_documents(documents, embeddings),
        'doc_embeddings': lambda vector_store: vector_store.document_embeddings(),
        # Disabled alternative: reuse the notebook-level `models` instead of building them here.
        #'models': models,
        # model_a / model_b differ only in the second chat_openai argument (0.2 vs 0.7).
        'model_a': lambda vector_store: langchain.retrieval_qa_from_chain_type(
            langchain.chat_openai('gpt-3.5-turbo', 0.2),
            'stuff',
            vector_store),
        'model_b': lambda vector_store: langchain.retrieval_qa_from_chain_type(
            langchain.chat_openai('gpt-3.5-turbo', 0.7),
            'stuff',
            vector_store),
        'models': lambda model_a, model_b: weave.legacy.weave.ops.make_list(a=model_a, b=model_b),
        # 2D projection of the document embeddings. The last argument carries
        # settings for 'pca', 'tsne' and 'umap'; the leading 'pca' presumably
        # selects which one is used — confirm against projection2D.
        'projection': lambda doc_embeddings: doc_embeddings.projection2D(
                                                                  'pca',
                                                                  'single',
                                                                  ['embedding'],
                                                                  {'pca': {},
                                                                   'tsne': {
                                                                       'perplexity': 30,
                                                                       'learningRate': 10,
                                                                       'iterations': 25
                                                                   },
                                                                   'umap': {
                                                                       'neighbors': 15,
                                                                       'minDist': 0.1,
                                                                       'spread': 1.0
                                                                   }
                                                                  }),
    },
    # Panels are placed in rows at y=0, 6, 12 and 18 via BoardPanelLayout.
    panels=[    
        # Row y=0: run one fixed question through `models`, wrapped in an Each panel.
        weave.legacy.weave.panels.BoardPanel(
            lambda models: weave.legacy.weave.panels.Each(models.run("What is Weave?")),
            layout=weave.legacy.weave.panels.BoardPanelLayout(x=0, y=0, w=24, h=6)
        ),
        # Row y=6: per-question comparison table. Columns: question text, feedback
        # label, then the `.result` of model_a and of model_b on that question.
        # NOTE(review): `questions` here is the notebook-level variable (the lambda
        # only takes model_a and model_b), not the board var limited to 2 rows.
        weave.legacy.weave.panels.BoardPanel(
            lambda model_a, model_b: weave.legacy.weave.panels.Table(questions,
                                              columns=[
                                                  lambda question: question['question'],
                                                  lambda question: question['feedback'],
                                                  weave.legacy.weave.panels.TableColumn(
                                                      lambda question: model_a.run(question['question']).result,
                                                      name='model_a'
                                                  ),
                                                  weave.legacy.weave.panels.TableColumn(
                                                      lambda question: model_b.run(question['question']).result,
                                                      name='model_b'
                                                  ),
                                              ]),
            layout=weave.legacy.weave.panels.BoardPanelLayout(x=0, y=6, w=24, h=6)
        ),       
        # Row y=12, left: plot of the projection, colored by 'source.cluster'.
        # The id 'docs_projection' is what the next panel's lambda parameter refers to.
        weave.legacy.weave.panels.BoardPanel(
            id='docs_projection',
            panel=lambda projection: weave.legacy.weave.panels.Plot(
                projection,
                x=lambda row: row['projection.x'],
                y=lambda row: row['projection.y'],
                color=lambda row: row['source.cluster']
            ),
            layout=weave.legacy.weave.panels.BoardPanelLayout(x=0, y=12, w=12, h=6)
        ),
        # Row y=12, right: shows selected_data() of the 'docs_projection' panel.
        weave.legacy.weave.panels.BoardPanel(
            lambda docs_projection: docs_projection.selected_data(),
            layout=weave.legacy.weave.panels.BoardPanelLayout(x=12, y=12, w=12, h=6)
        ),
        # Row y=18, left: all documents, showing page_content and metadata['path'].
        weave.legacy.weave.panels.BoardPanel(
            lambda documents: weave.legacy.weave.panels.Table(documents,
                                                 columns=[
                                                     lambda doc: doc.page_content,
                                                     lambda doc: doc.metadata['path']
                                                 ]),
            layout=weave.legacy.weave.panels.BoardPanelLayout(x=0, y=18, w=12, h=6)
        ),
        # Row y=18, right: results of similarity_search('weave'), same two columns.
        weave.legacy.weave.panels.BoardPanel(
            lambda vector_store: weave.legacy.weave.panels.Table(vector_store.similarity_search('weave'),
                                                    columns=[
                                                     lambda doc: doc.page_content,
                                                     lambda doc: doc.metadata['path']  
                                                    ]),
            layout=weave.legacy.weave.panels.BoardPanelLayout(x=12, y=18, w=12, h=6)
        ),
    ]
)