In [1]:
!huggingface-cli login --token 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

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/willem/.cache/huggingface/token
Login successful


In [1]:
import torch
import mlflow
import pandas as pd
from huggingface_hub import login
from transformers import AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, pipeline, AutoModelForCausalLM
import transformers
from torch import cuda, bfloat16
from langchain import hub
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Hugging Face authentication is expected to be in place already (cached token
# at ~/.cache/huggingface/token). Uncomment to log in from Python instead:
# login()

# Send runs and traces to the MLflow tracking server running on this machine.
mlflow.set_tracking_uri(uri="http://localhost:5000")

# Experiment under which the model and its traces are recorded.
mlflow.set_experiment(experiment_name="StableLM Tracing")

# Turn on LangChain autologging, including models and input examples.
mlflow.langchain.autolog(
    log_input_examples=True,
    log_models=True,
)

###############################################################################
# Build the vector store used for later retrieval
###############################################################################
# Load the catalogue and derive the text to embed. Only the page description
# is embedded (subtitles are not included); rows without a description are
# dropped, and only the columns listed below are kept.
df = (
    pd.read_csv("test.csv")
    .assign(info_to_embed=lambda frame: frame["mediacontent_page_description"])
    .dropna(subset=["info_to_embed"])
    .loc[:, [
        "mediacontent_page_description",
        "mediacontent_pagetitle_program",
        "info_to_embed",
        "mediacontent_pageid",
    ]]
)

# Turn each row into a document whose page content is `info_to_embed`.
loader = DataFrameLoader(df, page_content_column="info_to_embed")
catalog = loader.load()

# Split the documents into chunks (size 1000, no overlap) before embedding.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
catalog_chunks = text_splitter.split_documents(catalog)

# Dutch sentence-transformer model, run on the first CUDA device, with
# embedding normalisation switched off.
model_name = "NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers"
encode_kwargs = dict(normalize_embeddings=False)
hf_embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=dict(device="cuda:0"),
    encode_kwargs=encode_kwargs,
)

# Embed every chunk and index the vectors with FAISS.
vector_store = FAISS.from_documents(catalog_chunks, hf_embedding)


###############################################################################
# Set up the retriever and the RAG prompt
###############################################################################
# Return only the single closest chunk (k=1) for each query.
retriever = vector_store.as_retriever(
    search_kwargs=dict(k=1),
)

# Pull the "rlm/rag-prompt" template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values, in input order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

###############################################################################
# Make Chain
###############################################################################
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Text-generation pipeline wrapped by LangChain below.
pipe = transformers.pipeline(
    task='text-generation',  # LLM task
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device=0,
    # Kept from the original setup ("langchain expects the full text").
    # NOTE(review): the recorded output of this cell starts with the prompt
    # text, so the chain's result includes the prompt; confirm this is intended.
    return_full_text=True,
    # --- generation parameters ---
    # Sampling must be enabled explicitly; without `do_sample=True` decoding
    # is greedy and `temperature` / `top_k` are ignored.
    do_sample=True,
    temperature=0.1,  # 'randomness' of outputs; lower is more deterministic
    top_k=10,
    num_return_sequences=1,
    repetition_penalty=1.5,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=256,
)

llm = HuggingFacePipeline(pipeline=pipe)

# The question is passed through unchanged, and is also sent to the retriever,
# whose documents are flattened into the prompt's context.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Seed the RNGs so that the sampled answer is repeatable across re-runs.
transformers.set_seed(42)

rag_chain.invoke("Ik wil een programma over het leger zien met soldaten")


  from .autonotebook import tqdm as notebook_tqdm
2024/07/27 23:25:23 INFO mlflow.langchain._langchain_autolog: MLflow autologging does not support logging models containing BaseRetriever because logging the model requires `loader_fn` and `persist_dir`. Please log the model manually using `mlflow.langchain.log_model(model, artifact_path, loader_fn=..., persist_dir=...)`


"Human: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: Ik wil een programma over het leger zien met soldaten \nContext: Adjudant Eric Vroonen en sergeant Noémie Ghysselings werken in Kandahar als verpleger in het Green Skander veldhospitaal. In dit hightech medisch complex komen talloze Amerikaanse en Afghaanse slachtoffers binnen, en Eric en Noémie kunnen er enorm veel ervaring opdoen. Al zorgt een raketaanval soms voor ongewilde onderbrekingen. In Kunduz gaat Bas op stap met een TACP-team (Tactical Air Control Party). Er staan militaire oefeningen op het programma, want de unit moet op scherp staan. Bas mag de soldaten ook verblijden met postpakketten uit het vaderland, vooraleer de volgende missie tactisch voorbereid wordt. Een missie waarbij ook het camerateam mee op pad mag. \nAnswe