In [20]:

# Load a Llama-3-8B instruct 
import transformers
import sys
from torch import cuda, bfloat16
import torch
import transformers
from transformers import AutoTokenizer
from time import time
#import chromadb
#from chromadb.config import Settings
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

time_start = time()
model_config = transformers.AutoConfig.from_pretrained(
   model_id,
    trust_remote_code=True,
    #max_new_tokens=1024
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
time_end = time()
print(f"Prepare model, tokenizer: {round(time_end-time_start, 3)} sec.")




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Prepare model, tokenizer: 6.284 sec.


In [24]:
# Create a query pipeline using transformers and test it!
time_start = time()
query_pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        #max_length=1024,
        device_map="auto",)
time_end = time()
print(f"Prepare pipeline: {round(time_end-time_start, 3)} sec.")


# Test the model and pipeline 
def test_model(tokenizer, pipeline, message):
    """
    Perform a query
    print the result
    Args:
        tokenizer: the tokenizer
        pipeline: the pipeline
        message: the prompt
    Returns
        None
    """    
    time_start = time()
    sequences = pipeline(
        message,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,)
    time_end = time()
    total_time = f"{round(time_end-time_start, 3)} sec."
    
    question = sequences[0]['generated_text'][:len(message)]
    answer = sequences[0]['generated_text'][len(message):]
    
    return f"Question: {question}\nAnswer: {answer}\nTotal time: {total_time}"

from IPython.display import display, Markdown
def colorize_text(text):
    for word, color in zip(["Reasoning", "Question", "Answer", "Total time"], ["blue", "red", "green", "magenta"]):
        text = text.replace(f"{word}:", f"\n\n**<font color='{color}'>{word}:</font>**")
    return text

response = test_model(tokenizer,
                    query_pipeline,
                   "Please explain what is EU AI Act.")
display(Markdown(colorize_text(response)))



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Prepare pipeline: 0.0 sec.




**<font color='red'>Question:</font>** Please explain what is EU AI Act.


**<font color='green'>Answer:</font>**  The EU AI Act is a proposed regulation aimed at regulating the development, deployment, and use of artificial intelligence (AI) in the European Union. The regulation is intended to ensure that AI systems are developed and used in a way that is safe, transparent, and trustworthy.

The EU AI Act proposes a number of measures to achieve these goals, including:

1. Mandatory risk assessments: AI developers would be required to conduct risk assessments to identify potential risks associated with their AI systems.
2. Transparency requirements: AI systems would be required to provide clear and understandable information about their decision-making processes and the data they use.
3. Safety and security requirements: AI systems would be required to meet certain safety and security standards, such as ensuring that they do not pose a risk to human life or national security.
4. Liability and accountability: The regulation would establish clear liability and accountability mechanisms for AI developers and users in the event of accidents or harm caused by AI systems.
5


**<font color='magenta'>Total time:</font>** 7.84 sec.

In [19]:
# Create and test the HF pipeline
# checking again that everything is working fine

query_pipeline_hf = HuggingFacePipeline(pipeline=query_pipeline)

time_start = time()
question = "Please explain what EU AI Act is."
response = query_pipeline_hf(prompt=question)
time_end = time()
total_time = f"{round(time_end-time_start, 3)} sec."
full_response =  f"Question: {question}\nAnswer: {response}\nTotal time: {total_time}"
display(Markdown(colorize_text(full_response)))


ValidationError: 1 validation error for HuggingFacePipeline
max_new_tokens
  extra fields not permitted (type=value_error.extra)

In [5]:
# Create a database of knowledge with ChromaDB and PyPDF Loader 
loader = PyPDFLoader("./aiact_final_draft.pdf")
documents = loader.load()

# Chunk overlapping is required in order to be able to keep the context, even if we have a concept that we want to include that is spread over multiple document chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
all_splits = text_splitter.split_documents(documents)

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
# try to access the sentence transformers from HuggingFace: https://huggingface.co/api/models/sentence-transformers/all-mpnet-base-v2
try:
    embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
except Exception as ex:
    print("Exception: ", ex)
    # alternatively, we will access the embeddings models locally
    local_model_path = "/kaggle/input/sentence-transformers/minilm-l6-v2/all-MiniLM-L6-v2"
    print(f"Use alternative (local) model: {local_model_path}\n")

    embeddings = HuggingFaceEmbeddings(model_name=local_model_path, model_kwargs=model_kwargs)
    
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")

  warn_deprecated(


In [6]:
# Create and test a Retrieval QA chain with with plain code and test it
retriever = vectordb.as_retriever()

qa_chain = RetrievalQA.from_chain_type(
    llm=query_pipeline_hf, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

def test_rag(qa_chain, query):
    time_start = time()
    response = qa_chain.run(query)
    time_end = time()
    total_time = f"{round(time_end-time_start, 3)} sec."

    full_response =  f"Question: {query}\nAnswer: {response}\nTotal time: {total_time}"
    display(Markdown(colorize_text(full_response)))
    
query = "How is performed the testing of high-risk AI systems in real world conditions?"
test_rag(qa_chain, query)

  warn_deprecated(
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m




**<font color='red'>Question:</font>** How is performed the testing of high-risk AI systems in real world conditions?


**<font color='green'>Answer:</font>** Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

interaction between the AI system and the environment within which it operates. The 
risk-management system should adopt the most appropriate risk-management measures 
in light of the state of the art in AI. When identifying the most appropriate risk-
management measures, the provider should document and explain the choices made 
and, when relevant, involve experts and external stakeholders. In identifying the 
reasonably foreseeable misuse of high-risk AI systems, the provider should cover uses of 
AI systems which, while not directly covered by the intended purpose and provided for in 
the instruction for use may nevertheless be reasonably expected to result from readily 
predictable human behaviour in the context of the specific characteristics and use of a 
particular AI system.

interaction between the AI system and the environment within which it operates. The 
risk-management system should adopt the most appropriate risk-management measures 
in light of the state of the art in AI. When identifying the most appropriate risk-
management measures, the provider should document and explain the choices made 
and, when relevant, involve experts and external stakeholders. In identifying the 
reasonably foreseeable misuse of high-risk AI systems, the provider should cover uses of 
AI systems which, while not directly covered by the intended purpose and provided for in 
the instruction for use may nevertheless be reasonably expected to result from readily 
predictable human behaviour in the context of the specific characteristics and use of a 
particular AI system.

interaction between the AI system and the environment within which it operates. The 
risk-management system should adopt the most appropriate risk-management measures 
in light of the state of the art in AI. When identifying the most appropriate risk-
management measures, the provider should document and explain the choices made 
and, when relevant, involve experts and external stakeholders. In identifying the 
reasonably foreseeable misuse of high-risk AI systems, the provider should cover uses of 
AI systems which, while not directly covered by the intended purpose and provided for in 
the instruction for use may nevertheless be reasonably expected to result from readily 
predictable human behaviour in the context of the specific characteristics and use of a 
particular AI system.

ENUnited in diversityENANNEX IX
Information to be submitted upon the registration of high-risk AI systems listed in Annex III in 
relation to testing in real world conditions in accordance with Article 60
The following information shall be provided and thereafter kept up to date with regard to testing 
in real world conditions to be registered in accordance with Article 60:
1. A Union-wide unique single identification number of the testing in real world 
conditions;
2. The name and contact details of the provider or prospective provider and of the 
deployers involved in the testing in real world conditions;
3. A brief description of the AI system, its intended purpose, and other information 
necessary for the identification of the system;
4. A summary of the main characteristics of the plan for testing in real world conditions;
5. Information on the suspension or termination of the testing in real world conditions.



**<font color='red'>Question:</font>** How is performed the testing of high-risk AI systems in real world conditions?
Helpful 

**<font color='green'>Answer:</font>** The testing of high-risk AI systems in real world conditions is performed by the provider or prospective provider and the deployers involved in the testing. The provider or prospective provider and the deployers must provide information such as the name and contact details, a brief description of the AI system, its intended purpose, and other necessary information for the identification of the system. The provider or prospective provider and the deployers must also provide a summary of the main characteristics of the plan for testing in real world conditions and information on the suspension or termination of the testing in real world conditions. The testing in real world conditions must be registered in accordance with Article 60.


**<font color='magenta'>Total time:</font>** 5.165 sec.

In [7]:
query = "What are the operational obligations of notified bodies?"
test_rag(qa_chain, query)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m




**<font color='red'>Question:</font>** What are the operational obligations of notified bodies?


**<font color='green'>Answer:</font>** Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

ENUnited in diversityEN5. Where objections are raised, the Commission shall, without delay, enter into 
consultations with the relevant Member States and the conformity assessment body. In 
view thereof, the Commission shall decide whether the authorisation is justified. The 
Commission shall address its decision to the Member State concerned and to the relevant 
conformity assessment body.
▌
Article 31
Requirements relating to notified bodies
1.A notified body shall be established under the national law of a Member State and shall 
have legal personality.
2. Notified bodies shall satisfy the organisational, quality management, resources and process 
requirements that are necessary to fulfil their tasks, as well as suitable cybersecurity 
requirements.
3. The organisational structure, allocation of responsibilities, reporting lines and operation of 
notified bodies shall ensure confidence in their performance, and in the results of the

ENUnited in diversityEN5. Where objections are raised, the Commission shall, without delay, enter into 
consultations with the relevant Member States and the conformity assessment body. In 
view thereof, the Commission shall decide whether the authorisation is justified. The 
Commission shall address its decision to the Member State concerned and to the relevant 
conformity assessment body.
▌
Article 31
Requirements relating to notified bodies
1.A notified body shall be established under the national law of a Member State and shall 
have legal personality.
2. Notified bodies shall satisfy the organisational, quality management, resources and process 
requirements that are necessary to fulfil their tasks, as well as suitable cybersecurity 
requirements.
3. The organisational structure, allocation of responsibilities, reporting lines and operation of 
notified bodies shall ensure confidence in their performance, and in the results of the

ENUnited in diversityEN5. Where objections are raised, the Commission shall, without delay, enter into 
consultations with the relevant Member States and the conformity assessment body. In 
view thereof, the Commission shall decide whether the authorisation is justified. The 
Commission shall address its decision to the Member State concerned and to the relevant 
conformity assessment body.
▌
Article 31
Requirements relating to notified bodies
1.A notified body shall be established under the national law of a Member State and shall 
have legal personality.
2. Notified bodies shall satisfy the organisational, quality management, resources and process 
requirements that are necessary to fulfil their tasks, as well as suitable cybersecurity 
requirements.
3. The organisational structure, allocation of responsibilities, reporting lines and operation of 
notified bodies shall ensure confidence in their performance, and in the results of the

ENUnited in diversityENArticle 45
Information obligations of notified bodies
1. Notified bodies shall inform the notifying authority of the following: 
(a) any Union technical documentation assessment certificates, any supplements to those 
certificates, and any quality management system approvals issued in accordance with 
the requirements of Annex VII;
(b) any refusal, restriction, suspension or withdrawal of a Union technical 
documentation assessment certificate or a quality management system approval 
issued in accordance with the requirements of Annex VII;
(c) any circumstances affecting the scope of or conditions for notification;
(d) any request for information which they have received from market surveillance 
authorities regarding conformity assessment activities;
(e) on request, conformity assessment activities performed within the scope of their 
notification and any other activity performed, including cross-border activities and 
subcontracting.



**<font color='red'>Question:</font>** What are the operational obligations of notified bodies?
Helpful 

**<font color='green'>Answer:</font>** According to Article 31 of the EU Regulation, notified bodies must satisfy organisational, quality management, resources and process requirements that are necessary to fulfil their tasks, as well as suitable cybersecurity requirements. Additionally, Article 45 outlines their information obligations, such as informing the notifying authority of any changes, refusals, or withdrawals of certifications, and providing information on conformity assessment activities. However, the operational obligations of notified bodies are not explicitly stated in the provided context. If you need further information, please let me know!ENUnited in diversityEN5. Where objections are raised, the Commission shall, without delay, enter into 
consultations with the relevant Member States and the conformity assessment body. In 
view thereof, the Commission shall decide whether the authorisation is justified. The 
Commission shall address its decision to the Member State concerned and to the relevant 
conformity assessment body.
▌
Article 31
Requirements relating to notified bodies
1.A notified body shall be established under the national law of a Member State and shall 
have legal personality.
2. Notified bodies shall satisfy the organisational, quality management, resources and process 
requirements that are necessary to fulfil their tasks, as well as suitable cybersecurity 
requirements.
3. The organisational structure, allocation of responsibilities, reporting lines and operation of 
notified bodies shall ensure confidence


**<font color='magenta'>Total time:</font>** 10.622 sec.

In [8]:
# Check the document sources for the last query
docs = vectordb.similarity_search(query)
print(f"Query: {query}")
print(f"Retrieved documents: {len(docs)}")
for doc in docs:
    doc_details = doc.to_json()['kwargs']
    print("Source: ", doc_details['metadata']['source'])
    print("Text: ", doc_details['page_content'], "\n")

Query: What are the operational obligations of notified bodies?
Retrieved documents: 4
Source:  ./aiact_final_draft.pdf
Text:  ENUnited in diversityEN5. Where objections are raised, the Commission shall, without delay, enter into 
consultations with the relevant Member States and the conformity assessment body. In 
view thereof, the Commission shall decide whether the authorisation is justified. The 
Commission shall address its decision to the Member State concerned and to the relevant 
conformity assessment body.
▌
Article 31
Requirements relating to notified bodies
1.A notified body shall be established under the national law of a Member State and shall 
have legal personality.
2. Notified bodies shall satisfy the organisational, quality management, resources and process 
requirements that are necessary to fulfil their tasks, as well as suitable cybersecurity 
requirements.
3. The organisational structure, allocation of responsibilities, reporting lines and operation of 
notified bo

### Tracing (optional)

In [26]:
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = "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"

In [None]:

# Load a Llama-3-8B instruct 
import transformers
import sys
from torch import cuda, bfloat16
import torch
import transformers
from transformers import AutoTokenizer
from time import time
#import chromadb
#from chromadb.config import Settings
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

time_start = time()
model_config = transformers.AutoConfig.from_pretrained(
   model_id,
    trust_remote_code=True,
    #max_new_tokens=1024
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
time_end = time()
print(f"Prepare model, tokenizer: {round(time_end-time_start, 3)} sec.")




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Prepare model, tokenizer: 6.284 sec.


In [None]:
# Create a query pipeline using transformers and test it!
time_start = time()
query_pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        #max_length=1024,
        device_map="auto",)
time_end = time()
print(f"Prepare pipeline: {round(time_end-time_start, 3)} sec.")


# Test the model and pipeline 
def test_model(tokenizer, pipeline, message):
    """
    Perform a query
    print the result
    Args:
        tokenizer: the tokenizer
        pipeline: the pipeline
        message: the prompt
    Returns
        None
    """    
    time_start = time()
    sequences = pipeline(
        message,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,)
    time_end = time()
    total_time = f"{round(time_end-time_start, 3)} sec."
    
    question = sequences[0]['generated_text'][:len(message)]
    answer = sequences[0]['generated_text'][len(message):]
    
    return f"Question: {question}\nAnswer: {answer}\nTotal time: {total_time}"

from IPython.display import display, Markdown
def colorize_text(text):
    for word, color in zip(["Reasoning", "Question", "Answer", "Total time"], ["blue", "red", "green", "magenta"]):
        text = text.replace(f"{word}:", f"\n\n**<font color='{color}'>{word}:</font>**")
    return text

response = test_model(tokenizer,
                    query_pipeline,
                   "Please explain what is EU AI Act.")
display(Markdown(colorize_text(response)))



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Prepare pipeline: 0.0 sec.




**<font color='red'>Question:</font>** Please explain what is EU AI Act.


**<font color='green'>Answer:</font>**  The EU AI Act is a proposed regulation aimed at regulating the development, deployment, and use of artificial intelligence (AI) in the European Union. The regulation is intended to ensure that AI systems are developed and used in a way that is safe, transparent, and trustworthy.

The EU AI Act proposes a number of measures to achieve these goals, including:

1. Mandatory risk assessments: AI developers would be required to conduct risk assessments to identify potential risks associated with their AI systems.
2. Transparency requirements: AI systems would be required to provide clear and understandable information about their decision-making processes and the data they use.
3. Safety and security requirements: AI systems would be required to meet certain safety and security standards, such as ensuring that they do not pose a risk to human life or national security.
4. Liability and accountability: The regulation would establish clear liability and accountability mechanisms for AI developers and users in the event of accidents or harm caused by AI systems.
5


**<font color='magenta'>Total time:</font>** 7.84 sec.