# Quadrant VS

### Packages

In [2]:
import pandas as pd
import numpy as np
from llama_index.core import Document, Settings
#from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.embeddings.ollama import OllamaEmbedding

from llama_index.core import VectorStoreIndex, QueryBundle, Response, Settings
from llama_index.embeddings.ollama import OllamaEmbedding
#from llama_index.llms.ollama import Ollama


from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch

  from tqdm.autonotebook import tqdm, trange


In [3]:
from llama_index.llms.ollama import Ollama
from pathlib import Path
import qdrant_client
from llama_index.core import VectorStoreIndex, ServiceContext, download_loader, StorageContext,  Settings
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import json

### Get Data

In [4]:

def get_docs_from_json(file):
    with open(file,mode='rt') as f:
        JIRA_dict = json.load(f)
    
    documents = [
        Document(
            text = obj["FA_Summary"],
            metadata = {
                "idFail_Analysis": obj["idFail_Analysis"],
                "Jira_Number": obj["Jira_Number"],
                "Failure_Category": obj["Failure_Category"],
                "Failure_Subcategory": obj["Failure_Subcategory"],
                "Corrective_Action": obj["Corrective_Action"],
                "Customer": obj["Customer"],
                "Product_Line": obj["Product_Line"],
                "Product_Type_or_Project": obj["Product_Type_or_Project"],
            }
        )
        for obj in JIRA_dict
    ]

    return documents

json_docs = get_docs_from_json(file="CFA_failure_json.json")
json_docs

[Document(id_='66fd9a2e-0206-4cf0-8194-5198a846128b', embedding=None, metadata={'idFail_Analysis': 2, 'Jira_Number': 'RMAEXT-2013', 'Failure_Category': 'Controller', 'Failure_Subcategory': 'Transient Bit Flip', 'Corrective_Action': 'A new firmware to address this issue is now available.', 'Customer': 'LGE', 'Product_Line': 'iNAND', 'Product_Type_or_Project': 'Whale'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='GAT part read from PFMU address 0x34405A that expected LDT. but the PFMU indicated to MST. That means it  reused. So further analysis is not able to do. However VC integrity test shows invalid value as 0xFFFFFFDF. That could be occurred due to bit flip.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='fb1bacb8-4cd4-4b01-9ee7-020b4ded5a57', embedding=None, metadata={'idFail_Analysis': 3, 'Jira_Number'

In [14]:
len(json_docs)

12024

In [4]:
# Use this if you want to directly convert the json to Doucment without including
# the metadata
'''
JSON_reader = download_loader("JSONReader")
loader = JSON_reader()

#the load_data takes .json file and returns a list of documents
json_docs = loader.load_data(Path('CFA_sample_json.json'))
'''

  JSON_reader = download_loader("JSONReader")


### Client Connection

In [15]:
#once client is created (if created, there is a folder attached), no need to run it again and again
client = qdrant_client.QdrantClient(path="./qdrant_data_sample2")

RuntimeError: Storage folder ./qdrant_data_sample2 is already accessed by another instance of Qdrant client. If you require concurrent access, use Qdrant server instead.

In [24]:
#create a vector store
vector_store = QdrantVectorStore(
    client=client, 
    collection_name="CFA_sample",
    
)
# Now create a storage_conext to store the vector store locally in the qdrant_data2 folder
storage_context = StorageContext.from_defaults(vector_store=vector_store)

storage_context.persist(persist_dir="./persists_storage_context2")

### Mentioning LLM and EMBEDDING MODELS

In [25]:
llm = Ollama(model='mistral:latest')
ollama_embedding = HuggingFaceEmbedding(
    model_name="Alibaba-NLP/gte-large-en-v1.5",trust_remote_code=True
)

#OllamaEmbedding("mistral:latest") #OllamaEmbedding("nomic-embed-text:latest")

### Ingestion Pipeline

In [33]:
pipeline = IngestionPipeline(
        transformations=[
            SentenceSplitter(chunk_size=768, chunk_overlap=50),
            ollama_embedding,
        ],
        vector_store=vector_store
    )

In [34]:
pipeline.run(documents=json_docs, show_progress=True)

Parsing nodes: 100%|██████████| 12024/12024 [00:02<00:00, 4228.47it/s]
Generating embeddings: 100%|██████████| 12067/12067 [01:36<00:00, 125.07it/s]


ValueError: could not broadcast input array from shape (1024,) into shape (768,)

In [88]:
#now add what llm you want to use + the embedding model you want to use in the ingestion pipeline

Settings.llm = llm
# OllamaEMBEDDING or HuggingFaceEmbedding
Settings.embed_model = ollama_embedding

In [89]:
index = VectorStoreIndex.from_vector_store(vector_store,storage_context=storage_context, show_progress=True)
#VectorStoreIndex.from_documents(json_docs, storage_context=storage_context,show_progress=True)
query_engine = index.as_query_engine(llm,
                                     similarity_top_k=10,show_progress=True)

Parsing nodes: 100%|██████████| 1/1 [00:03<00:00,  3.85s/it]
Generating embeddings: 100%|██████████| 2048/2048 [1:27:18<00:00,  2.56s/it]
WARNING:llama_index.vector_stores.qdrant.base:Collection CFA_sample already exists, skipping collection creation.
Generating embeddings: 100%|██████████| 1427/1427 [1:01:34<00:00,  2.59s/it]


In [90]:
query_input = "What is the main cause of Transient Bit Flip. Can you also show which issues numbers faced this issues and their corrective answers"

res = query_engine.query(QueryBundle(
    query_input,
    embedding = Settings.embed_model.get_query_embedding(query_input)
))

print(res)

 The main cause of a Transient Bit Flip issue in the provided context is not explicitly stated for each case. However, it can be inferred that the Transient Bit Flip occurs in the controller SRAM or memory, leading to corruption in firmware structures or other issues like massive errors found on active blocks.

For the first issue (idFail_Analysis: 7066 and Jira_Number: RMAFA-6502), faced by Flex-In, the corrective action provided is a new firmware that addresses this issue to further reduce the DPPM level.

For the second issue (idFail_Analysis: 3360 and Jira_Number: RMAFA-4719), faced by Lenovo, the corrective action provided is also a new firmware to address the Transient Bit Flip issue. It is mentioned that previous cases RMAEXT-2064, RMAEXT-1372 reveal that it's GAT bit flip issue.

While the exact root cause of the Transient Bit Flip isn't explicitly stated in the context information, it can be inferred that factors like controller SRAM instability, unstable power supply, or fabr

In [45]:
'''
#user_input = input("Type something (or 'quit' to exit): ")
while(True):
    user_input = input("Type something (or 'quit' to exit): ")
    if user_input.lower() == 'quit':
        break

    res = query_engine.query(QueryBundle(
    query_input,
    embedding = Settings.embed_model.get_query_embedding(query_input)))
    print(res)
'''

 The given data does not provide specific details about the cause of Transient Bit Flip in the provided context. However, it's important to note that such events can be caused by factors like cosmic radiation, power supply instability, or high operating temperatures.

Regarding the failure issues and their corrective actions:
- `idFail_Analysis` 5059 (related to "No Trouble Found") had its proposed corrective action as setting the block size at least 0.15mm, controlling the switch force between 1.2~4.9N, and providing a dummy to all lid production vendors for switch force buy-off at their site.
- `idFail_Analysis` 15152 (related to "Application" and "Host Access Pattern") had its corrective action not explicitly mentioned in the data provided. However, it was suggested that retesting the drive in a different system could help determine if the same error message is observed. Additionally, a code review was underway for potential fixes related to this issue.
 The main cause of Transient 

from IPython.display import Markdown, display

display(Markdown(f"<b>{response}</b>"))