In [7]:
from typing import List
import openai
import pinecone 
import os
from pinecone import Pinecone
from dotenv import load_dotenv
from pinecone import ServerlessSpec
# Populate os.environ via python-dotenv before any setting is read.
load_dotenv()

# Credentials and endpoint; each one is None when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

# Pinecone client shared by the following cells.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Configure the module-level OpenAI client.
openai.api_key = OPENAI_API_KEY
openai.base_url = OPENAI_BASE_URL

# Serverless placement: an unset or empty variable falls back to aws / us-east-1.
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)
# Create the index only when it is not listed yet, so re-running this cell
# does not attempt to create it a second time.

error_index = "error-summary-recommendations-small"
if error_index not in pc.list_indexes().names():
    pc.create_index(
        error_index,  # same variable as the existence check, so the two cannot diverge
        dimension=1536,  # dimensionality of text-embedding-ada-002
        metric='dotproduct',
        spec=spec
    )




In [8]:
# Get a handle to the index named by `error_index`; creating the index
# (when it is missing) is done by the pc.create_index call in the previous cell.

index = pc.Index(error_index)

In [15]:
import csv
import json

from uuid import uuid4

def get_embedding(text: str, model: str = "text-embedding-ada-002") -> List[float]:
    """Return the OpenAI embedding vector for ``text``.

    Args:
        text: The string to embed.
        model: Name of the embedding model passed to the OpenAI API.
            Defaults to "text-embedding-ada-002". The Pinecone index in this
            notebook is created with dimension=1536 for that model, so a
            different model must produce vectors of the same size.

    Returns:
        The embedding of the single input, taken from ``response.data[0]``.
    """
    response = openai.embeddings.create(input=text, model=model)
    # One string was sent, so the one result is the first entry of `data`.
    return response.data[0].embedding


def process_csv(input_file, output_file):
    """Export error/recommendation pairs from a CSV and index them in Pinecone.

    For every row of ``input_file`` this function, in order:
      1. writes an "Error: ... / Recommendation: ..." entry to ``output_file``,
      2. embeds that text with ``get_embedding``,
      3. upserts the vector and its metadata into the notebook-level ``index``,
      4. prints the row as a JSON object.

    Vector ids are derived from the row content, so running the function
    again on the same CSV overwrites the existing vectors instead of adding
    duplicates.

    Args:
        input_file: Path of a CSV file with the columns 'S. No',
            'Logic App Name', 'Error' and 'Recommendation'.
        output_file: Path of the text file to write (opened in 'w' mode, so
            an existing file is replaced).

    Raises:
        KeyError: If one of the required columns is missing from the CSV.
    """
    # Function-scope import: only the id derivation below needs these names.
    from uuid import NAMESPACE_URL, uuid5

    # 'utf-8-sig' drops a leading byte-order mark (written by some spreadsheet
    # exports) that would otherwise stay attached to the first header name and
    # break the column lookup; files without a BOM are decoded as plain UTF-8.
    with open(input_file, 'r', newline='', encoding='utf-8-sig') as csvfile, \
         open(output_file, 'w', encoding='utf-8') as txtfile:
        for row in csv.DictReader(csvfile):
            error = row['Error'].strip()
            recommendation = row['Recommendation'].strip()
            logic_app_name = row['Logic App Name']
            s_no = row['S. No']

            # Single source of truth for the text that is written, embedded,
            # stored as metadata and printed.
            text = f"Error: {error}\nRecommendation: {recommendation}"

            # Write to text file (a blank line separates entries)
            txtfile.write(f"{text}\n\n")

            # The "text" key is stored alongside the vector so the original
            # wording can be read back from query results.
            metadata = {
                "logic_app_name": logic_app_name,
                "s_no": s_no,
                "text": text,
            }

            # Deterministic id: the same row always maps to the same vector id.
            # The namespace is arbitrary; it only has to stay constant.
            vector_id = str(uuid5(NAMESPACE_URL, f"{s_no}|{logic_app_name}|{text}"))

            # Upsert the single item
            index.upsert(vectors=[(vector_id, get_embedding(text), metadata)])

            # Print the JSON object that can be used for Pinecone indexing
            print(json.dumps({
                "text": text,
                "metadata": {
                    "logic_app_name": logic_app_name,
                    "s_no": s_no,
                },
            }))


# Usage: run the export and indexing for the error summary CSV.
# Both paths are relative, so they resolve against the current working directory.

input_csv = 'Error Summary.csv'
output_txt = 'output_errors_recommendations.txt'
process_csv(input_csv, output_txt)

{"text": "Error: InvalidTemplate. Unable to process template language expressions in action 'Create_OilLog_record' inputs at line '0' and column '0': 'The template language expression 'int(xpath(xml(body('Get_blob_content')),'//*[local-name()=\"CMDDATA\"]/*[local-name()=\"PRODNO\"]/text()')[0])' cannot be evaluated because array index '0' cannot be selected from empty array. Please see https://aka.ms/logicexpressions for usage details.'.\nRecommendation: This error can be ignored because it occurs when the specified XPath query does not find any matching elements in the XML content.", "metadata": {"logic_app_name": "AGRO365-OilBusiness-ODATASendConnector-Prod", "s_no": "1"}}
{"text": "Error: ActionResponseTimedOut. The execution of template action 'Response_to_Adyen' is failed: the client application timed out waiting for a response from service. This means that workflow took longer to respond than the alloted timeout value. The connection maintained between the client application and 

In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
import os

from dotenv import load_dotenv
from pinecone import ServerlessSpec
# Populate os.environ via python-dotenv before reading the settings below.
load_dotenv()

# Keys and endpoint; each one is None when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

# Same model name that get_embedding uses when the vectors are written.
model_name = 'text-embedding-ada-002'

# LangChain embedding wrapper used to embed queries at search time.
embed = OpenAIEmbeddings(
    openai_api_base=OPENAI_BASE_URL,
    openai_api_key=OPENAI_API_KEY,
    model=model_name,
)

  from tqdm.autonotebook import tqdm
  warn_deprecated(


In [16]:
# Import under an alias: the bare name `Pinecone` is already bound to the
# client class from the `pinecone` package (first cell). Rebinding it to the
# LangChain vector store would make any later `Pinecone(api_key=...)` call
# construct the wrong class.
from langchain.vectorstores import Pinecone as PineconeVectorStore

# "text" is the metadata key under which process_csv stores the record text.
errorlog_vectorstore = PineconeVectorStore(
    index, embed.embed_query, "text"
)
query='message": "BadGateway"'

# Fetch the three stored records most similar to the query.
results = errorlog_vectorstore.similarity_search(query, k=3)



In [17]:
# Display the documents returned by the similarity_search call (k=3) in the previous cell.
results

[Document(page_content='Error: "message": "BadGateway",\nRecommendation: This error requires resubmission because a "Bad Gateway" (502) error occurs when the server acting as a gateway or proxy receives an invalid response from the upstream server.', metadata={'logic_app_name': 'AGRO365-NewBatch-ReceiveConnectorSync-Prod', 's_no': '5'}),
 Document(page_content='Error: "message": "Rate limit is exceeded. Try again in 8 seconds. Please see https://docs.microsoft.com/azure/logic-apps/handle-throttling-problems-429-errors for more details."\nRecommendation: Resubmit. Rate limit is exceeded', metadata={'logic_app_name': 'CircleK-Invoice-CheckReference-Prod', 's_no': '10'}),
 Document(page_content="Error: ActionResponseTimedOut. The execution of template action 'Response_to_Adyen' is failed: the client application timed out waiting for a response from service. This means that workflow took longer to respond than the alloted timeout value. The connection maintained between the client applicat