In [None]:
%pip install openai
%pip install requests # To get error text from http server
import openai
import requests
openai.api_key="sk-***"  # Replace this with your API key

### Define an API to retrieve log details from external site. 
#### Using a static file stored in git in this case. 

In [11]:
# Definition of the function to get an error text
def get_error_message(msg_id="123456"):

    # This is a sample error message static text. This needs to be replaced by a db call or log file read
    url = "https://raw.githubusercontent.com/sakamakik-outlook/openai-hackday/main/data/errorlog.txt"

    return requests.get(url).text


# Function to be passed to OpenAI API
functions = [
    {
        "name": "get_error_message",
        "description": "Get the error message for a given message id",
        "parameters": {
            "type": "object",
            "properties": {
                "msg_id": {
                    "type": "string"
                }
            }
        },
    }
]

### Standard OpenAI API call without any log info. 
#### GPT cannot answer the question, saying "I don't have access to personal information or specific messages."

In [23]:
messages = [{
    "role": "user", 
    "content": """
        What happened to my message 123456?
    """}]

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    messages=messages
    )

print(response["choices"][0]["message"]["content"])

I'm sorry, as an AI language model, I don't have access to personal information or specific messages. Therefore, I cannot tell you what happened to a specific message like 123456.


### Adding a function to the parameters. 
#### 1. GPT will check if the function call is required. If yes, it returns "function_call" in the response
#### 2. If "function_call" is returned, we execute the function and get the result
#### 3. Run a new API call with the queston + result of function call (Error log in this case)


In [24]:
import json

messages = [{
    "role": "user", 
    "content": """
        What happened to my message 123456?
    """}]

# Step 1: call the API
response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=messages, 
        functions=functions,   # <---- this is the new part. Pass the functions to the API.
        function_call="auto",
        )

response_message = response["choices"][0]["message"]

# Step 2: check if GPT wanted to call a function
if response_message.get("function_call"):
        # Step 3: call the function
        # Note: the JSON response may not always be valid; be sure to handle errors
        available_functions = {
                "get_error_message": get_error_message,
        }  # only one function in this example, but you can have multiple
        function_name = response_message["function_call"]["name"]
        fuction_to_call = available_functions[function_name]
        function_args = json.loads(response_message["function_call"]["arguments"])
        function_response = fuction_to_call(
                msg_id=function_args.get("msg_id")
        )

        # Step 4: send the info on the function call and function response to GPT
        messages.append(response_message)  # extend conversation with assistant's reply
        messages.append(
                {
                "role": "function",
                "name": function_name,
                "content": function_response,
                }
        )  # extend conversation with function response
        second_response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-0613",
                messages=messages,
        )  # get a new response from GPT where it can see the function response
        print(second_response)

{
  "id": "chatcmpl-7dtjoYElkzLJkj07iPiRmgR9mtUSG",
  "object": "chat.completion",
  "created": 1689743404,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Based on the error message, it appears that there was a connection failure while processing your message with ID 123456. The error indicates that there was a problem with the database connection, specifically with the host name or IP address \"10.1.1.1\" and the service name or port number 446. The error code is -4498 and the SQL state is 08506. \n\nIt seems that the connection was re-established, but there may have been some impact on the processing of your message. You may need to review the logs or contact the relevant support team to investigate further and determine the status of your message."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 1509,
    "completion_tokens": 128,
    "total_tokens": 1637


In [1]:
import re
from collections import defaultdict

def parse_error_log(file_path):
    # Define regular expressions for the patterns we want to match
    regex_message = re.compile(r"^Info: processing message (\d+)")
    regex_exception = re.compile(r"^\w+\.\w+\.\w+\.\w+\.\w+\: ")
    regex_sql = re.compile(r"SQL \[(.*?)\]")
    regex_errorcode = re.compile(r"ERRORCODE=(.*?),")
    regex_sqlstate = re.compile(r"SQLSTATE=(.*?);")

    # Dictionaries to store the counts of each item
    message_counts = defaultdict(int)
    exception_counts = defaultdict(int)
    sql_counts = defaultdict(int)
    errorcode_counts = defaultdict(int)
    sqlstate_counts = defaultdict(int)

    with open(file_path, "r") as f:
        for line in f:
            # Try to match each pattern and increment the count if a match is found
            message_match = regex_message.search(line)
            if message_match:
                message_counts[message_match.group(1)] += 1

            exception_match = regex_exception.match(line)
            if exception_match:
                exception = exception_match.group(0).strip(": ")
                exception_counts[exception] += 1

            sql_match = regex_sql.search(line)
            if sql_match:
                sql_counts[sql_match.group(1)] += 1

            errorcode_match = regex_errorcode.search(line)
            if errorcode_match:
                errorcode_counts[errorcode_match.group(1)] += 1

            sqlstate_match = regex_sqlstate.search(line)
            if sqlstate_match:
                sqlstate_counts[sqlstate_match.group(1)] += 1

    return {
        "message_counts": dict(message_counts),
        "exception_counts": dict(exception_counts),
        "sql_counts": dict(sql_counts),
        "errorcode_counts": dict(errorcode_counts),
        "sqlstate_counts": dict(sqlstate_counts),
    }

# Parse the error log file and get the results
results = parse_error_log("data/errorlog.txt")

print(results)


{'message_counts': {'1234567': 1}, 'exception_counts': {}, 'sql_counts': {'UPDATE DBO.STG_BILLPAY_INV_BT SET INV_SENT_FLG = ?, MDY_TS = ?, MDY_USR = ? WHERE INV_PDF_NM = ?': 1}, 'errorcode_counts': {'-4498': 3}, 'sqlstate_counts': {'08506': 1}}


In [4]:
import re

input_file_path = "data/errorlog.txt"
# Define regular expressions for the patterns we want to match
regex_message = re.compile(r"^Info: processing message (\d+)")
regex_exception = re.compile(r"^\w+\.\w+\.\w+\.\w+\.\w+\: ")
regex_sql = re.compile(r"SQL \[(.*?)\]")
regex_errorcode = re.compile(r"ERRORCODE=(.*?),")
regex_sqlstate = re.compile(r"SQLSTATE=(.*?);")

output= ""
with open(input_file_path, "r") as input_file:
    for line in input_file:
        # Check if the line matches any of the patterns
        if (regex_message.search(line) or regex_exception.match(line) or
            regex_sql.search(line) or regex_errorcode.search(line) or
            regex_sqlstate.search(line)):
            output+=line
            # If it does, write the line to the output file
                



In [5]:
output

'Info: processing message 1234567\norg.springframework.dao.DataAccessResourceFailureException: PreparedStatementCallback; SQL [UPDATE DBO.STG_BILLPAY_INV_BT SET INV_SENT_FLG = ?, MDY_TS = ?, MDY_USR = ? WHERE INV_PDF_NM = ?]; [jcc][t4][2027][11212][3.53.70] A connection failed but has been re-established. The host name or IP address is "10.1.1.1" and the service name or port number is 446.\nSpecial registers may or may not be re-attempted (Reason code = 2). ERRORCODE=-4498, SQLSTATE=08506; nested exception is com.ibm.db2.jcc.b.ClientRerouteException: [jcc][t4][2027][11212][3.53.70] A connection failed but has been re-established. The host name or IP address is "10.1.1.1" and the service name or port number is 446.\nSpecial registers may or may not be re-attempted (Reason code = 2). ERRORCODE=-4498, SQLSTATE=08506\nSpecial registers may or may not be re-attempted (Reason code = 2). ERRORCODE=-4498, SQLSTATE=08506\n'

In [6]:
!pip install openai

Collecting openai
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
     ---------------------------------------- 73.6/73.6 KB 1.3 MB/s eta 0:00:00
Installing collected packages: openai
Successfully installed openai-0.27.8


You should consider upgrading via the 'c:\users\ravi\appdata\local\programs\python\python37\python.exe -m pip install --upgrade pip' command.


In [18]:
import openai
import os
from collections import defaultdict
import re

openai.api_key = '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'  # replace with your actual OpenAI API key

def parse_log_file(file_path):
    # Define regular expressions for the patterns we want to match
    regex_message = re.compile(r"^Info: processing message (\d+)")
    regex_exception = re.compile(r"^\w+\.\w+\.\w+\.\w+\.\w+\: ")
    regex_sql = re.compile(r"SQL \[(.*?)\]")
    regex_errorcode = re.compile(r"ERRORCODE=(.*?),")
    regex_sqlstate = re.compile(r"SQLSTATE=(.*?);")

    # Dictionaries to store the counts of each item
    message_counts = defaultdict(int)
    exception_counts = defaultdict(int)
    sql_counts = defaultdict(int)
    errorcode_counts = defaultdict(int)
    sqlstate_counts = defaultdict(int)

    with open(file_path, "r") as f:
        for line in f:
            # Try to match each pattern and increment the count if a match is found
            message_match = regex_message.search(line)
            if message_match:
                message_counts[message_match.group(1)] += 1

            exception_match = regex_exception.match(line)
            if exception_match:
                exception = exception_match.group(0).strip(": ")
                exception_counts[exception] += 1

            sql_match = regex_sql.search(line)
            if sql_match:
                sql_counts[sql_match.group(1)] += 1

            errorcode_match = regex_errorcode.search(line)
            if errorcode_match:
                errorcode_counts[errorcode_match.group(1)] += 1

            sqlstate_match = regex_sqlstate.search(line)
            if sqlstate_match:
                sqlstate_counts[sqlstate_match.group(1)] += 1

    return {
        "message_counts": dict(message_counts),
        "exception_counts": dict(exception_counts),
        "sql_counts": dict(sql_counts),
        "errorcode_counts": dict(errorcode_counts),
        "sqlstate_counts": dict(sqlstate_counts),
    }



# Parse all error log files in the directory and get the results
all_results = defaultdict(lambda: defaultdict(int))
for filename in os.listdir(r"C:\Users\ravi\Downloads\openai-hackday-main\openai-hackday-main\data"):
    print(filename)
    if filename.endswith(".log"):
        file_results = parse_log_file(os.path.join("data", filename))
        print(file_results)
        for key in file_results:
            for item, count in file_results[key].items():
                all_results[key][item] += count

# Now the all_results dictionary contains the combined counts for all files

# Now, let's say a user enters a query
user_query = "What is the most common exception?"

# We can use the GPT-3 API to answer this question based on the data in all_results
response = openai.Completion.create(
  engine="text-davinci-002",
  prompt=f"{user_query}\n\n{all_results}",
  temperature=0.5,
  max_tokens=100
)

print(response.choices[0].text.strip())

Android_2k.log
{'message_counts': {}, 'exception_counts': {}, 'sql_counts': {}, 'errorcode_counts': {}, 'sqlstate_counts': {}}
Apache.log
{'message_counts': {}, 'exception_counts': {}, 'sql_counts': {}, 'errorcode_counts': {}, 'sqlstate_counts': {}}
Apache.log.csv
Apache_2k.log
{'message_counts': {}, 'exception_counts': {}, 'sql_counts': {}, 'errorcode_counts': {}, 'sqlstate_counts': {}}
BGL_2k.log
{'message_counts': {}, 'exception_counts': {}, 'sql_counts': {}, 'errorcode_counts': {}, 'sqlstate_counts': {}}
Certificate Error 7e0e33b5f17845de89cc12e12a2f0bf6.log
{'message_counts': {}, 'exception_counts': {}, 'sql_counts': {}, 'errorcode_counts': {}, 'sqlstate_counts': {}}
DB errors 5c3c2269fc0145f0a8aabf5aa33e2650.log
{'message_counts': {}, 'exception_counts': {}, 'sql_counts': {}, 'errorcode_counts': {}, 'sqlstate_counts': {}}
errorlog.log
{'message_counts': {'1234567': 1}, 'exception_counts': {}, 'sql_counts': {'UPDATE DBO.STG_BILLPAY_INV_BT SET INV_SENT_FLG = ?, MDY_TS = ?, MDY_USR 

In [19]:
import re
from collections import defaultdict

def parse_log_file(file_path):
    # Define regular expressions for the patterns we want to match
    regex_timestamp = re.compile(r'\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\b')
    regex_loglevel = re.compile(r'\b(INFO|WARN|ERROR|FATAL)\b')
    regex_errorcode = re.compile(r'\bERRORCODE=(\d+)\b')

    # Dictionaries to store the counts of each item
    timestamp_counts = defaultdict(int)
    loglevel_counts = defaultdict(int)
    errorcode_counts = defaultdict(int)

    with open(file_path, "r") as f:
        for line in f:
            # Try to match each pattern and increment the count if a match is found
            timestamp_match = regex_timestamp.search(line)
            if timestamp_match:
                timestamp_counts[timestamp_match.group(0)] += 1

            loglevel_match = regex_loglevel.search(line)
            if loglevel_match:
                loglevel_counts[loglevel_match.group(0)] += 1

            errorcode_match = regex_errorcode.search(line)
            if errorcode_match:
                errorcode_counts[errorcode_match.group(1)] += 1

    return {
        "timestamp_counts": dict(timestamp_counts),
        "loglevel_counts": dict(loglevel_counts),
        "errorcode_counts": dict(errorcode_counts),
    }

# Parse the log file and get the results
all_results = defaultdict(lambda: defaultdict(int))
for filename in os.listdir(r"C:\Users\ravi\Downloads\openai-hackday-main\openai-hackday-main\data"):
    print(filename)
    if filename.endswith(".log"):
        file_results = parse_log_file(os.path.join("data", filename))
        print(file_results)
#results = parse_log_file("/path/to/your/log/file.txt")

#print(results)


Android_2k.log
{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
Apache.log
{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
Apache.log.csv
Apache_2k.log
{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
BGL_2k.log
{'timestamp_counts': {}, 'loglevel_counts': {'INFO': 1597, 'FATAL': 347, 'ERROR': 41}, 'errorcode_counts': {}}
Certificate Error 7e0e33b5f17845de89cc12e12a2f0bf6.log
{'timestamp_counts': {'2022-05-12 14:30:00': 1, '2022-05-12 14:32:15': 1}, 'loglevel_counts': {}, 'errorcode_counts': {}}
DB errors 5c3c2269fc0145f0a8aabf5aa33e2650.log
{'timestamp_counts': {}, 'loglevel_counts': {'ERROR': 11}, 'errorcode_counts': {}}
errorlog.log
{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
Hadoop_2k.log
{'timestamp_counts': {'2015-10-18 18:01:47': 1, '2015-10-18 18:01:48': 2, '2015-10-18 18:01:49': 1, '2015-10-18 18:01:50': 15, '2015-10-18 18:01:51': 24, '2015-10-18 18:01:52': 3, '2015-10-18 18:01:53'

{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
Server error 9f9bf05e426f401f862db0cd057e3947.log
{'timestamp_counts': {'2022-08-30 14:30:42': 1, '2022-08-30 14:32:15': 1, '2022-08-30 14:35:10': 1, '2022-08-30 14:40:15': 1, '2022-08-30 14:50:10': 1, '2022-08-30 15:05:30': 1, '2022-08-30 15:15:20': 1, '2022-08-31 09:20:12': 1, '2022-08-31 10:30:30': 1, '2022-08-31 12:45:10': 1, '2022-08-31 13:20:20': 1, '2022-08-31 14:50:10': 1, '2022-08-31 16:30:15': 1}, 'loglevel_counts': {'ERROR': 13}, 'errorcode_counts': {}}
Spark_2k.log
{'timestamp_counts': {}, 'loglevel_counts': {'INFO': 2000}, 'errorcode_counts': {}}
SSH_2k.log
{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
Thunderbird_2k.log
{'timestamp_counts': {}, 'loglevel_counts': {}, 'errorcode_counts': {}}
Windows_2k.log
{'timestamp_counts': {'2016-09-28 04:30:30': 1, '2016-09-28 04:30:31': 128, '2016-09-28 04:30:32': 109, '2016-09-28 04:30:33': 127, '2016-09-28 04:30:34': 17, '2016-09-28 04

In [10]:
%pwd

'C:\\Users\\ravi\\Downloads\\openai-hackday-main\\openai-hackday-main'

In [None]:
os.listdir