# AI Demo - Set Up Instructions
##### Run the first two commands in Terminal, Bash or Command Prompt

## Virtual Environment Setup (Optional)

`python -m venv openai-env`

## Activation

- Windows: `openai-env\Scripts\activate`
- MacOS or Unix: `source openai-env/bin/activate`

If done correctly, you should see `(openai-env)` at the start of your terminal prompt, and you can select this environment as the kernel moving forward.

## API Key Setup

Follow the steps in the [OpenAI Quickstart Guide](https://platform.openai.com/docs/quickstart) to set up your API key.

You can also do the following 

+ Navigate to your project folder `cd ~/Desktop/source/my-project`

+ Create an .env file `touch .env`

+ Add your API key to the `.env` file: `OPENAI_API_KEY=your-api-key-here`



## Documentation

- OpenAI Documentation: https://platform.openai.com/docs/introduction
- OpenAI API Reference: https://platform.openai.com/docs/api-reference

## Libraries: 
- ```pip install --upgrade openai```
- ```pip install pyreadstat```
- ```pip install pandas```
- ```pip install urllib3``` (optional: the notebook itself only uses the standard-library `urllib.request`)

In [1]:
import pandas as pd 
import pyreadstat
import urllib.request

# Raw-download URL on GitHub (the path embeds a specific commit hash)
url = "https://github.com/zachhollow/GSS-2022-ChatGPT-Demo/raw/bfdacca0d979b272356c3b31638c219f808e8ea6/ReplicationData/GSS2018.sav"
local_file = "GSS2018.sav"

# Fetch the SPSS file and store it next to the notebook
urllib.request.urlretrieve(url, filename=local_file)

# pyreadstat returns the data as a DataFrame plus a metadata object
df, meta = pyreadstat.read_sav(local_file)

# Confirm the import and show the first rows
print(
    "DataFrame imported successfully.\n\nHere is a preview:\n",
    df.head(),
    sep="\n",
)

# Report how many variables (columns) the metadata lists
count = len(meta.column_names)
print(f"\nNumber of variables: {count}")

DataFrame imported successfully.

Here is a preview:

   ABANY  ABDEFECT  ABFELEGL  ABHELP1  ABHELP2  ABHELP3  ABHELP4  ABHLTH  \
0    2.0       1.0       NaN      1.0      1.0      1.0      1.0     1.0   
1    1.0       1.0       3.0      2.0      2.0      2.0      2.0     1.0   
2    NaN       NaN       NaN      1.0      2.0      1.0      1.0     NaN   
3    NaN       NaN       1.0      1.0      1.0      1.0      1.0     NaN   
4    2.0       1.0       NaN      2.0      2.0      2.0      1.0     1.0   

   ABINSPAY  ABMEDGOV1  ...  XMARSEX  XMARSEX1  XMOVIE  XNORCSIZ    YEAR  \
0       1.0        2.0  ...      1.0       1.0     NaN       6.0  2018.0   
1       2.0        NaN  ...      1.0       NaN     2.0       6.0  2018.0   
2       2.0        1.0  ...      NaN       1.0     2.0       6.0  2018.0   
3       1.0        NaN  ...      NaN       NaN     2.0       6.0  2018.0   
4       2.0        NaN  ...      1.0       NaN     2.0       6.0  2018.0   

   YEARSJOB  YEARSUSA  YEARVAL  

In [2]:
%%capture captured
# Print every variable name together with its label, both taken from `meta`.
# Because this cell starts with `%%capture captured`, nothing is displayed
# here; the printed text is stored in `captured` instead.
print("The full list of column names and labels:\n")
for var_name, var_label in zip(meta.column_names, meta.column_labels):
    print(f"{var_name}: {var_label}")

# To view the list, run `captured.show()` in a separate cell. `captured` is
# only assigned once this cell has finished, so it cannot be printed from here.

In [3]:
# Code Interpreter works more readily with CSV than with SPSS files,
# so write the DataFrame out as a CSV copy (without the index column).
csv_file_path = "GSS2018.csv"
df.to_csv(path_or_buf=csv_file_path, index=False)
print(f"File has been converted and saved to {csv_file_path}")

File has been converted and saved to GSS2018.csv


In [None]:
# TODO: Effectively need to add CRUD
from openai import OpenAI

# Create the API client (credentials are configured per "API Key Setup").
client = OpenAI()

# Upload the CSV so assistants can use it. `csv_file_path` already ends in
# ".csv", so it is opened as-is; the `with` block closes the handle once the
# upload call returns.
with open(csv_file_path, "rb") as csv_handle:
    file = client.files.create(
        file=csv_handle,
        purpose="assistants",
    )

In [None]:
# Look up the data file that the assistant's Code Interpreter will receive.
# NOTE(review): this file ID is hard-coded rather than taken from the upload
# cell; presumably it only exists in the author's account. Confirm, and
# consider using the ID returned by the upload instead.
file = client.files.retrieve("file-x6YV6lMHUF44eC03mKiVVR56")

# Create the assistant and attach the file to its Code Interpreter tool.
assistant = client.beta.assistants.create(
    name="Data Analyst",
    instructions="You are a data analyst. Summarize data and provide data visualizations.",
    model="gpt-4-turbo",
    tools=[{"type": "code_interpreter"}, {"type": "file_search"}],
    tool_resources={"code_interpreter": {"file_ids": [file.id]}},
)

# Open a new conversation thread.
thread = client.beta.threads.create()

# Add the question typed at the prompt to the thread as a user message.
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=input("\nYou: "),
)

from openai import AssistantEventHandler
from typing_extensions import override


# First, we create an EventHandler class to define
# how we want to handle the events in the response stream.
class EventHandler(AssistantEventHandler):
    """Print assistant text and Code Interpreter activity as events arrive."""

    @override
    def on_text_created(self, text) -> None:
        """Print the assistant prefix when a new text block starts."""
        print("\nAssistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        """Print each text fragment as it arrives, without a newline."""
        print(delta.value, end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Announce which type of tool the assistant has started calling."""
        print(f"\nAssistant > {tool_call.type}\n", flush=True)

    @override
    def on_tool_call_delta(self, delta, snapshot) -> None:
        """Print Code Interpreter input and log output; ignore other tools."""
        if delta.type != "code_interpreter":
            return

        interpreter = delta.code_interpreter
        # The payload is optional on a delta; skip deltas that carry none.
        if interpreter is None:
            return

        if interpreter.input:
            print(interpreter.input, end="", flush=True)
        if interpreter.outputs:
            print("\n\nOutput >", flush=True)
            for output in interpreter.outputs:
                if output.type == "logs":
                    print(f"\n{output.logs}", flush=True)
 
# Then, we use the `stream` SDK helper
# with the `EventHandler` class to create the Run
# and stream the response.
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions="Please address the user as Zach. The user has a background in data analytics and machine learning using Python.",
    event_handler=EventHandler(),
) as stream:
    stream.until_done()

# Collect the IDs of image files the assistant attached to its replies.
# Code Interpreter charts appear as "image_file" content blocks on messages.
generated_image_ids = []
for reply in client.beta.threads.messages.list(thread_id=thread.id):
    if reply.role != "assistant":
        continue
    for block in reply.content:
        if block.type == "image_file":
            generated_image_ids.append(block.image_file.file_id)

if generated_image_ids:
    # Save the first image found (messages are listed newest-first by default).
    image_data = client.files.content(generated_image_ids[0])
    image_data_bytes = image_data.read()

    # Use a dedicated handle name so the `file` variable that holds the
    # uploaded data file is not overwritten.
    with open("./my-image.png", "wb") as image_out:
        image_out.write(image_data_bytes)
else:
    print("\nNo image was generated in this run.")