In [None]:
'''
This is a simple proof of concept of using GPT4 to generate a list of ideas for valuable business software applications for OpenAI's GPT4 API,
then loop through each of the ideas and prompt GPT4 to write the python code to build the software for the idea along with corresponding unit tests.
Next, it will recursively run the code, capture errors, append them to the prompt's message list, and call GPT4 again to fix the errors and rewrite the code
until the code runs properly and passes all unit tests.

The RecursiveGPT() object takes four parameters: "max_retry_attempts_per_idea", "output_code_save_directory", "max_response_tokens", and "model". For each idea, it
feeds the console error output back to GPT4 and has it fix the error and rewrite its code, up to "max_retry_attempts_per_idea" times. If the code for an idea
succeeds, it is saved to a local python file in the "output_code_save_directory", with a filename derived from the idea name and the description
of the idea at the top of the file as a comment, and the loop then moves on to the next idea. The "max_response_tokens" determines the maximum response length of your
OpenAI API calls to GPT. Additionally "OPENAI_API_KEY" must be provided as an environmental variable (this is saved to the environmental variable automatically
in the "Run Recursive GPT" cell at the bottom of the page for convenience when using this notebook in Google Colab). This code uses the gpt-4 model by default,
so you must have access to the gpt-4 API in order to run this code. If you don't have access, change the "model" attribute of the RecursiveGPT() class to
"gpt-3.5-turbo" instead of "gpt-4", but do be aware that this will likely cause a significant reduction in performance and may cause errors with context length.
To start the recursive development process, just initialize an instance of the RecursiveGPT() class with those parameters (any you omit fall back to their defaults),
and then call its "run()" method. For example:

gpt = RecursiveGPT(
            max_retry_attempts_per_idea = 5,
            output_code_save_directory = './',
            max_response_tokens = 2048
            )
gpt.run()

It's far from perfect, and I've been seeing somewhere around ~30-50% of the attempted ideas succeed. However, it's relatively fast and fully automated, so it's
extremely cool that you can just run it and walk away, then come back in a few minutes to a folder with several working python scripts for new GPT4 applications.

This was just a late night idea I had and wanted to try out, but GPT4 actually wrote most of the code for it and other than some basic debugging, I haven't
spent much time going through it yet, so it's definitely still a work in progress with lots of room for improvement. That being said, it's a really cool proof
of concept, so I'm pretty optimistic that with a little effort cleaning the code up and optimizing the prompts, it could probably become a pretty powerful tool.
I am particularly interested in experimenting with improving the prompt engineering for how the model is instructed to build unit tests, as well as updating the
idea generation prompting to start creating more advanced applications by attempting more complex ideas, then having more thorough unit tests to correspond with
the increased complexity. I'm guessing that it'd be capable of making some relatively advanced stuff with good enough prompting and higher max_retry_attempts_per_idea.
Happy coding, and let me know if you come up with any cool upgrades or improvements!
'''

In [2]:
#@title RecursiveGPT(max_retry_attempts_per_idea = 5, output_code_save_directory = "./", max_response_tokens = 1024, model='gpt-4')


!pip install OpenAI

from google.colab import output
output.clear()


import openai
import json
import sys
import os
from io import StringIO
import contextlib


class RecursiveGPT(object):
    """Ask a GPT chat model for software ideas, then write, run and repair code for each idea.

    Workflow (see ``run``):
      1. ``generate_ideas`` asks the model for a list of ideas, one per line.
      2. For each idea, ``generate_code`` asks for a complete Python script including unit tests.
      3. ``run_code`` executes the script; on failure ``correct_code`` sends the error report back
         to the model and the rewritten script is executed again, up to
         ``max_retry_attempts_per_idea`` executions per idea.
      4. Scripts that run cleanly are written to ``output_code_save_directory``.

    Attributes:
      max_retry_attempts_per_idea: maximum number of times a script is executed per idea.
      output_code_save_directory: directory (always ending in "/") where working scripts are saved.
      max_response_tokens: ``max_tokens`` value sent with every chat completion request.
      model: name of the chat model to call.
      success_log / failure_log: lists of ``(idea, code)`` tuples filled in by ``run``.
      messages: the chat history of the idea currently being worked on.
    """

    # System prompt shared by every conversation this class starts.
    _SYSTEM_PROMPT = (
        "You are a helpful AI assistant with super-human intelligence and programming capabilities. "
        "You are an expert at both identifying business needs, and writing state of the art code to solve them. "
        "You are often considered to be the world's most talented experts at developing high-value business software for highly profitable commercial use cases, at writing advanced software with extraordinary capabilities and functionality, and at cutting-edge prompt engineering for OpenAI's GPT models to ensure the most precise and robust results possible. "
        "This combination of skills makes you one of the most talented, capable, and helpful AI assistants in history."
    )

    # User prompt that requests the list of ideas (one idea per line).
    _IDEAS_PROMPT = (
        "Generate a list of ideas for commercially valuable business software that could be made with GPT and can be used in Jupyter notebooks along with a detailed explanation on the same line explaining what the software will do, the specific use-cases it will be used for, and how it will help businesses. "
        "They each should be on new lines with no lines in between so that they may be turned into a list in python using your_response_string.split('\\n')."
    )

    # Prompt fragments that are repeated verbatim in the code-generation and code-correction prompts.
    _KEY_AND_OUTPUT_RULES = (
        "The OpenAI API Key required to call the OpenAI API endpoint for GPT should be accessed through an environmental variable using 'os.environ['OPENAI_API_KEY']' and no variables or external files may be used unless initialized in your code. "
        "Your output should include all necessary whitespace and contain only python code so that the code which you return may be automatically run as is without any additional parsing or editing. "
    )
    _INDENT_AND_IMPORT_RULES = (
        "Proper indentation should be included using tabs written as a backslash followed by the letter 't'. "
        "All required imports must be included in your code, and you must write the entire code so that your response can be copy & pasted directly into the exec() command. "
    )
    # The backslash is escaped so the prompt text stays exactly as before without relying on the
    # invalid "\{" escape sequence.
    _EXEC_NOTICE = "(your response string will be run directly using a 'exec(code, \\{}, exec_locals))'  command. "
    _FULL_CODE_RULE = "Your response must include the full code, using empty responses, only whitespace, or commenting out the entire code to get it to compile is considered cheating. "

    # Upper bound on how much captured console output is appended to an error report, so that a
    # chatty script cannot blow up the length of the correction prompt.
    _MAX_CONSOLE_CHARS = 2000

    def __init__(self, max_retry_attempts_per_idea = 5, output_code_save_directory = "./", max_response_tokens = 1024, model='gpt-4'):
        """Configure the generator and make sure the output directory exists.

        Raises:
          KeyError: if the OPENAI_API_KEY environment variable is not set.
        """
        openai.api_key = os.environ["OPENAI_API_KEY"]
        self.max_retry_attempts_per_idea = max_retry_attempts_per_idea
        # Files are saved to self.output_code_save_directory + file_name, so the directory must end
        # in a "/". An empty value means the current directory.
        save_directory = output_code_save_directory or "./"
        if not save_directory.endswith("/"):
            save_directory = save_directory + "/"
        os.makedirs(save_directory, exist_ok=True)
        self.output_code_save_directory = save_directory
        self.max_response_tokens = max_response_tokens
        self.model = model
        self.success_log = []
        self.failure_log = []
        self.messages = []

    def _request_completion(self, messages):
        """Send one chat completion request and return the text of the first choice."""
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=messages,
            max_tokens=self.max_response_tokens,
        )
        return response.choices[0].message['content']

    def call_gpt_chat(self, messages):
        """Call the chat model, retrying once with a trimmed history if the first call fails.

        The retry happens regardless of the error type. Before retrying, the oldest code version
        and the error report about it are removed from ``messages`` (in place), provided a newer
        code version remains in the history. This frees context space in case the failure was
        caused by the prompt length, while keeping the most recent code and its error report.

        The expected layout of ``messages`` is: system prompt, user request, assistant code, then
        alternating user error report / assistant rewrite, ending with a user message.

        Returns:
          The response text, or None if both attempts failed.
        """
        try:
            return self._request_completion(messages)
        except Exception as first_error:
            print("Error calling OpenAI API: " + str(first_error))

        # Indices 2 and 3 hold the oldest code version and the error report about it. They are only
        # dropped when a later rewrite (index 4) and its error report (index 5) exist, so the model
        # always sees the code that the final error message refers to.
        if len(messages) >= 6:
            del messages[2:4]

        try:
            return self._request_completion(messages)
        except Exception as retry_error:
            print("Error retrying OpenAI API call after removing first revision, skipping to next idea. Error Message: " + str(retry_error))
            return None

    def extract_code(self, response_text):
        """Return the code contained in a model response.

        The prompts ask for bare Python, so the response is normally returned unchanged. If the
        whole response is wrapped in a markdown code fence, the fence lines are removed because
        they can never be valid Python.
        """
        if not response_text:
            return response_text
        stripped = response_text.strip()
        if not stripped.startswith("```"):
            return response_text
        body_lines = []
        # Skip the opening fence line and stop at the first closing fence.
        for line in stripped.split("\n")[1:]:
            if line.strip().startswith("```"):
                break
            body_lines.append(line)
        return "\n".join(body_lines)

    def generate_ideas(self):
        """Ask the model for software ideas and return them as a list of non-empty lines.

        Raises:
          RuntimeError: if the API call failed, since nothing can be done without ideas.
        """
        self.messages = [
            {"role": "system", "content": self._SYSTEM_PROMPT},
            {"role": "user", "content": self._IDEAS_PROMPT},
        ]
        response_text = self.call_gpt_chat(self.messages)
        if response_text is None:
            raise RuntimeError("OpenAI API call failed while generating ideas; cannot continue.")
        # Blank separator lines are not ideas, so they are dropped here.
        return [line.strip() for line in response_text.splitlines() if line.strip()]

    def generate_code(self, idea):
        """Start a fresh conversation for ``idea`` and return the first version of its code.

        Returns:
          The generated code, or None if the API call failed.
        """
        user_prompt = (
            "Write the entire code for the software based on this idea: " + str(idea)
            + ". Your code **MUST** be specifically designed to for the specific idea's use case and not be a generic piece of software that could be used for any idea (such as a simple blanket OpenAI API call to directly provide the user query as a prompt to GPT, or any other simple/general software that does not provide value-add). "
            "For example, a customer support use case may choose to use langchain to search company documentation for context, a document-based use case may provide functionality for uploading/parsing of multiple file types, or any other features that you think will best increase the business value of your software to commercial users in the respective use case. "
            "Please include thorough comments in your code explaining the various features such as these that you build into your code which explain what the feature does and how it increases the overall business value of your software. "
            "The more unique, specialized, and elaborate that your software is for the respective use-case, the better your score will be. "
            "Your overall score for your code will be determined by a factor of your code's unique value-add on top of the baseline OpenAI API call to GPT4, the estimated business value which your software provides to commercial users, and how few errors your code will receive across various edge cases. "
            "A higher score reflects your being a more helpful, friendly, and well-aligned AI. "
            + self._KEY_AND_OUTPUT_RULES
            + "\n"
            "Include extremely thorough unit tests for the software which test all of your software's desired functionality and ensure that the entire software operates as intended. "
            "Include comments thoroughly explaining each unit test that you write, what functionality it's testing, and why this is important to the overall business value provided by the software. "
            "If any unit test fails when the code is run, it should throw an exception so that the code does not complete successfully in order to identify the failure. "
            "Your code will be automatically run to test if it works, and the console log outputs will be provided back to you in order for you to correct any errors, so you are encouraged to include thorough logging throughout your code and unit tests to provide you with all of the information you need to fix your code in as few iterations as possible. "
            "Both the thoroughness of your unit tests and how few iterations of debugging in which you are able to correct all errors and successfully run the software passing all of the thorough and robust unit tests will both be factored into your final score. "
            "Make sure the response contains only code "
            + self._EXEC_NOTICE
            + self._KEY_AND_OUTPUT_RULES
            + self._INDENT_AND_IMPORT_RULES
            + "\n"
            + self._FULL_CODE_RULE
            + "Do not write any other text before or after your code, such as talking or explaining your code, unless it is written as a python comment so that your full message may be passed directly to the exec() command without any additional reformatting."
        )
        self.messages = [
            {"role": "system", "content": self._SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]
        response_text = self.call_gpt_chat(self.messages)
        # A failed call must not be recorded as an assistant message or handed to exec().
        if response_text is None:
            return None
        self.messages.append({"role": "assistant", "content": response_text})
        return self.extract_code(response_text)

    def run_code(self, code):
        """Execute ``code`` as a script and report whether it completed without error.

        NOTE(security): this runs model-generated code with full access to the current process.
        Only use it in a disposable environment.

        Returns:
          ``(True, captured_stdout)`` on success, or ``(False, error_report)`` on failure, where
          the report contains the traceback followed by the tail of the captured stdout.
        """
        import traceback  # only needed for error reports

        # One dict serves as both globals and locals, exactly like a module. With separate dicts,
        # functions defined by the script cannot see the script's own imports or sibling functions.
        # __name__ is set so that `if __name__ == "__main__":` test entry points actually run.
        namespace = {"__name__": "__main__"}
        with contextlib.redirect_stdout(StringIO()) as captured:
            try:
                exec(code, namespace)
                failure = None
            except SystemExit as exit_request:
                # sys.exit()/unittest.main() must not terminate the caller; a zero or missing
                # exit status counts as success.
                if exit_request.code in (None, 0):
                    failure = None
                else:
                    failure = "SystemExit: " + str(exit_request.code)
            except Exception:
                failure = traceback.format_exc()
        console_output = captured.getvalue()
        if failure is None:
            return True, console_output
        if console_output:
            failure += "\nConsole output before the failure:\n" + console_output[-self._MAX_CONSOLE_CHARS:]
        return False, failure

    def correct_code(self, code, error):
        """Report ``error`` to the model and return its rewritten code.

        ``code`` is accepted for interface compatibility; the code being corrected is already part
        of ``self.messages``.

        Returns:
          The corrected code, or None if the API call failed.
        """
        correction_prompt = (
            "Your code resulted in the following Error: " + str(error)
            + "\nPlease correct the code and make sure the response contains only code "
            + self._EXEC_NOTICE
            + self._KEY_AND_OUTPUT_RULES
            + self._INDENT_AND_IMPORT_RULES
            + self._FULL_CODE_RULE
            + "If the provided code is empty, you must generate new code from scratch to for the idea. "
            "Do not write any other text before or after your code, such as apologizing or explaining your code, unless it is written as a python comment so that your full message may be passed directly to the exec() command without any additional reformatting."
        )
        self.messages.append({"role": "user", "content": correction_prompt})
        response_text = self.call_gpt_chat(self.messages)
        if response_text is None:
            return None
        self.messages.append({"role": "assistant", "content": response_text})
        return self.extract_code(response_text)

    def save_code_to_file(self, code, idea):
        """Write ``code`` to ``<output dir>/<first 50 chars of idea>_code.py`` with the idea as a header comment."""
        file_stem = str(idea[:50].replace(' ', '_').replace('/', '_').replace('.', '_'))
        filename = self.output_code_save_directory + file_stem + "_code.py"
        # Model output may contain non-ASCII characters, so the encoding is fixed explicitly.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"# {idea}\n\n")
            f.write(code)
        print(f"Saved code to {filename}")

    def _develop_idea(self, idea):
        """Generate, run and repair the code for one idea; record the outcome in the logs."""
        code = self.generate_code(idea)
        if code is None:
            print("Could not generate code for this idea, skipping to next idea.")
            self.failure_log.append((idea, None))
            return False

        max_attempts = self.max_retry_attempts_per_idea
        attempts_made = 0
        for attempt in range(max_attempts):
            attempts_made = attempt + 1
            print("Attempting to run code: \n\n\n" + str(code) + "\n\n\n")
            success, result = self.run_code(code)
            if success:
                print(f"Success on attempt {attempts_made}: {result}")
                self.success_log.append((idea, code))
                self.save_code_to_file(code, idea)
                return True
            print(f"Error on attempt {attempts_made}: {result}")
            # A rewrite requested after the final attempt would never be executed.
            if attempts_made == max_attempts:
                break
            corrected_code = self.correct_code(code, result)
            # None means the GPT call failed, so give up on this idea.
            if corrected_code is None:
                break
            code = corrected_code

        print(f"Failed after {attempts_made} attempts.")
        self.failure_log.append((idea, code))
        return False

    def _print_summary(self):
        """Print the ideas that succeeded and the ideas that failed."""
        print("\nResults:")
        print("Successful software:")
        for i, (idea, code) in enumerate(self.success_log):
            print(f"{i+1}. {idea}")
        print("\nFailed software:")
        for i, (idea, code) in enumerate(self.failure_log):
            print(f"{i+1}. {idea}")

    def run(self):
        """Generate ideas, attempt to build each one, and print a summary of the results."""
        ideas = self.generate_ideas()
        print("Generated ideas:")
        for i, idea in enumerate(ideas):
            print(f"{i+1}. {idea}")

        for idea in ideas:
            if len(idea) < 1:
                continue
            print(f"\nProcessing idea: {idea}")
            self._develop_idea(idea)

        self._print_summary()



In [None]:
#@title Run Recursive GPT
OPENAI_API_KEY = "Enter Your OpenAI API Key Here" #@param {type:"string"}
max_retry_attempts_per_idea = 5 #@param {type:"integer"}
output_code_save_directory = "gpt4_python_scripts/"  #@param {type:"string"}
max_response_tokens = 1024 #@param {type:"integer"}
model = 'gpt-4' #@param {type:"string"}

# Export the key from the form only when one was actually entered. If the form still holds the
# placeholder text, a key that is already present in the environment is left untouched instead of
# being overwritten with the placeholder.
_form_api_key = OPENAI_API_KEY.strip()
if _form_api_key and _form_api_key != "Enter Your OpenAI API Key Here":
    os.environ["OPENAI_API_KEY"] = _form_api_key
elif not os.environ.get("OPENAI_API_KEY"):
    # Fail here with a clear message rather than later with an opaque API error.
    raise ValueError(
        "No OpenAI API key provided: fill in the OPENAI_API_KEY form field "
        "or set the OPENAI_API_KEY environment variable before running this cell."
    )

# Build the generator from the form values above and start the generate/run/repair loop.
gpt = RecursiveGPT(
            max_retry_attempts_per_idea = max_retry_attempts_per_idea,
            output_code_save_directory = output_code_save_directory,
            max_response_tokens = max_response_tokens,
            model = model
            )
gpt.run()
