Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Breaking down large files into smaller chunks based on context window size #4

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions gpt_migrate/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, sourcedir, targetdir, sourcelang, targetlang, sourceentry, so
self.targetport = targetport
self.guidelines = guidelines
self.ai = ai
self.context_window_size = 1000


@app.command()
Expand Down
42 changes: 21 additions & 21 deletions gpt_migrate/steps/debug.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@ def debug_error(error_message,relevant_files,globals):

identify_action_template = prompt_constructor(HIERARCHY, GUIDELINES, IDENTIFY_ACTION)

prompt = identify_action_template.format(error_message=error_message[-min(MAX_ERROR_MESSAGE_CHARACTERS, len(error_message)):],
prompt = identify_action_template.format(error_message=error_message,
target_directory_structure=build_directory_structure(globals.targetdir))

actions = llm_run(prompt,
Expand Down Expand Up @@ -132,27 +132,27 @@ def debug_testfile(error_message,testfile,globals):

relevant_files = construct_relevant_files([("migration_source/"+testfile, source_file_content)])

file_name = f"gpt_migrate/{testfile}.tests.py"
try:
with open(os.path.join(globals.targetdir, file_name), 'r') as file:
old_file_content = file.read()
except:
print("File not found: "+file_name+". Please ensure the file exists and try again. You can resume the debugging process with the `--step test` flag.")
raise typer.Exit()
file_name = f"gpt_migrate/{testfile}.tests.py"
try:
with open(os.path.join(globals.targetdir, file_name), 'r') as file:
old_file_content = file.read()
except:
print("File not found: "+file_name+". Please ensure the file exists and try again. You can resume the debugging process with the `--step test` flag.")
raise typer.Exit()

debug_file_template = prompt_constructor(HIERARCHY, GUIDELINES, WRITE_CODE, DEBUG_TESTFILE, SINGLEFILE)

prompt = debug_file_template.format(error_message=error_message[-min(MAX_ERROR_MESSAGE_CHARACTERS, len(error_message)):],
file_name=file_name,
old_file_content=old_file_content,
relevant_files=relevant_files,
guidelines=globals.guidelines),

_, language, file_content = llm_write_file(prompt,
target_path=file_name,
waiting_message=f"Debugging {file_name}...",
success_message=f"Re-wrote {file_name} based on error message.",
globals=globals)
debug_file_template = prompt_constructor(HIERARCHY, GUIDELINES, WRITE_CODE, DEBUG_TESTFILE, SINGLEFILE)
prompt = debug_file_template.format(error_message=error_message[-min(MAX_ERROR_MESSAGE_CHARACTERS, len(error_message)):],
file_name=file_name,
old_file_content=old_file_content,
relevant_files=relevant_files,
guidelines=globals.guidelines),

_, language, file_content = llm_write_file(prompt,
target_path=file_name,
waiting_message=f"Debugging {file_name}...",
success_message=f"Re-wrote {file_name} based on error message.",
globals=globals)

with open(os.path.join(globals.targetdir, file_name), 'r') as file:
new_file_content = file.read()
Expand Down
32 changes: 18 additions & 14 deletions gpt_migrate/steps/migrate.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
from utils import split_file_into_chunks
from utils import prompt_constructor, llm_write_file, llm_run, build_directory_structure, copy_files, write_to_memory, read_from_memory
from config import HIERARCHY, GUIDELINES, WRITE_CODE, GET_EXTERNAL_DEPS, GET_INTERNAL_DEPS, ADD_DOCKER_REQUIREMENTS, REFINE_DOCKERFILE, WRITE_MIGRATION, SINGLEFILE, EXCLUDED_FILES
import os
Expand Down Expand Up @@ -57,20 +58,23 @@ def write_migration(sourcefile, external_deps_list, globals):
with open(os.path.join(globals.sourcedir, sourcefile), 'r') as file:
sourcefile_content = file.read()

prompt = write_migration_template.format(targetlang=globals.targetlang,
sourcelang=globals.sourcelang,
sourcefile=sourcefile,
sourcefile_content=sourcefile_content,
external_deps=','.join(external_deps_list),
source_directory_structure=globals.source_directory_structure,
target_directory_structure=build_directory_structure(globals.targetdir),
guidelines=globals.guidelines)

llm_write_file(prompt,
target_path=None,
waiting_message=f"Creating migration file for {sourcefile}...",
success_message=None,
globals=globals)
# Split the source file content into chunks based on the context window size for more efficient processing
chunks = split_file_into_chunks(sourcefile_content, globals.context_window_size)
for chunk in chunks:
prompt = write_migration_template.format(targetlang=globals.targetlang,
sourcelang=globals.sourcelang,
sourcefile=sourcefile,
sourcefile_content=chunk,
external_deps=','.join(external_deps_list),
source_directory_structure=globals.source_directory_structure,
target_directory_structure=build_directory_structure(globals.targetdir),
guidelines=globals.guidelines)

llm_write_file(prompt,
target_path=None,
waiting_message=f"Creating migration file for {sourcefile}...",
success_message=None,
globals=globals)

def add_env_files(globals):

Expand Down
107 changes: 57 additions & 50 deletions gpt_migrate/steps/test.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
from utils import split_file_into_chunks
from utils import prompt_constructor, llm_write_file, construct_relevant_files, find_and_replace_file
from config import HIERARCHY, GUIDELINES, WRITE_CODE, CREATE_TESTS, SINGLEFILE
import subprocess
Expand Down Expand Up @@ -38,60 +39,69 @@ def create_tests(testfile,globals):
# Makedir gpt_migrate in targetdir if it doesn't exist
if not os.path.exists(os.path.join(globals.targetdir, 'gpt_migrate')):
os.makedirs(os.path.join(globals.targetdir, 'gpt_migrate'))

old_file_content = ""
with open(os.path.join(globals.sourcedir, testfile), 'r') as file:
old_file_content = file.read()

create_tests_template = prompt_constructor(HIERARCHY, GUIDELINES, WRITE_CODE, CREATE_TESTS, SINGLEFILE)

prompt = create_tests_template.format(targetport=globals.targetport,
old_file_content=old_file_content,
guidelines=globals.guidelines)

_, _, file_content = llm_write_file(prompt,
target_path=f"gpt_migrate/{testfile}.tests.py",
waiting_message="Creating tests file...",
success_message=f"Created {testfile}.tests.py file in directory gpt_migrate.",
globals=globals)
return f"{testfile}.tests.py"
def create_tests(testfile,globals):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sweep: Don't run chunking in this one at all


old_file_content = ""
with open(os.path.join(globals.sourcedir, testfile), 'r') as file:
old_file_content = file.read()

# Split the old file content into chunks based on the context window size for more efficient processing
chunks = split_file_into_chunks(old_file_content, globals.context_window_size)
for chunk in chunks:
create_tests_template = prompt_constructor(HIERARCHY, GUIDELINES, WRITE_CODE, CREATE_TESTS, SINGLEFILE)

prompt = create_tests_template.format(targetport=globals.targetport,
old_file_content=chunk,
guidelines=globals.guidelines)

_, _, file_content = llm_write_file(prompt,
target_path=f"gpt_migrate/{testfile}.tests.py",
waiting_message="Creating tests file...",
success_message=f"Created {testfile}.tests.py file in directory gpt_migrate.",
globals=globals)
return f"{testfile}.tests.py"

def validate_tests(testfile,globals):
try:
with yaspin(text="Validating tests...", spinner="dots") as spinner:
# find all instances of globals.targetport in the testfile and replace with the port number globals.sourceport
find_and_replace_file(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), str(globals.targetport), str(globals.sourceport))
time.sleep(0.3)
result = subprocess.run(["python3", os.path.join(globals.targetdir,f"gpt_migrate/{testfile}")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True, text=True, timeout=15)
spinner.ok("✅ ")
print(result.stdout)
find_and_replace_file(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), str(globals.sourceport), str(globals.targetport))
typer.echo(typer.style(f"Tests validated successfully on your source app.", fg=typer.colors.GREEN))
return "success"
except subprocess.CalledProcessError as e:
find_and_replace_file(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), str(globals.sourceport), str(globals.targetport))
print("ERROR: ",e.output)
error_message = e.output
error_text = typer.style(f"Validating {testfile} against your existing service failed. Please take a look at the error message and try to resolve the issue. Once these are resolved, you can resume your progress with the `--step test` flag.", fg=typer.colors.RED)
typer.echo(error_text)
def validate_tests(testfile,globals):
try:
with yaspin(text="Validating tests...", spinner="dots") as spinner:
find_and_replace_file(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), str(globals.targetport), str(globals.sourceport))
time.sleep(0.3)
# Split the test file into chunks based on the context window size for more efficient processing
chunks = split_file_into_chunks(testfile, globals.context_window_size)
for chunk in chunks:
result = subprocess.run(["python3", os.path.join(globals.targetdir,f"gpt_migrate/{chunk}")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True, text=True, timeout=15)
spinner.ok("✅ ")
print(result.stdout)
find_and_replace_file(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), str(globals.sourceport), str(globals.targetport))
typer.echo(typer.style(f"Tests validated successfully on your source app.", fg=typer.colors.GREEN))
return "success"
except subprocess.CalledProcessError as e:
find_and_replace_file(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), str(globals.sourceport), str(globals.targetport))
print("ERROR: ",e.output)
error_message = e.output
error_text = typer.style(f"Validating {testfile} against your existing service failed. Please take a look at the error message and try to resolve the issue. Once these are resolved, you can resume your progress with the `--step test` flag.", fg=typer.colors.RED)
typer.echo(error_text)

if typer.confirm("Would you like GPT-Migrate to try to fix this?"):
return error_message
else:
tests_content = ""
with open(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), 'r') as file:
tests_content = file.read()
require_human_intervention(error_message,relevant_files=construct_relevant_files([(f"gpt_migrate/{testfile}", tests_content)]),globals=globals)
raise typer.Exit()
except subprocess.TimeoutExpired as e:
print(f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging.")
return f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging."
if typer.confirm("Would you like GPT-Migrate to try to fix this?"):
return error_message
else:
tests_content = ""
with open(os.path.join(globals.targetdir, f"gpt_migrate/{testfile}"), 'r') as file:
tests_content = file.read()
require_human_intervention(error_message,relevant_files=construct_relevant_files([(f"gpt_migrate/{testfile}", tests_content)]),globals=globals)
raise typer.Exit()
except subprocess.TimeoutExpired as e:
print(f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging.")
return f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging."

def run_test(testfile,globals):
try:
with yaspin(text="Running tests...", spinner="dots") as spinner:
time.sleep(0.3)
result = subprocess.run(["python3", os.path.join(globals.targetdir,f"gpt_migrate/{testfile}")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True, text=True, timeout=15)
chunks = split_file_into_chunks(testfile, globals.context_window_size)
for chunk in chunks:
result = subprocess.run(["python3", os.path.join(globals.targetdir,f"gpt_migrate/{chunk}")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True, text=True, timeout=15)
spinner.ok("✅ ")

print(result.stdout)
Expand All @@ -115,7 +125,4 @@ def run_test(testfile,globals):

except subprocess.TimeoutExpired as e:
print(f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging.")
return f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging."



return f"gpt_migrate/{testfile} timed out due to an unknown error and requires debugging."
34 changes: 22 additions & 12 deletions gpt_migrate/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -44,6 +44,11 @@ def llm_run(prompt,waiting_message,success_message,globals):

return output

def split_file_into_chunks(filepath, chunk_size):
    """Read a text file and split its content into fixed-size chunks.

    Args:
        filepath: Path of the file to read. It is opened in text mode.
        chunk_size: Maximum number of characters per chunk. Must be a
            positive integer.

    Returns:
        A list of consecutive substrings of the file content. Every chunk
        is ``chunk_size`` characters long except possibly the last one.
        An empty file yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
        OSError: If ``filepath`` cannot be opened.
    """
    # NOTE(review): the call sites visible in this change set pass
    # already-read file *content* (or a bare file name) rather than a path
    # that can be opened. Confirm which contract is intended before merging.

    # A negative step makes range() empty, which would silently drop the
    # whole file; a zero step fails with an unrelated-looking range() error.
    # Reject both up front with a clear message.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    with open(filepath, 'r') as file:
        content = file.read()

    return [
        content[start:start + chunk_size]
        for start in range(0, len(content), chunk_size)
    ]

def llm_write_file(prompt,target_path,waiting_message,success_message,globals):

file_content = ""
Expand All @@ -54,12 +59,15 @@ def llm_write_file(prompt,target_path,waiting_message,success_message,globals):
if file_name=="INSTRUCTIONS:":
return "INSTRUCTIONS:","",file_content

if target_path:
with open(os.path.join(globals.targetdir, target_path), 'w') as file:
file.write(file_content)
else:
with open(os.path.join(globals.targetdir, file_name), 'w') as file:
file.write(file_content)
# Split the file content into chunks based on the context window size for more efficient processing
chunks = split_file_into_chunks(file_content, globals.context_window_size)
for chunk in chunks:
if target_path:
with open(os.path.join(globals.targetdir, target_path), 'a') as file:
file.write(chunk)
else:
with open(os.path.join(globals.targetdir, file_name), 'a') as file:
file.write(chunk)

if success_message:
success_text = typer.style(success_message, fg=typer.colors.GREEN)
Expand All @@ -80,12 +88,14 @@ def llm_write_files(prompt,target_path,waiting_message,success_message,globals):
for result in results:
file_name,language,file_content = result

if target_path:
with open(os.path.join(globals.targetdir, target_path), 'w') as file:
file.write(file_content)
else:
with open(os.path.join(globals.targetdir, file_name), 'w') as file:
file.write(file_content)
chunks = split_file_into_chunks(file_content, globals.context_window_size)
for chunk in chunks:
if target_path:
with open(os.path.join(globals.targetdir, target_path), 'a') as file:
file.write(chunk)
else:
with open(os.path.join(globals.targetdir, file_name), 'a') as file:
file.write(chunk)

if not success_message:
success_text = typer.style(f"Created {file_name} at {globals.targetdir}", fg=typer.colors.GREEN)
Expand Down