In [None]:
import json
import regex as re 
import requests 
import sys
import pathlib
import os
import time

utils_dir = pathlib.Path.cwd().parent
sys.path.append(str(utils_dir)+"\\")
import utils
import natsort
import threading
import math

import numpy as np
from google import genai
from google.genai.types import *



In [32]:
# Vertex AI client used by request_anthropic for every generate_content call.
# NOTE(review): PROJECT_ID and LOCATION are not assigned in any cell visible in
# this notebook — they must already exist in the kernel, otherwise this line
# raises NameError on a fresh "Restart & Run All". Confirm where they are set.
genai_client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)


In [None]:


# Topics for the novels, loaded once from topics.json in the parent directory
# (utils_dir); the driver loop later indexes this by novel number.
with open(utils_dir / r"topics.json","r") as in_json:
    topics = json.load(in_json)

# Model identifier passed to the generate_content call.
model = "google/gemini-2.5-pro"

interaction_info = {}
# Prompt budget: generate_novel compares len(prompt) + max_completion_length
# against this value, so it is effectively applied to the prompt's character count.
context_window = 190000
# Passed as max_output_tokens in gemini_config below.
max_completion_length = 8192
# Target size of a finished novel, measured with len() on the text (characters).
novel_desired_length = 150000
# Number of novel indices the driver loop iterates over.
num_novels = 1000
# Chapter size in characters; quoted in the prompts and used to derive the chapter number.
chapter_length = 15000
# Rate limits, all per minute: requests, input tokens, output tokens.
RPM_limit = 2000
ITPM_limit = 800000
OTPM_limit = 160000
# Running usage counters that check_rate_limits compares against the limits above.
requests_sent = 0
itokens = 0
otokens = 0
# Spending cap for the run — presumably in the same currency unit as the two prices below.
spend_limit = 1000.0
# Prices per one million input / output tokens (they are multiplied by tokens / 1,000,000).
itoken_cost = 1.25
otoken_cost = 10.0


# Default file name for dumping a failed response.
# NOTE(review): no call in the visible cells passes this value anywhere.
error_js_name = "error.json"
# Directory that receives the exported plain-text novels.
text_out_dir = r"generated_text"
# Held by check_rate_limits so only one thread at a time evaluates / waits out the limits.
thread_pause_semaphore = threading.Semaphore(1)

# Shared generation config used by request_anthropic.
gemini_config = GenerateContentConfig(max_output_tokens=max_completion_length)
def request_anthropic(prompt, model, max_tokens=chapter_length):
    """Send one prompt to the model and return the finished completion.

    Despite its name, this function calls the google-genai client
    (``genai_client.models.generate_content``).

    Args:
        prompt: Text passed as ``contents`` to ``generate_content``.
        model: Model identifier passed through to the client.
        max_tokens: Output-token cap for this request. When it equals the cap
            already stored in ``gemini_config`` that shared config is used
            as-is; otherwise a copy with the requested cap is sent.

    Returns:
        A 4-tuple ``(response_dict, text, input_tokens, output_tokens)`` where
        ``response_dict`` is ``response.to_json_dict()`` and the token counts
        come from ``response.usage_metadata`` (0 when the API omits them).

    Raises:
        Exception: ``"invalid stop reason"`` when the first candidate did not
            finish with ``FinishReason.STOP`` (or there is no candidate), and
            ``"response contained no text"`` when a stopped response has no text.
    """
    # Honour the caller's cap without mutating the shared config object.
    request_config = gemini_config
    if max_tokens != gemini_config.max_output_tokens:
        request_config = gemini_config.model_copy(update={"max_output_tokens": max_tokens})

    print("request posted")
    response = genai_client.models.generate_content(model=model, contents=prompt, config=request_config)
    print("request returned")

    # Validate the finish reason before touching the text: a blocked or
    # truncated response may carry no candidates or no text at all.
    candidates = response.candidates or []
    finish_reason = candidates[0].finish_reason if candidates else None
    text = response.text
    if finish_reason != FinishReason.STOP:
        print(f"Error: {finish_reason}, {text}")
        raise Exception("invalid stop reason")
    if text is None:
        print(f"Error: {finish_reason}, {text}")
        raise Exception("response contained no text")

    # Usage fields are optional in the response; fall back to 0 so callers can
    # add them to their running totals.
    usage = response.usage_metadata
    itokens = (usage.prompt_token_count or 0) if usage is not None else 0
    otokens = (usage.candidates_token_count or 0) if usage is not None else 0

    print(text[:200])
    return response.to_json_dict(), text, itokens, otokens


def check_for_error(response, error_js_name):
    """Abort the run when a chat-completion style response did not stop normally.

    Args:
        response: Mapping that exposes ``response["choices"][0]["finish_reason"]``.
        error_js_name: Path of the JSON file that receives the offending
            response. An already-open writable file object is accepted too.

    Returns:
        None when the finish reason is ``"stop"``.

    Raises:
        SystemExit: via ``sys.exit()`` after the response has been dumped,
            when the finish reason is anything other than ``"stop"``.
    """
    finish_reason = response["choices"][0]["finish_reason"]
    if finish_reason == "stop":
        return

    print(f"invalid stop reason: {finish_reason}")
    if hasattr(error_js_name, "write"):
        # Caller handed us a file object: write to it directly.
        json.dump(response, error_js_name)
    else:
        # json.dump needs a file object, not a file name, so open the path first.
        with open(error_js_name, "w", encoding="utf-8") as error_file:
            json.dump(response, error_file)
    sys.exit()


In [None]:
# Export the text of every saved novel from its metadata JSON into a .txt file
# inside text_out_dir.
metadata_out_dir = "metadata"
interaction_info = {}
# exist_ok makes the cell safe to re-run without scanning the directory first.
os.makedirs(text_out_dir, exist_ok=True)

metadata_dir_path = os.path.join(os.getcwd(), metadata_out_dir)
files = utils.get_all_files_with_extension(metadata_dir_path, ".json")
files = natsort.natsorted(files)
print(f"files: {files}")
for file in files:
    with open(os.path.join(metadata_dir_path, file), "r") as infile:
        interaction_info = json.load(infile)
    text = interaction_info["novel_text"]

    # Raw string: "\d" in a normal string literal is an invalid escape sequence
    # and triggers a SyntaxWarning.
    index = int(re.search(r"(\d+)", os.path.basename(file)).group(0))
    text_path = os.path.join(os.getcwd(), text_out_dir, f"claude_sonnet{index}.txt")
    with open(text_path, "w", encoding="utf-8") as outfile:
        outfile.write(text)

  index = int(re.search("(\d+)",os.path.basename(file)).group(0))


files: ['metadata0.json', 'metadata1.json', 'metadata2.json', 'metadata3.json', 'metadata4.json', 'metadata5.json', 'metadata6.json', 'metadata7.json', 'metadata8.json', 'metadata9.json', 'metadata10.json', 'metadata11.json', 'metadata12.json', 'metadata13.json', 'metadata14.json', 'metadata15.json', 'metadata16.json', 'metadata17.json', 'metadata18.json', 'metadata19.json', 'metadata20.json', 'metadata21.json', 'metadata22.json', 'metadata23.json', 'metadata24.json', 'metadata25.json', 'metadata26.json', 'metadata27.json', 'metadata28.json', 'metadata29.json', 'metadata30.json', 'metadata31.json', 'metadata32.json', 'metadata33.json', 'metadata34.json', 'metadata35.json', 'metadata36.json', 'metadata37.json', 'metadata38.json', 'metadata39.json', 'metadata40.json', 'metadata41.json', 'metadata42.json', 'metadata43.json', 'metadata44.json', 'metadata45.json', 'metadata46.json', 'metadata47.json', 'metadata48.json', 'metadata49.json', 'metadata50.json', 'metadata51.json', 'metadata52.js

In [None]:
# Per-run bookkeeping shared by the worker threads.
skip_indices = []

# Usage counters start from zero every time this cell is run.
itokens = otokens = requests_sent = 0

# One lock per counter so each increment can be guarded independently.
requests_sent_semaphore = threading.Semaphore(1)
itokens_semaphore = threading.Semaphore(1)
otokens_semaphore = threading.Semaphore(1)

# Running cost totals live in cost.json: reuse the file when it exists,
# otherwise create it with zeroed totals.
if os.path.exists("cost.json"):
    with open("cost.json","r") as infile:
        cost_dict = json.load(infile)
else:
    cost_dict = {"total_cost": 0, "total_itokens": 0, "total_otokens": 0}
    with open("cost.json","w") as outfile:
        json.dump(cost_dict, outfile)
def update_cost_metadata(itokens, otokens):
    """Add one request's token usage to the running totals and persist them.

    Updates ``cost_dict`` in place, prints the per-direction estimates and
    rewrites ``cost.json`` with the new totals.

    Args:
        itokens: Input tokens consumed by the request.
        otokens: Output tokens produced by the request.

    Returns:
        True while the estimated total cost does not exceed ``spend_limit``,
        False once it does.
    """
    cost_dict["total_itokens"] += itokens
    cost_dict["total_otokens"] += otokens

    # Prices are quoted per million tokens.
    million_itokens = cost_dict["total_itokens"] / 1000000.0
    million_otokens = cost_dict["total_otokens"] / 1000000.0
    input_cost = million_itokens * itoken_cost
    output_cost = million_otokens * otoken_cost

    print(f"million itokens: {million_itokens}, estimated cost: {input_cost}")
    print(f"million otokens: {million_otokens}, estimated cost: {output_cost}")

    cost_dict["total_cost"] = input_cost + output_cost
    with open("cost.json", "w") as outfile:
        json.dump(cost_dict, outfile)

    # Compare against the configured budget instead of a duplicated literal.
    return cost_dict["total_cost"] <= spend_limit

def check_rate_limits():
    """Pause the calling thread when any usage counter has passed its limit.

    The check runs while holding ``thread_pause_semaphore``, so other threads
    that call this function wait until the current caller has finished — and,
    if a limit was exceeded, until it has slept for 70 seconds and zeroed the
    ``itokens``, ``otokens`` and ``requests_sent`` counters. This function
    resets the counters only after one of them has exceeded its limit.
    """
    global itokens, otokens, requests_sent

    with thread_pause_semaphore:
        within_limits = (
            itokens <= ITPM_limit
            and otokens <= OTPM_limit
            and requests_sent <= RPM_limit
        )
        if within_limits:
            return

        # Over a limit: wait it out, then start counting from zero again.
        time.sleep(70)
        itokens = 0
        otokens = 0
        requests_sent = 0

@utils.multithreading
def generate_novel(topic:str,index:int) -> None:
    """Generate (or resume) one novel by prompting the model until the text is long enough.

    Progress is kept in ``metadata{index}.json`` under ``metadata_out_dir``; it
    holds the accumulated ``novel_text`` and the list of raw ``responses``. If
    that file already exists the run continues from it. The file is rewritten
    after every successful request.

    NOTE(review): the function is wrapped by ``utils.multithreading``, whose
    behaviour is not visible in this notebook — what a caller actually receives
    as the return value depends on that decorator; confirm against utils.

    Args:
        topic: Subject inserted into the first prompt.
        index: Novel number; selects the metadata file and is checked against
            ``skip_indices``.

    Returns:
        The caught exception object when the request, or appending its text,
        fails. ``None`` when the index is in ``skip_indices``, when the novel
        reaches ``novel_desired_length`` characters, or when a response with
        fewer than 200 output tokens ends the run early. (The ``-> None``
        annotation does not reflect the error return.)

    Side effects:
        Increments the global ``requests_sent`` / ``itokens`` / ``otokens``
        counters, updates the cost totals through ``update_cost_metadata``, may
        append ``index`` to ``skip_indices``, and calls ``sys.exit()`` when
        ``update_cost_metadata`` reports that the cost limit was exceeded.
    """
    interaction_info = {}
    metadata_path = f"{os.getcwd()}\\{metadata_out_dir}\\metadata{index}.json"
    global itokens 
    global otokens 
    global requests_sent
    
    
    # Indices recorded as skipped are never generated again.
    if(index in skip_indices):
        print(f"novel index is in skipped indices: {index}")
        return

    # Start a fresh record, or resume from the saved one.
    if(not os.path.exists(metadata_path)):
        interaction_info["novel_text"] = ""
        interaction_info["responses"] = []
    else:
        print(f"found metadata at path {metadata_path}")
        with open(metadata_path,"r") as infile:
            interaction_info = json.load(infile)    
    

    # Character count of the text generated so far; kept in step with novel_text below.
    novel_current_length = len(interaction_info["novel_text"])
    first_prompt = f"I want you to act as a novelist writing about {topic}. The total length of the novel is about {novel_desired_length} characters and after generating each section of the novel i will tell you how much of the text you have generated so far. Try to generate text with some irregularities in order to fool ai detection software. In the novel you should include plot points characters dialogue and an overall plot. Generate only the text for this novel and do not add any annotations. Pace the plot and story according to the current number of characters I tell you. Generate the text in full paragraphs and chapters of about {chapter_length} character length. Continue generating chapter 1 until I tell you to stop. Generate EXCLUSIVELY novel text with no annotations confirmations or listing the chapter number."
    
    
    while(len(interaction_info["novel_text"])<novel_desired_length):
        prompt = ""
        if(novel_current_length == 0):
            prompt = first_prompt
        else:
            # Continuation prompt: the whole novel so far followed by the instruction,
            # with the chapter number derived from the current length.
            prompt = interaction_info["novel_text"] + f"Here is the previous generated text. You have generated {novel_current_length} characters so far. Generate chapter {math.floor(novel_current_length/chapter_length)+1} until i tell you to stop, also remember to generate text in complete paragraphs and do not generate lines with a single sentence. Generate EXCLUSIVELY novel text"
            
            # Too long: keep only the trailing (context_window - max_completion_length)
            # characters, which drops the oldest text and keeps the instruction at the end.
            if(len(prompt)+max_completion_length>context_window):
                prompt = prompt[len(prompt)-(context_window-max_completion_length)::]
        # May block (and sleep) here if the usage counters are over a limit.
        check_rate_limits() 
        try: 
            response,text,itk,otk = request_anthropic(prompt,model,max_tokens=max_completion_length)
        except Exception as err:
            # The error is handed back as the return value instead of being raised.
            print(f"request anthropic error catch:\n{err}")
            return err
        # Only successful requests reach the counter updates below.
        with requests_sent_semaphore:
            requests_sent += 1 
            
        
        with itokens_semaphore:
            itokens += itk
            
        with otokens_semaphore:
            otokens += otk
            

        # Both token locks are held while the cost totals and cost.json are updated.
        with itokens_semaphore,otokens_semaphore:
            if(not update_cost_metadata(itk,otk)):
                print(f"cost limit hit")
                sys.exit()
            
        interaction_info["responses"].append(response)
        try:
            interaction_info["novel_text"] += text
        except Exception as err:
            print(f"value errors from trying to get the text:\n{err}")
            return err
        novel_current_length += len(text)
        print(f"current novel: {index}")
        print(f"response text length: {len(text)}")
        print(f"total novel length: {novel_current_length}")
        print(f"preview: \n{text[:500]}")
        # Persist after every request so an interrupted run can resume from this point.
        with open(metadata_path,"w") as outfile:
           json.dump(interaction_info,outfile,indent = 5) 

        # A response with fewer than 200 output tokens ends this novel early and marks
        # the index as skipped — presumably because the model has stopped producing
        # useful text; confirm the intent of the threshold.
        if(otk < 200):
            print(f"adding {index} to skipped indices")
            skip_indices.append(index)
            return None
    return None



        

In [None]:
def _metadata_file_path(index: int) -> str:
    """Return the path of the metadata JSON that generate_novel writes for ``index``."""
    return os.path.join(os.getcwd(), metadata_out_dir, f"metadata{index}.json")


def is_written(index:int) -> bool:
    """Tell whether novel ``index`` needs no further generation.

    The metadata file is looked up under ``metadata_out_dir``, the directory
    where generate_novel stores it.

    Args:
        index: Novel number.

    Returns:
        False when no metadata file exists for the index. Otherwise True when
        the stored ``novel_text`` has reached ``novel_desired_length``
        characters or the index is listed in ``skip_indices``, else False.
    """
    metadata_path = _metadata_file_path(index)
    if not os.path.exists(metadata_path):
        return False

    with open(metadata_path, "r") as infile:
        interaction_info = json.load(infile)

    if len(interaction_info["novel_text"]) >= novel_desired_length:
        return True
    # A short novel still counts as done when it was deliberately skipped.
    return index in skip_indices


def is_generation_finished():
    """Tell whether every novel that has a metadata file is complete.

    Only indices with a ``.json`` file in ``metadata_out_dir`` are examined;
    files whose name contains no number are ignored.

    Returns:
        False as soon as one examined novel is not written, True otherwise.
    """
    metadata_dir = os.path.join(os.getcwd(), metadata_out_dir)
    files = natsort.natsorted(utils.get_all_files_with_extension(metadata_dir, ".json"))
    for file in files:
        # Raw string avoids the invalid "\d" escape warning.
        match = re.search(r"(\d+)", os.path.basename(file))
        if match is None:
            continue
        if not is_written(int(match.group(0))):
            return False
    return True



# Driver: restore the persisted skip list, then generate every novel that is
# not already written, retrying a novel until generate_novel reports no error.
skip_indices_path = os.path.join(os.getcwd(), "skip_indices.json")
if os.path.exists(skip_indices_path):
    with open(skip_indices_path, "r") as infile:
        skip_indices = json.load(infile)

for novel_index in range(num_novels):
    if is_written(novel_index):
        print(f"skipping index: {novel_index}")
        continue

    topic = topics[novel_index]
    err = generate_novel(topic, novel_index)
    # generate_novel hands back the caught exception on failure; retry after a
    # short pause until it reports none. There is no retry cap.
    while err is not None:
        print("restarting generation")
        time.sleep(10)
        err = generate_novel(topic, novel_index)

    # Persist the skip list after each novel so a restart keeps it.
    with open(skip_indices_path, "w") as outfile:
        json.dump(skip_indices, outfile)

  index = int(re.search("(\d+)",os.path.basename(file)).group(0))


skipping index: 0
skipping index: 1
skipping index: 2
skipping index: 3
skipping index: 4
skipping index: 5
skipping index: 6
skipping index: 7
skipping index: 8
skipping index: 9
skipping index: 10
skipping index: 11
skipping index: 12
skipping index: 13
skipping index: 14
skipping index: 15
skipping index: 16
skipping index: 17
skipping index: 18
skipping index: 19
skipping index: 20
skipping index: 21
skipping index: 22
skipping index: 23
skipping index: 24
skipping index: 25
skipping index: 26
skipping index: 27
skipping index: 28
skipping index: 29
skipping index: 30
skipping index: 31
skipping index: 32
skipping index: 33
skipping index: 34
skipping index: 35
skipping index: 36
skipping index: 37
skipping index: 38
skipping index: 39
skipping index: 40
skipping index: 41
skipping index: 42
skipping index: 43
skipping index: 44
skipping index: 45
skipping index: 46
skipping index: 47
skipping index: 48
skipping index: 49
skipping index: 50
skipping index: 51
skipping index: 52
ski

request returned
The fifteen-mile drive east from Inverness to Nairn followed the coastline, the grey expanse of the Moray Firth a constant, brooding presence to their left. Fraser drove this time, his earlier, twitch
request returned
million itokens: 348.928831, estimated cost: 436.16103875
million otokens: 33.964801, estimated cost: 339.64801
current novel: 913
response text length: 8155
total novel length: 48980
preview: 
The fifteen-mile drive east from Inverness to Nairn followed the coastline, the grey expanse of the Moray Firth a constant, brooding presence to their left. Fraser drove this time, his earlier, twitchy energy having subsided into a grim, focused silence. He was learning. The initial shock was being replaced by the dull, heavy armour of professional necessity. Gash sat beside him, watching the landscape shift from industrial estates to rolling farmland and then to the neat, stone-built houses of 
request posted
request returned
The forty-eight-hour deadline imposed 