In [None]:
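# The code below uses the legacy openai.ChatCompletion interface, which was removed in openai 1.0.
#%pip install "openai<1"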

In [24]:
import os
import openai
import getpass
import random
import datetime
import time

# Get API key from user

In [2]:
# Ask for the key through a hidden prompt (input is not echoed) instead of hard-coding it in the notebook.
openai.api_key = getpass.getpass("OPENAI API Key")

OPENAI API Key········


# Define prompt options

In [3]:
# Each sponsor row is [agency, requested length, plan name]; the prompt builders
# read them as "Write a <length> <agency> <plan name> ...".
Sponsor = [["NSF","2-page", "Data Management Plan"],
           ["NIH","2-page", "Data Sharing Plan"],
           ["IMLS","2-page", "Digital Product Plan"]]

# Placeholder names for the project lead and the data management lead.
PI = ["PI-1", "PI-2", "PI-3"]
DataManager = ["DM-1", "DM-2", "DM-3"]

SubjectAreas = ["Human Subjects Research", "Environmental Analysis", "Algorithm Development"]

# Each data type row is positional:
#   [0] kind of data generated          [4] target repository
#   [1] volume unit appended to the     [5] documentation standard(s)
#       randomly drawn volume           [6] license family
#   [2] working format (not referenced  [7] licenses to choose from
#       by the prompt builders here)
#   [3] format of the shared files
# The language in the software row is drawn once, when this cell runs, so it
# stays the same for every prompt built until the cell is re-executed.
DataTypes = [
    ["Survey data", "MB", "Spreadsheet", "CSV", "ICPSR", "DDI", "Creative Commons", ["CC0", "CC BY 4.0", "CC BY SA 4.0"]],
    ["GIS data", "GB", "Shapefiles", "GML", "Dryad", "DataCite and README", "Creative Commons", ["CC0", "CC BY 4.0", "CC BY SA 4.0"]],
    ["Instrument derived data", "GB", "CSV", "CSV", "Dryad", "DataCite and README", "Creative Commons", ["CC0", "CC BY 4.0", "CC BY SA 4.0"]],
    # A separating space is required here, otherwise the entry reads e.g. "Pythonsource code".
    ["Software code", "MB", random.choice(["Matlab","Python","R","Fortran"]) + " source code", "source code","Zenodo", "DataCite and README", "Open Source", ["Apache-2.0", "GPL-3.0-only", "BSD-3-Clause"]]
]


# Define prompt generation functions

In [31]:
def promptGen(rng=None):
    """Build one randomly drawn DMP scenario as two equivalent chat requests.

    A sponsor, PI, data manager, subject area, data type, data volume and
    license are drawn at random from the module-level option tables. The same
    scenario is then phrased twice: as one long user message, and as a system
    message followed by six short user messages.

    Args:
        rng: Optional object providing ``choice`` and ``randrange`` (for
            example a seeded ``random.Random``). When omitted, the global
            ``random`` module is used, exactly as before this parameter
            existed. Passing a seeded generator makes a scenario reproducible.

    Returns:
        dict with two keys:
            "prompt":   a one-element message list holding the single-string request.
            "messages": a seven-element message list holding the split request.
    """
    rng = random if rng is None else rng

    # The draw order is fixed so that a seeded generator always yields the same scenario.
    sponsor = rng.choice(Sponsor)
    lead = rng.choice(PI)
    manager = rng.choice(DataManager)
    subject = rng.choice(SubjectAreas)
    dataType = rng.choice(DataTypes)
    volume = rng.randrange(5,100)
    licenseName = rng.choice(dataType[7])

    # 80% of the generated volume is described as being shared and preserved.
    sharedVolume = int(round(.8*volume,0))

    # Sentence fragments used by both phrasings of the request.
    request = ("Write a " + sponsor[1] + " " + sponsor[0] + " " + sponsor[2]
               + " for a research project led by " + lead
               + " with data management led by " + manager)
    generated = str(volume) + dataType[1] + " of " + dataType[0]
    outcome = (" for use during the project, resulting in " + str(sharedVolume) + dataType[1]
               + " of " + dataType[3] + " files that will be shared and preserved in the "
               + dataType[4] + " repository")
    documented = "documented using the " + dataType[5] + " standard(s)"
    licensed = "shared under the terms of the " + licenseName + " " + dataType[6] + " license"
    preserved = ("preserved using UNM's LibNova Labdrive system for long-term preservation"
                 " in addition to any additional preservation provided by the target repository")

    promptText = (request
                  + "; in the subject area of " + subject
                  + "; for which " + generated + " will be generated" + outcome
                  + " and " + documented
                  + "; " + licensed
                  + "; and " + preserved)
    prompt = [{"role": "user", "content": promptText}]

    messages = [
        {"role":"system", "content":"You are a helpful assistant"},
        {"role":"user", "content":request},
        {"role":"user", "content":"The subject area of the data management plan is " + subject},
        {"role":"user", "content":"The project will generate " + generated + outcome},
        {"role":"user", "content":"The data generated by the project will be " + documented},
        {"role":"user", "content":"The generated materials will be " + licensed},
        {"role":"user", "content":"The generated materials will be " + preserved}
    ]

    return {"prompt":prompt, "messages":messages}

def promptDialog(rng=None):
    """Build one randomly drawn DMP scenario as a multi-message chat request.

    A sponsor, PI, data manager, subject area, data type, data volume and
    license are drawn at random from the module-level option tables and
    phrased as a system message followed by six user messages. The subject
    area is stated both in the opening request and in its own message.

    Args:
        rng: Optional object providing ``choice`` and ``randrange`` (for
            example a seeded ``random.Random``). When omitted, the global
            ``random`` module is used, exactly as before this parameter
            existed. Passing a seeded generator makes a scenario reproducible.

    Returns:
        list of seven ``{"role": ..., "content": ...}`` message dicts.
    """
    rng = random if rng is None else rng

    # The draw order is fixed so that a seeded generator always yields the same scenario.
    sponsor = rng.choice(Sponsor)
    lead = rng.choice(PI)
    manager = rng.choice(DataManager)
    subject = rng.choice(SubjectAreas)
    dataType = rng.choice(DataTypes)
    volume = rng.randrange(5,100)
    licenseName = rng.choice(dataType[7])

    # 80% of the generated volume is described as being shared and preserved.
    sharedVolume = int(round(.8*volume,0))

    opening = ("Write a " + sponsor[1] + " " + sponsor[0] + " " + sponsor[2]
               + " for a research project led by " + lead
               + " with data management led by " + manager
               + "; in the subject area of " + subject)
    volumes = ("The project will generate " + str(volume) + dataType[1] + " of " + dataType[0]
               + " for use during the project, resulting in " + str(sharedVolume) + dataType[1]
               + " of " + dataType[3] + " files that will be shared and preserved in the "
               + dataType[4] + " repository")

    messages = [
        {"role":"system", "content":"You are a helpful assistant"},
        {"role":"user", "content":opening},
        {"role":"user", "content":"The subject area of the data management plan is " + subject},
        {"role":"user", "content":volumes},
        {"role":"user", "content":"The data generated by the project will be documented using the " + dataType[5] + " standard(s)"},
        {"role":"user", "content":"The generated materials will be shared under the terms of the " + licenseName + " " + dataType[6] + " license"},
        {"role":"user", "content":"The generated materials will be preserved using UNM's LibNova Labdrive system for long-term preservation in addition to any additional preservation provided by the target repository"}
    ]
    return messages



# Define prompt submission and return function

In [12]:
# The submission parameters are explained in the OpenAI API reference:
# https://platform.openai.com/docs/api-reference/chat/create
def dmpGen(promptText):
    """Send a chat message list to gpt-3.5-turbo and return the generated plan.

    Args:
        promptText: list of ``{"role": ..., "content": ...}`` message dicts,
            passed through unchanged as the ``messages`` argument.

    Returns:
        dict with three keys:
            "prompt":   the message list that was submitted.
            "response": the complete object returned by the API call.
            "dmp":      the message content of the first returned choice.
    """
    requestOptions = {
        "model": "gpt-3.5-turbo",
        "messages": promptText,
        "max_tokens": 2048,
        "top_p": .25,
        "frequency_penalty": 1.0,
        "presence_penalty": 1.0,
    }
    response = openai.ChatCompletion.create(**requestOptions)
    # Only the first choice is read.
    dmpText = response['choices'][0]['message']['content']
    return {"prompt": promptText, "response": response, "dmp": dmpText}

# The resulting prompt and output were

In [16]:
# Draw one scenario and submit it in both phrasings.
prompts = promptGen()
singlePrompt = dmpGen(prompts['prompt'])
multiPrompt = dmpGen(prompts['messages'])

# (heading, result, closing rule) for each phrasing, in print order.
report = [
    ("The results for a single string prompt", singlePrompt,
     "\n=========================================================="),
    ("The results for a multi-message prompt", multiPrompt,
     "============================================================"),
]

for heading, result, closing in report:
    print(heading)
    print("--------------------------------------")
    print("\nThe prompt was:\n" , result["prompt"])
    print("++++++++++++++++++++++++++++++++++++++++++++")
    print("\nThe generated version-0 DMP was: \n", result["dmp"])
    print(closing)


The results for a single string prompt
--------------------------------------

The prompt was:
 [{'role': 'user', 'content': "Write a 2-page NSF Data Management Plan for a research project led by PI-2 with data management led by DM-1; in the subject area of Environmental Analysis; for which 66GB of GIS data will be generated for use during the project, resulting in 53GB of GML files that will be shared and preserved in the Dryad repository and documented using the DataCite and README standard(s); shared under the terms of the CC0 Creative Commons license; and preserved using UNM's LibNova Labdrive system for long-term preservation in addition to any additional preservation provided by the target repository"}]
++++++++++++++++++++++++++++++++++++++++++++

The generated version-0 DMP was: 
 .

Introduction

This Data Management Plan (DMP) outlines the data management practices for a research project led by PI-2 in the subject area of Environmental Analysis. The project aims to generate 6

# Generate multiple request-response sets

In [32]:
# Number of prompt sets to generate; each set makes two API calls
# (the single-string phrasing and the multi-message phrasing).
numberOfPrompts = 40
# Seconds to wait between API calls to avoid hitting the ChatGPT rate limits.
pauseSeconds = 60

# The UTC timestamp in the name keeps repeated runs from overwriting each other.
# now(timezone.utc) replaces the deprecated utcnow() and formats identically.
filename = "requestResponseSets_" + datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ") + ".txt"

# Explicit UTF-8: the generated text may contain characters that the
# platform's default encoding cannot represent.
with open(filename, 'w', encoding="utf-8") as f:
    for requestNumber in range(1, numberOfPrompts + 1):
        print("Request: " + str(requestNumber) + " out of " + str(numberOfPrompts))
        prompts = promptGen()
        singlePrompt = dmpGen(prompts['prompt'])
        time.sleep(pauseSeconds)
        multiPrompt = dmpGen(prompts['messages'])
        # No further request follows the last one, so the final pause is skipped.
        if requestNumber < numberOfPrompts:
            time.sleep(pauseSeconds)
        f.write("======================================\n")
        f.write("== Prompt set: " + str(requestNumber) + "\n" )
        f.write("======================================\n")
        f.write("\n")
        f.write("The results for a single string prompt\n")
        f.write("--------------------------------------\n")
        f.write("\nThe prompt was:\n" + str(singlePrompt["prompt"]) + "\n")
        f.write("++++++++++++++++++++++++++++++++++++++++++++\n")
        f.write("\nThe generated version-0 DMP was: \n" + singlePrompt["dmp"] + "\n")
        f.write("\n==========================================================\n")
        f.write("The results for a multi-message prompt\n")
        # Newline added so this section is laid out like the single-prompt section above.
        f.write("--------------------------------------\n")
        f.write("\nThe prompt was:\n" + str(multiPrompt["prompt"]) + "\n")
        f.write("++++++++++++++++++++++++++++++++++++++++++++\n")
        f.write("\nThe generated version-0 DMP was: \n" + multiPrompt["dmp"] + "\n")
        f.write("============================================================\n")
        f.write("\n")
        f.write("\n")
        # Push each completed set to disk so a kernel interruption during the long run keeps earlier sets.
        f.flush()

Request: 1 out of 40
Request: 2 out of 40
Request: 3 out of 40
Request: 4 out of 40
Request: 5 out of 40
Request: 6 out of 40
Request: 7 out of 40
Request: 8 out of 40
Request: 9 out of 40
Request: 10 out of 40
Request: 11 out of 40
Request: 12 out of 40
Request: 13 out of 40
Request: 14 out of 40
Request: 15 out of 40
Request: 16 out of 40
Request: 17 out of 40
Request: 18 out of 40
Request: 19 out of 40
Request: 20 out of 40
Request: 21 out of 40
Request: 22 out of 40
Request: 23 out of 40
Request: 24 out of 40
Request: 25 out of 40
Request: 26 out of 40
Request: 27 out of 40
Request: 28 out of 40
Request: 29 out of 40
Request: 30 out of 40
Request: 31 out of 40
Request: 32 out of 40
Request: 33 out of 40
Request: 34 out of 40
Request: 35 out of 40
Request: 36 out of 40
Request: 37 out of 40
Request: 38 out of 40
Request: 39 out of 40
Request: 40 out of 40
