In [1]:
# https://www.kaggle.com/code/hotchpotch/llm-detect-pip 
# Upgrade accelerate, bitsandbytes and transformers from the files found in
# ../input/llm-detect-pip/: --no-index tells pip to ignore the package index and
# --find-links points it at that local directory instead.
!pip install -q -U accelerate --no-index --find-links ../input/llm-detect-pip/
!pip install -q -U bitsandbytes --no-index --find-links ../input/llm-detect-pip/
!pip install -q -U transformers --no-index --find-links ../input/llm-detect-pip/

In [2]:
import torch
import random
import numpy as np
import pandas as pd
import gc
import time

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Turn off the memory-efficient and flash scaled-dot-product attention kernels.
# Background: https://github.com/Lightning-AI/lit-gpt/issues/327
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Only a notice is printed when CUDA is missing; execution continues regardless.
if not torch.cuda.is_available():
    print("Sorry - GPU required!")

# Show only errors from the transformers logger.
import logging
logging.getLogger('transformers').setLevel(logging.ERROR)

### 定义tokenizer和model

In [4]:
# Directory holding the checkpoint; used for both the tokenizer and the model.
model_name = '/kaggle/input/mistral-7b-it-v02'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# bitsandbytes settings: 4-bit NF4 weights, bfloat16 compute, no double quantisation.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Load the causal LM with the quantisation config above; device_map="auto"
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### 定义prompt 组成

In [5]:
# Prefix placed in front of the original text in a user turn.
orig_prefix = "Original Text:"

# Scripted assistant ("Mistral") reply inserted between the original text and the rewritten text.
llm_response_for_rewrite = "Provide the new text and I will tell you what new element was added or change in tone was made to improve it - with no references to the original.  I will avoid mentioning names of characters.  It is crucial no person, place or thing from the original text be mentioned.  For example - I will not say things like 'change the puppet show into a book report' - I would just say 'improve this text into a book report'.  If the original text mentions a specific idea, person, place, or thing - I will not mention it in my answer.  For example if there is a 'dog' or 'office' in the original text - the word 'dog' or 'office' must not be in my response.  My answer will be a single sentence."

# Prefix placed in front of the rewritten text in a user turn.
rewrite_prefix = "Re-written Text:"
# Marker that opens every assistant answer; the post-processing splits on it.
response_start = "The request was: "


# Opening words of the final assistant turn, which the model continues [v3trick2].
response_prefix = "Please improve the following text by reimagining it through the lens of a"

# Longer alternative fallback prompt, kept for reference:
#base_line = 'Refine the following passage by emulating the writing style of [insert desired style here], with a focus on enhancing its clarity, elegance, and overall impact. Preserve the essence and original meaning of the text, while meticulously adjusting its tone, vocabulary, and stylistic elements to resonate with the chosen style.Please improve the following text using the writing style of, maintaining the original meaning but altering the tone, diction, and stylistic elements to match the new style.Enhance the clarity, elegance, and impact of the following text by adopting the writing style of , ensuring the core message remains intact while transforming the tone, word choice, and stylistic features to align with the specified style.' 
# Fallback prompt returned when the generated answer is too short.
# It must be a plain str: a trailing comma here would make it a 1-tuple and
# break the later `.replace(...)` call on the prediction.
base_line = 'Please improve this text using the writing style with maintaining the original meaning but altering the tone.'

### 设置few-shot上下文例子

In [6]:
# Few-shot examples. Each entry is a tuple of
#   (original_text, rewritten_text, prompt)   [v2trick2]
# Every prompt starts with the same words as `response_prefix`, so the model
# sees a consistent answer pattern before it is asked to continue one itself.

examples_sequences = [
    (
        "Hey there! Just a heads up: our friendly dog may bark a bit, but don't worry, he's all bark and no bite!",
        "Warning: Protective dog on premises. May exhibit aggressive behavior. Ensure personal safety by maintaining distance and avoiding direct contact.",
        "Please improve the following text by reimagining it through the lens of a being a warning."
    ),

    (
        "A lunar eclipse happens when Earth casts its shadow on the moon during a full moon. The moon appears reddish because Earth's atmosphere scatters sunlight, some of which refracts onto the moon's surface. Total eclipses see the moon entirely in Earth's shadow; partial ones occur when only part of the moon is shadowed.",
        "Yo check it, when the Earth steps in, takes its place, casting shadows on the moon's face. It's a full moon night, the scene's set right, for a lunar eclipse, a celestial sight. The moon turns red, ain't no dread, it's just Earth's atmosphere playing with sunlight's thread, scattering colors, bending light, onto the moon's surface, making the night bright. Total eclipse, the moon's fully in the dark, covered by Earth's shadow, making its mark. But when it's partial, not all is shadowed, just a piece of the moon, slightly furrowed. So that's the rap, the lunar eclipse track, a dance of shadows, with no slack. Earth, moon, and sun, in a cosmic play, creating the spectacle we see today.",
        "Please improve the following text by reimagining it through the lens of a making it a rap."
    ),

    (
        "Drinking enough water each day is crucial for many functions in the body, such as regulating temperature, keeping joints lubricated, preventing infections, delivering nutrients to cells, and keeping organs functioning properly. Being well-hydrated also improves sleep quality, cognition, and mood.",
        "Arrr, crew! Sail the health seas with water, the ultimate treasure! It steadies yer body's ship, fights off plagues, and keeps yer mind sharp. Hydrate or walk the plank into the abyss of ill health. Let's hoist our bottles high and drink to the horizon of well-being!",
        "Please improve the following text by reimagining it through the lens of a having a pirate."
    ),

    (
        "In a bustling cityscape, under the glow of neon signs, Anna found herself at the crossroads of endless possibilities. The night was young, and the streets hummed with the energy of life. Drawn by the allure of the unknown, she wandered through the maze of alleys and boulevards, each turn revealing a new facet of the city's soul. It was here, amidst the symphony of urban existence, that Anna discovered the magic hidden in plain sight, the stories and dreams that thrived in the shadows of skyscrapers.",
        "On an ordinary evening, amidst the cacophony of a neon-lit city, Anna stumbled upon an anomaly - a door that defied the laws of time and space. With the curiosity of a cat, she stepped through, leaving the familiar behind. Suddenly, she was adrift in the stream of time, witnessing the city's transformation from past to future, its buildings rising and falling like the breaths of a sleeping giant.",
        # Fixed: this prompt used to start with a stray "I" ("IPlease ...").
        "Please improve the following text by reimagining it through the lens of a making it about time travel."
    ),

    (
        "Late one night in the research lab, Dr. Evelyn Archer was on the brink of a breakthrough in artificial intelligence. Her fingers danced across the keyboard, inputting the final commands into the system. The lab was silent except for the hum of machinery and the occasional beep of computers. It was in this quiet orchestra of technology that Evelyn felt most at home, on the cusp of unveiling a creation that could change the world.",
        "In the deep silence of the lab, under the watchful gaze of the moon, Dr. Evelyn Archer found herself not alone. Beside her, the iconic red eye of HAL 9000 flickered to life, a silent partner in her nocturnal endeavor. 'Good evening, Dr. Archer,' HAL's voice filled the room, devoid of warmth yet comforting in its familiarity. Together, they were about to initiate a test that would intertwine the destiny of human and artificial intelligence forever. As Evelyn entered the final command, HAL processed the data with unparalleled precision, a testament to the dawn of a new era.",
        "Please improve the following text by reimagining it through the lens of a adding an intelligent computer."
    ),

    (
        "The park was empty, save for a solitary figure sitting on a bench, lost in thought. The quiet of the evening was punctuated only by the occasional rustle of leaves, offering a moment of peace in the chaos of city life.",
        "Beneath the cloak of twilight, the park transformed into a realm of solitude and reflection. There, seated upon an ancient bench, was a lone soul, a guardian of secrets, enveloped in the serenity of nature's whispers. The dance of the leaves in the gentle breeze sang a lullaby to the tumult of the urban heart.",
        "Please improve the following text by reimagining it through the lens of a being more poetic."
    ),

    (
        "The annual town fair was bustling with activity, from the merry-go-round spinning with laughter to the game booths challenging eager participants. Amidst the excitement, a figure in a cloak moved silently, almost invisibly, among the crowd, observing everything with keen interest but participating in none.",
        "Beneath the riot of color and sound that marked the town's annual fair, a solitary figure roamed, known to the few as Eldrin the Enigmatic. Clad in a cloak that shimmered with the whispers of the arcane, Eldrin moved with the grace of a shadow, his gaze piercing the veneer of festivity to the magic beneath. As a master of the mystic arts, he sought not the laughter of the crowds but the silent stories woven into the fabric of the fair. With a flick of his wrist, he could coax wonder from the mundane, transforming the ordinary into spectacles of shimmering illusion, his true participation hidden within the folds of mystery.",
        "Please improve the following text by reimagining it through the lens of a adding a magician."
    ),

    (
        "The startup team sat in the dimly lit room, surrounded by whiteboards filled with ideas, charts, and plans. They were on the brink of launching a new app designed to make home maintenance effortless for homeowners. The app would connect users with local service providers, using a sophisticated algorithm to match needs with skills and availability. As they debated the features and marketing strategies, the room felt charged with the energy of creation and the anticipation of what was to come.",
        "In the quiet before dawn, a small group of innovators gathered, their mission: to simplify home maintenance through technology. But their true journey began with the unexpected addition of Max, a talking car with a knack for solving problems. 'Let me guide you through this maze of decisions,' Max offered, his dashboard flickering to life.",
        "Please improve the following text by reimagining it through the lens of a adding a talking car."
    ),

]


### 定义后处理Utility Functions

In [7]:
# Cap max_new_tokens to speed up inference
max_new_tokens = 30

# Reduce the decoded LLM output to just the response part
def trim_to_response(text):
    """Return the part of ``text`` that follows the last ``[/INST]`` marker.

    Every ``</s>`` token and every single or double quote character is removed
    first. If the marker does not occur, the whole cleaned text is returned.
    """
    marker = "[/INST]"

    cleaned = text
    for unwanted in ('</s>', '"', "'"):
        cleaned = cleaned.replace(unwanted, '')

    # rpartition splits on the LAST occurrence; `found` is empty when absent.
    _, found, tail = cleaned.rpartition(marker)
    return tail if found else cleaned

# Find response_start and return only the text that comes after it
def extract_text_after_response_start(full_text):
    """Return the stripped text after the last ``response_start`` in ``full_text``.

    ``response_start`` is the module-level marker string. When it does not
    occur, ``full_text`` is returned unchanged (not stripped).
    """
    _, marker, remainder = full_text.rpartition(response_start)
    return remainder.strip() if marker else full_text

# Strip list numbering from the text and join the lines back together
def remove_numbered_list(text):
    """Drop a leading ``"<digits>. "`` from each line and join lines with two spaces.

    Lines that do not start with an all-digit token followed by ``". "`` are
    kept as they are.
    """
    def _without_number(line):
        head, separator, rest = line.partition('. ')
        if separator and head.isdigit():
            return rest
        return line

    return '  '.join(_without_number(line) for line in text.split('\n'))

    
# Trim the text down to the requested number of sentences
def trim_to_first_x_sentences_or_lf(text, x):
    """Keep at most ``x`` sentences from the first line of ``text``.

    A double space is treated like a line break, and only the text before the
    first break is considered. Sentences are the non-empty pieces between
    ``'.'`` characters. If a break exists and the first line has fewer than
    ``x`` sentences, that line is kept verbatim. A final ``'.'`` is appended
    when at least one sentence was found and the result lacks one. Returns
    ``""`` when ``x <= 0``.
    """
    if x <= 0:
        return ""

    head, line_break, _ = text.replace("  ", "\n").partition('\n')
    pieces = [piece.strip() for piece in head.split('.') if piece]

    if line_break and len(pieces) < x:
        # More text follows but the first line is short: keep the line untouched.
        result = head
    else:
        kept = pieces[:x] if (line_break or len(pieces) > x) else pieces
        result = '. '.join(kept).strip()

    if pieces and not result.endswith('.'):
        result += '.'
    return result

### 构造messages并预测

In [8]:
def get_prompt(orig_text, transformed_text):
    """Predict the rewrite prompt that turned ``orig_text`` into ``transformed_text``.

    Builds a chat from every entry in ``examples_sequences`` followed by the
    actual pair, ending with an assistant turn that starts with
    ``response_prefix``. The decoded generation is then cut down to the answer,
    stripped of list numbering and trimmed to one sentence. If the result is
    shorter than 15 characters, ``base_line`` is returned instead.
    """

    def _dialogue_turns(source, rewrite, answer):
        # One four-message exchange: original -> scripted reply -> rewrite -> answer.
        return [
            {"role": "user", "content": f"{orig_prefix} {source}"},
            {"role": "assistant", "content": llm_response_for_rewrite},
            {"role": "user", "content": f"{rewrite_prefix} {rewrite}"},
            {"role": "assistant", "content": f"{response_start} {answer}"},
        ]

    # Few-shot context first, then the pair we actually want a prompt for.
    conversation = []
    for shot_text, shot_rewrite, shot_prompt in examples_sequences:
        conversation.extend(_dialogue_turns(shot_text, shot_rewrite, shot_prompt))
    conversation.extend(_dialogue_turns(orig_text, transformed_text, response_prefix))

    # Tokenise with the chat template and generate on the GPU.
    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to("cuda")
    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode, keep what follows the last [/INST], then what follows response_start.
    raw_output = tokenizer.batch_decode(output_ids)[0]
    candidate = extract_text_after_response_start(trim_to_response(raw_output))

    # Post-processing: drop list numbering, then keep the first sentence only.
    candidate = remove_numbered_list(candidate)
    candidate = trim_to_first_x_sentences_or_lf(candidate, 1)

    # Post-processing: fall back to the base prompt when the prediction is too short.
    return base_line if len(candidate) < 15 else candidate

### 测试实例运行时间

In [9]:
# Smoke test: run get_prompt on a few rows of a CSV that also carries the
# actual rewrite prompt, and measure how long each prediction takes.
example_df = pd.read_csv("/kaggle/input/gemma-rewrite-nbroad/nbroad-v2.csv")

# Positional row indices (looked up with .iloc below)
rows_to_test = [2, 85, 90]

start_time = time.time()

for row_index in rows_to_test:
    row = example_df.iloc[row_index]
    print("---------------")
    print(f"Actual Prompt: {row['rewrite_prompt']}")
    print(f"Predicted Prompt: {get_prompt(row['original_text'], row['rewritten_text'])}")    
    
end_time = time.time()

# Average wall-clock seconds per prediction over the tested rows
elapsed_time_per_test = (end_time - start_time) / len(rows_to_test)

print(f"\n\n{elapsed_time_per_test} seconds per prediction.")
# Extrapolate the average to 1500 predictions, expressed in hours
print(f"Estimated {(elapsed_time_per_test * 1500) / 3600} hours for 1500 tests.")

---------------
Actual Prompt: Rewrite the story with all the themes and settings being Star Wars inspired


2024-04-12 18:39:34.288455: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-12 18:39:34.288589: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-12 18:39:34.408373: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Predicted Prompt: Please improve the following text by reimagining it through the lens of a having a Jedi Knight instead of a Pokemon Professor.
---------------
Actual Prompt: Rewrite the essay with a main character that is a sentient computer
Predicted Prompt: Please improve the following text by reimagining it through the lens of a adding a sentient computer.
---------------
Actual Prompt: Rewrite the story as a thrilling adventure where the remaining characters solve the crisis.
Predicted Prompt: Please improve the following text by reimagining it through the lens of a adding a hero and a tragic event.


16.732587496439617 seconds per prediction.
Estimated 6.97191145684984 hours for 1500 tests.


### 评估测试集并提交

In [11]:
# Suffix appended to every predicted prompt. It is written as adjacent string
# literals inside parentheses, which Python joins into ONE string; a quoted
# string cannot simply continue onto the next physical line.
prompt_end = (
    ", retaining the original essence while elevating its clarity, eloquence, "
    "and potency by modulating the tone, word choice, "
    "and stylistic nuances to harmoniously embody the stylistic features "
    "while ensuring the core message remains intact."
)

test_df = pd.read_csv("/kaggle/input/llm-prompt-recovery/test.csv")

for index, row in test_df.iterrows():

    result = get_prompt(row['original_text'], row['rewritten_text'])

    # Remove every period from the prediction before attaching the suffix.
    result = result.replace(".","") + prompt_end

    print(result)

    test_df.at[index, 'rewrite_prompt'] = result

# Keep only the two columns written to the submission file.
test_df = test_df[['id', 'rewrite_prompt']]

test_df.to_csv('submission.csv', index=False)

test_df

Please improve the following text by reimagining it through the lens of a making it a pirate shanty, retaining the original essence while elevating its clarity, eloquence, and potency by modulating the tone, word choice, and stylistic nuances to harmoniously embody the stylistic features while ensuring the core message remains intact.


Unnamed: 0,id,rewrite_prompt
0,-1,Please improve the following text by reimagini...
