In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = str(0) + "," + str(1) 

import json
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
from tqdm import tqdm
from itertools import islice

In [2]:
print(os.getpid())

1262


In [3]:
# from huggingface_hub import login

# # Replace "your_huggingface_token" with your actual token
# login("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")

In [4]:
# import transformers
# import tokenizers
# print(transformers.__version__)

In [16]:
# Model checkpoint and paths
CHECKPOINT = "unsloth/Llama-3.3-70B-Instruct-bnb-4bit" # unsloth/Llama-3.3-70B-Instruct-bnb-4bit
CACHE_DIR = "/mntssd/mnt3/shanshanbai/my_storage_from_qian/.cache/huggingface/hub/"
INPUT_FILE = "/mntssd/mnt3/shanshanbai/my_storage_from_qian/results/generated tweets/building_info_6000.jsonl"
OUTPUT_FILE = "/mntssd/mnt3/shanshanbai/my_storage_from_qian/results/generated tweets/tweets_better_6000.jsonl"

In [6]:
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

# Define the quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,        # Specify Int8 quantization
    # llm_int8_threshold=6.0    # Adjust this if needed for performance
)

# Load the model with sharding
model = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT,
    device_map="balanced",      # Automatically shard layers across GPUs/CPU
    quantization_config=quantization_config,
    torch_dtype="float16",  # Use FP16 to save memory (optional)
    # offload_folder="./offload",  # Optional: Folder for CPU offloading if GPUs run out of memory
    cache_dir=CACHE_DIR
)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [7]:
# Create text generation pipeline
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="balanced",
    repetition_penalty=1.4, # Discourage repetition
    temperature=1.2,       # Increase randomness
    top_k=60,              # Consider top 50 probable words
    top_p=0.9,            # Use nucleus sampling
    max_new_tokens=250,
    return_full_text=False,  # To focus on the generated part only
)


Device set to use cuda:0


In [8]:
context_prompt = (
    "Generate tweets as if they were posted by real Twitter users in a specific building.\n"
    "Tweets should sent from the type of building describes in 'building tag'.\n"
    "Diversity within tweets for a single building: Ensure that each tweet reflects a unique perspective or experience.\n"
    "Consider varying the tone (e.g., humorous, synic, formal, casual), the length (short and concise, longer and detailed), and the use of mention or hashtag.\n"
    "Diversity across buildings: Avoid reusing templates or expressions between buildings.\n"
    "Imaginative Scenarios: Highlight varied aspects of the building, such as its architecture, services, location, history, or events. Be creative and explore different angles.\n"
    "Personas: Imagine switching personas for each tweet, simulating thoughts from different types of users, such as tourists, professionals, or families.\n"
    "Do not mention building names if it's a residential building."
    "Tweet should not start with 'Just', 'Scored','Love','Loving','Shopped','Living','Shopping','Spend','Shout','Exploring','Ran', 'Spent','Finally','Exciting','Excited','Woke','Still','Feeling','Moved','Die Aussicht','Je','My','Really'\n"
    "You must generate only one tweet in each language specified under 'tweet language distribution', written directly in that language.\n\n"
    "Returns output in this format: {building id: list of generated tweets}"
    
    "Example:\n\n"
    '{"building id":227579, "building city": "London", "building tags": "apartments", "building names": "Moo", "tweet language distribution": ["English", "German", "Chinese"]}\n\n'
    '{227579: ["Finally moved in my little aprtment in London! #NewBeginnings", "@Viola Erstaunlich ruhig, trotz der zentralen Lage.", "最近在练习冥想，好像时间都慢下来了。"]}'
)

In [9]:
# Format data into a prompt
def format_data_prompt(metadata):
    return json.dumps(metadata, ensure_ascii=False) + ' returns {building id: list of generated tweets}'

# Process a single line of metadata
def process_metadata(metadata):
    data_prompt = format_data_prompt(metadata)
    prompt = [
        {"role": "system", "content": context_prompt},
        {"role": "user", "content": data_prompt}]
    return prompt


In [10]:
# Function to write results iteratively
def write_result(outfile, index, result):
    try:
        outfile.write(json.dumps({"index": index, "output": result}, ensure_ascii=False) + "\n")
        outfile.flush()  # Immediately save to disk
    except Exception as e:
        print(f"Error writing result for index {index}: {e}")

In [11]:
# # Start processing from the 4000th row
# START_ROW = 1917

# with open(INPUT_FILE, "r", encoding="utf-8") as infile, open(OUTPUT_FILE, "a", encoding="utf-8") as outfile:
#     # Skip the first START_ROW - 1 rows
#     for index, line in enumerate(tqdm(islice(infile, START_ROW - 1, None), desc="Processing buildings"), start=START_ROW):
#         try:
#             metadata = json.loads(line)
#             prompt = process_metadata(metadata)

#             # Call the text generation pipeline
#             result = text_pipeline(prompt)
#             generated_text = result[0]["generated_text"]

#             # Write the generated result with the index
#             write_result(outfile, index, generated_text)
#         except Exception as e:
#             # Write "failed" with the exception message
#             write_result(outfile, index, str(e))

In [17]:
# Define your index list
INDEX_LIST = {4354,5946}  # Replace with the actual indices you need

# Open input and output files
with open(INPUT_FILE, "r", encoding="utf-8") as infile, open(OUTPUT_FILE, "a", encoding="utf-8") as outfile:
    for index, line in enumerate(tqdm(infile, desc="Processing buildings"), start=1):
        if index in INDEX_LIST:  # Process only if index is in the list
            try:
                metadata = json.loads(line)
                prompt = process_metadata(metadata)

                # Call the text generation pipeline
                result = text_pipeline(prompt)
                generated_text = result[0]["generated_text"]

                # Write the generated result with the index
                write_result(outfile, index, generated_text)
            except Exception as e:
                # Write "failed" with the exception message
                write_result(outfile, index, str(e))

Processing buildings: 0it [00:00, ?it/s]

Processing buildings: 6000it [04:44, 21.08it/s]
