In [7]:
import argparse
import torch
import numpy as np
import random
import os
# CUDA-related environment settings. They are assigned before the model
# libraries further down this cell are imported, so the values are already in
# place when those imports run.
# NOTE(review): BNB_CUDA_VERSION is presumably read by bitsandbytes to select
# its CUDA 11.8 build -- confirm against the installed bitsandbytes version.
os.environ['BNB_CUDA_VERSION'] = '118' # need to use Cuda 11.8
_cuda11_lib64 = '/usr/local/cuda-11/lib64' # path to your cuda-11 lib64
# Prepend rather than overwrite so library directories that are already on
# LD_LIBRARY_PATH are preserved. Empty and duplicate entries are dropped so
# re-running this cell does not keep growing the variable.
_inherited_lib_dirs = [
    entry
    for entry in os.environ.get('LD_LIBRARY_PATH', '').split(os.pathsep)
    if entry and entry != _cuda11_lib64
]
os.environ['LD_LIBRARY_PATH'] = os.pathsep.join([_cuda11_lib64] + _inherited_lib_dirs)

# import required modules:

# from exp.exp_model import Exp_Model
from data_load.dataloader import DataLoader
from explain_module.util import summarize_trial, remove_reflections, save_results#, save_agents
from explain_module.agents import PredictReflectAgent
from predict_module.merge_peft_adapter import merge_peft_adapter
from predict_module.supervised_finetune import supervised_finetune
from predict_module.train_reward_model import train_reward_model
from predict_module.tuning_lm_with_rl import tuning_lm_with_rl
from transformers import LlamaTokenizer, pipeline #, AutoModelForCausalLM, BitsAndBytesConfig
from trl import AutoModelForCausalLMWithValueHead
import os, json
import pandas as pd

In [None]:
# OpenAI credentials (the original note says the key is needed "for openai
# api_key in 'tenacity'").
# Prefer a key that is already exported in the environment; the placeholder is
# only installed when OPENAI_API_KEY is not set at all, so a real key is never
# clobbered and never has to be committed inside the notebook.
os.environ.setdefault("OPENAI_API_KEY", 'enter_your_openai_api_key_here')

# Seed every random number generator this notebook touches so that runs are
# repeatable (pick your own seed if you like).
fix_seed = 100
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)

In [9]:
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy, so a flag could never be switched
    off from the command line.

    Args:
        value: The raw token (or an actual ``bool``).

    Returns:
        ``True`` for true/t/yes/y/1 and ``False`` for false/f/no/n/0 or the
        empty string (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays falsy, matching what bool("") returned before.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Single argument parser shared by every stage of the pipeline (data loading,
# supervised fine-tuning, reward-model training, RL tuning and evaluation).
parser = argparse.ArgumentParser(description='generating')

# load data
parser.add_argument("--price_dir", type=str, default="data/price/preprocessed/")
parser.add_argument("--tweet_dir", type=str, default="data/tweet/raw/")
parser.add_argument("--seq_len", type=int, default=5)

# supervised finetuning
parser.add_argument("--wandb", action="store_true", default=False)
parser.add_argument("--data_path", type=str, default="./data/merge_sample.json")
parser.add_argument("--output_path", type=str, default="./saved_models/lora-Vicuna")
parser.add_argument("--model_path", type=str, default="lmsys/vicuna-7b-v1.5-16k")
parser.add_argument("--eval_steps", type=int, default=200)
parser.add_argument("--save_steps", type=int, default=200)
parser.add_argument("--resume_from_supervised_checkpoint", type=str, default=None)
parser.add_argument("--ignore_data_skip", type=str, default="False")

# training reward model
parser.add_argument("--num_reflect_trials", type=int, default=2)
parser.add_argument("--datasets_dir", type=str, default="./datasets/")
parser.add_argument('--local_rank', type=int, default=0, help="Used for multi-gpu")
parser.add_argument('--resume_from_reward_checkpoint', type=_str2bool, default=False, help="If you want to resume training where it left off.")
parser.add_argument('--deepspeed', type=str, default=None, help="Path to deepspeed config if using deepspeed. You may need this if the model that you want to train doesn't fit on a single GPU.")
parser.add_argument('--per_device_train_batch_size', type=int, default=1)
parser.add_argument('--per_device_eval_batch_size', type=int, default=1)
parser.add_argument('--reward_gradient_accumulation_steps', type=int, default=32)
parser.add_argument('--reward_learning_rate', type=float, default=2e-5)
# Weight decay is fractional, so it must be parsed as float (int() would reject "0.01").
parser.add_argument('--weight_decay', type=float, default=0.001)
parser.add_argument('--reward_base_model', type=str, default="lmsys/vicuna-7b-v1.5-16k", help="The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc.")
parser.add_argument('--bf16', type=_str2bool, default=False, help="This essentially cuts the training time in half if you want to sacrifice a little precision and have a supported GPU.")
parser.add_argument('--num_train_epochs', type=int, default=1, help="The number of training epochs for the reward model.")
parser.add_argument('--train_subset', type=int, default=100000, help="The size of the subset of the training data to use")
parser.add_argument('--eval_subset', type=int, default=50000, help="The size of the subset of the eval data to use")
parser.add_argument('--gradient_checkpointing', type=_str2bool, default=False, help="Enables gradient checkpointing.")
parser.add_argument('--optim', type=str, default="adamw_hf", help="The optimizer to use.")
parser.add_argument('--lr_scheduler_type', type=str, default="linear", help="The lr scheduler")
parser.add_argument('--reward_adapter', type=str, default="./saved_models/reward_model_vicuna-7b")

# reinforcement learning
parser.add_argument('--rl_base_model', type=str, default="./saved_models/lora-Vicuna-adapter-merged", help="the model name")
parser.add_argument('--tokenizer_name', type=str, default="lmsys/vicuna-7b-v1.5-16k", help="the tokenizer name")
parser.add_argument('--reward_model_name', type=str, default="./saved_models/reward_model_vicuna-7b-adapter-merged", help="the reward model name")
parser.add_argument('--log_with', type=str, default=None, help="use 'wandb' to log with wandb")
parser.add_argument('--rl_learning_rate', type=float, default=1.4e-5, help="the learning rate")
parser.add_argument('--output_max_length', type=int, default=128, help="maximum length for generation")
parser.add_argument('--mini_batch_size', type=int, default=1, help="the PPO minibatch size")
parser.add_argument('--batch_size', type=int, default=1, help="the batch size")
parser.add_argument('--ppo_epochs', type=int, default=4, help="the number of ppo epochs")
parser.add_argument('--rl_gradient_accumulation_steps', type=int, default=1, help="the number of gradient accumulation steps")
parser.add_argument('--adafactor', type=_str2bool, default=False, help="whether to use the adafactor optimizer")
parser.add_argument('--early_stopping', type=_str2bool, default=True, help="whether to early stop")
parser.add_argument('--target_kl', type=float, default=0.1, help="kl target for early stopping")
parser.add_argument('--reward_baseline', type=float, default=0, help="a baseline value that is subtracted from the reward")
parser.add_argument('--batched_gen', type=_str2bool, default=True, help="whether to use the batched text gen")
parser.add_argument('--save_freq', type=int, default=None, help="n steps to save the model")
parser.add_argument('--output_dir', type=str, default="./saved_models/tuning_llama_rl_checkpoints/", help="directory to save the model")
parser.add_argument('--seed', type=int, default=0, help="the seed")

# for evaluation
parser.add_argument("--num_shots", type=int, default=4)
parser.add_argument("--save_dir", type=str, default="results/")

# A notebook has no real command line, so the overrides are passed explicitly
# as a list; everything not listed here keeps the default declared above.
llama_model_path="lmsys/vicuna-7b-v1.5-16k" # 'Llama_models/vicuna-7b-v1.5-16k' # "lmsys/vicuna-7b-v1.5-16k"
args = parser.parse_args(args=[
    '--model_path',llama_model_path,
    '--reward_base_model',llama_model_path,
    '--tokenizer_name',llama_model_path,
    '--price_dir',"data/price/preprocessed/", # "data/sample_price/preprocessed/",
    '--tweet_dir',"data/tweet/raw/", # "data/sample_tweet/raw/",
    '--adafactor',"True",
    '--ppo_epochs','20',
    '--output_max_length','128',
    ])
print('Args in experiment:')
print(args)

# Build the project's DataLoader from the parsed arguments; the evaluation
# cell below calls `dataloader.load(flag="test")` on this object.
dataloader = DataLoader(args)

Args in experiment:
Namespace(price_dir='data/price/preprocessed/', tweet_dir='data/tweet/raw/', seq_len=5, wandb=False, data_path='./data/merge_sample.json', output_path='./saved_models/lora-Vicuna', model_path='lmsys/vicuna-7b-v1.5-16k', eval_steps=200, save_steps=200, resume_from_supervised_checkpoint=None, ignore_data_skip='False', num_reflect_trials=2, datasets_dir='./datasets/', local_rank=0, resume_from_reward_checkpoint=False, deepspeed=None, per_device_train_batch_size=1, per_device_eval_batch_size=1, reward_gradient_accumulation_steps=32, reward_learning_rate=2e-05, weight_decay=0.001, reward_base_model='lmsys/vicuna-7b-v1.5-16k', bf16=False, num_train_epochs=1, train_subset=100000, eval_subset=50000, gradient_checkpointing=False, optim='adamw_hf', lr_scheduler_type='linear', reward_adapter='./saved_models/reward_model_vicuna-7b', rl_base_model='./saved_models/lora-Vicuna-adapter-merged', tokenizer_name='lmsys/vicuna-7b-v1.5-16k', reward_model_name='./saved_models/reward_mode

In [10]:
# Fetch the test split and wrap every row in a prediction/reflection agent.
data = dataloader.load(flag="test")

agent_cls = PredictReflectAgent


def _agent_from_row(record):
    """Create one agent from a row exposing 'ticker', 'summary' and 'target'."""
    return agent_cls(record['ticker'], record['summary'], record['target'])


test_agents = [_agent_from_row(record) for _index, record in data.iterrows()]
print("Loaded Test Agents.")

Loaded Test Agents.


In [12]:
# Load the three objects the evaluation loop passes to every agent: the policy
# model (with value head), its tokenizer, and the reward model wrapped in a
# "sentiment-analysis" pipeline. Both models are requested in 4-bit with
# automatic device placement.
model = AutoModelForCausalLMWithValueHead.from_pretrained(
    "./saved_models/sep_model",
    load_in_4bit=True,
    device_map="auto"
)
# os.path.join keeps the tokenizer path valid whether or not --output_dir ends
# with a path separator (plain string concatenation required the trailing "/").
tokenizer = LlamaTokenizer.from_pretrained(
    os.path.join(args.output_dir, "final")
)  # alternative checkpoint: os.path.join(args.output_dir, "step_saved")
reward_model = pipeline(
    "sentiment-analysis",
    model=args.reward_model_name,
    device_map="auto",
    model_kwargs={"load_in_4bit": True},
    tokenizer=tokenizer
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at ./saved_models/reward_model_vicuna-7b-adapter-merged and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# WARNING: this cell performs OpenAI inference, which costs money!

# Every agent is evaluated with the same model, tokenizer, reward model and
# shot count, so the keyword arguments are assembled once up front.
shot_kwargs = dict(
    model=model,
    tokenizer=tokenizer,
    reward_model=reward_model,
    num_shots=args.num_shots,
)
for agent in test_agents:
    agent.run_n_shots(**shot_kwargs)

# Split the evaluated agents into the two groups returned by summarize_trial
# and report how many fell into each.
correct, incorrect = summarize_trial(test_agents)
print(f'Finished evaluation, Correct: {len(correct)}, Incorrect: {len(incorrect)}')


Facts:
2025-07-15
- Analysts have raised the price target for NVDA to $192, with some predicting the stock could reach $200.
- Huawei has introduced a new AI compute platform, challenging Nvidia in the AI space.
- There are mixed opinions on NVDA stock, with some analysts giving permission to sell shares while others name it a top pick ahead of earnings.
- Analysts and experts believe NVDA will have a good quarter and see long-term upside potential, especially with AI exports to China.
- Dan Niles recently turned bullish on Nvidia, while Needham maintains a buy rating and lifted its target to $200.
- The stock is predicted to perform well in the tech sector, along with AMD and INTC.
- NVDA's market cap is surging, and the company is leading in AI technology.
- Some sources provide long-term stock price predictions for NVDA, projecting where the stock could be by 2025, 2026, and 2030.

Price Movement: 



Positive

Explanation: Recent market analysis, opinions, and predictions have generated a positive price movement outlook for NVDA stock. Analysts and experts maintain a bullish stance, expecting a good quarter ahead and long-term upside potential. The strong positioning of NVDA in AI technology and its market cap surge reflect the company's growth trajectory. With some analysts raising the price target for NVDA to $192 and $200, and others giving a green light to buy shares, the market sentiment is optimistic. The tech sector, including NVDA, AMD, and INTC, is predicted to perform well, contributing to the positive Price Movement. The mixed opinions on selling shares while others name the stock a top pick ahead of earnings demonstrate the market's confidence in NVDA's future prospects. While there are long-term stock price predictions for NVDA, this analysis indicates a positive Price Movement in the short term, driven by the company's strong market position, evolving industry trends,

In [14]:
# Hand the evaluated agents to save_results together with the configured
# output directory, then report where they were written.
results_dir = args.save_dir
save_results(test_agents, results_dir)
print('Inference results saved at \'{}\''.format(results_dir))


Inference results saved at 'results/'
