In [None]:
pip install transformers datasets torch

In [None]:
import torch
from torch.utils.data import Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments

# Step 1: Prepare the dataset
# Small hand-written fine-tuning corpus: each entry is one standalone,
# single-sentence statement about finance / markets. The list is tokenized
# further down and wrapped in FinanceDataset for training.
texts = [
    "Balance sheets provide a snapshot of a company's financial condition.",
    "The Federal Reserve announced a new policy to adjust interest rates.",
    "Quarterly earnings reports are critical for assessing company performance.",
    "Investment strategies often rely on technical and fundamental analysis.",
    "Credit ratings are essential for evaluating a company's debt obligations.",
    "Risk management is a cornerstone of sound financial planning.",
    "Portfolio diversification helps mitigate market volatility.",
    "Market trends indicate a potential bull run in the tech sector.",
    "Mergers and acquisitions can reshape industry dynamics.",
    "Economic indicators such as GDP growth and unemployment rates provide insight into market health.",
    "Fiscal policy can influence inflation and consumer spending.",
    "Sovereign bonds are seen as a safe-haven asset during economic downturns.",
    "Equity markets react swiftly to changes in investor sentiment.",
    "Commodities like oil and gold are often used as hedges against inflation.",
    "Algorithmic trading has transformed modern financial markets.",
    "Derivatives are used to hedge against risks in financial portfolios.",
    "Long-term investments can yield substantial returns over time.",
    "Short-term market fluctuations are often driven by investor sentiment.",
    "Asset management firms focus on maximizing returns while managing risk.",
    "Corporate governance is crucial for maintaining investor trust.",
    "Financial modeling helps in forecasting future market trends.",
    "Budget constraints force companies to prioritize their expenditures.",
    "Mergers and acquisitions often lead to significant industry consolidation.",
    "The yield curve is a valuable indicator of economic expectations.",
    "Market liquidity can be a critical factor during financial crises.",
    "Inflation erodes purchasing power over time.",
    "Capital allocation decisions are key to a company's long-term success.",
    "The price-to-earnings ratio is a popular metric for valuing stocks.",
    "Debt-to-equity ratios help investors assess financial leverage.",
    "Regulatory changes can have a profound impact on market behavior.",
    "Exchange rates influence the profitability of multinational companies.",
    "Investor sentiment often swings with major geopolitical events.",
    "Sustainable investing is gaining popularity among socially conscious investors.",
    "The balance between risk and return is central to portfolio management.",
    "Financial derivatives can be complex and require specialized knowledge.",
    "Economic cycles affect asset prices in predictable patterns.",
    "Private equity investments typically have longer time horizons.",
    "Venture capital fuels innovation in emerging industries.",
    "Corporate bonds offer fixed income with varying levels of risk.",
    "Credit default swaps are used to manage exposure to credit risk.",
    "Financial regulations aim to maintain market stability.",
    "Market capitalization provides a measure of a company's size.",
    "The liquidity of an asset can determine its market price.",
    "Quantitative easing is a tool used by central banks to stimulate the economy.",
    "Investor behavior is influenced by both rational analysis and emotion.",
    "Diversification of assets can reduce overall portfolio risk.",
    "Macroeconomic trends play a significant role in investment decisions.",
    "Equity analysts monitor price targets and earnings forecasts.",
    "Financial reports detail revenue, expenses, and net income.",
    "Budget forecasts help businesses plan for future growth.",
    "Interest rate adjustments can signal changes in economic policy.",
    "Asset allocation models evolve as market conditions change.",
    "Economic recovery is often accompanied by rising stock prices.",
    "Capital markets provide funding for corporate expansion.",
    "Global trade dynamics affect commodity prices worldwide."
]

# Tokenize the dataset
model_name = 'gpt2'  # You can also use 'gpt2-medium', 'gpt2-large', 'gpt2-xl'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# GPT-2 has no dedicated padding token; reuse EOS so sentences of different
# lengths can be padded into one rectangular batch.
tokenizer.pad_token = tokenizer.eos_token

# Tokenize every sentence as its OWN example (a list in -> one row per text).
# Joining the texts into a single string would yield a dataset of length 1,
# i.e. one optimizer step per epoch, and padding would have nothing to pad.
# The returned attention_mask marks which positions are real tokens (1) and
# which are padding (0).
encodings = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)

# Step 2: Create a Dataset class
class FinanceDataset(Dataset):
    """Map-style dataset over pre-tokenized text for causal-LM fine-tuning.

    Args:
        encodings: Mapping (e.g. the tokenizer's output) from field name to a
            tensor whose first dimension indexes examples. Must contain
            ``'input_ids'``; ``'attention_mask'`` is optional.

    Each item is a dict holding the ``idx``-th row of every field plus a
    ``'labels'`` entry for language modeling.
    """

    # Label value excluded from the loss (the default ``ignore_index`` of
    # PyTorch's cross-entropy, which the Hugging Face LM heads rely on).
    IGNORE_INDEX = -100

    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}

        # Labels are the input ids themselves (the model shifts them
        # internally). Clone so labels never alias the input_ids tensor.
        labels = item['input_ids'].clone()

        # Padding positions must not contribute to the loss; this matters
        # because the pad token is the EOS token, which would otherwise be
        # learned as the target at every padded position.
        attention_mask = item.get('attention_mask')
        if attention_mask is not None:
            labels[attention_mask == 0] = self.IGNORE_INDEX

        item['labels'] = labels
        return item

    def __len__(self):
        return len(self.encodings['input_ids'])

# Create the dataset
dataset = FinanceDataset(encodings)

# Step 3: Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          # where checkpoints are written
    num_train_epochs=3,              # total number of training epochs
    logging_steps=1,                 # corpus is tiny: the default interval (500 steps)
                                     # is never reached, so no loss would be reported
    report_to="none"                 # disable external experiment trackers
)

# Step 4: Initialize Trainer
# Start from the pretrained GPT-2 weights and fine-tune all of them.
model = GPT2LMHeadModel.from_pretrained(model_name)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

# Step 5: Train the model
trainer.train()


# Step 6: Usage Code for Text Generation
def generate_text(prompt, max_length=200):
    """Generate a continuation of ``prompt`` with the fine-tuned model.

    Args:
        prompt: Text the generation starts from.
        max_length: Maximum total length in tokens (prompt included) of the
            generated sequence.

    Returns:
        The decoded text (prompt plus continuation) with special tokens
        removed.
    """
    # Calling the tokenizer (rather than .encode) also returns the
    # attention_mask; move everything to the device the model lives on.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

    # Training leaves the model in train mode; switch dropout off for inference.
    model.eval()
    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            # Explicit mask: it cannot be inferred when pad token == EOS token.
            attention_mask=inputs['attention_mask'],
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            # top_k / top_p / temperature only take effect when sampling.
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            # GPT-2 has no pad token; state the fallback explicitly.
            pad_token_id=tokenizer.eos_token_id,
        )

    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text


Step,Training Loss


In [None]:
# Sample questions used to probe the fine-tuned model.
prompt_examples = [
    "What is a balance sheet",
    "What is the Federal Reserve",
    "What are Investment strategies",
    "What are Credit ratings",
]

# map() is lazy, so each prompt is generated right before it is printed.
for prompt, generated in zip(prompt_examples, map(generate_text, prompt_examples)):
    print(f"Prompt: '{prompt}' -> Answer: '{generated}'")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: 'What is a balance sheet' -> Answer: 'What is a balance sheet?

The balance of the financial system.
Financial system is the system of financial systems. It is an economic system that is based on the principles of capitalism. The system has a central bank and a monetary policy. This is what the central banks are. They are the banks that are responsible for the economy. These are central bankers that have the power to make decisions. In other words, they are in charge of making decisions for society. So, central banking is not a system, it is just a mechanism for making money. Central banking has been around for a long time. There are many different types of central financial institutions. Some are based in the United States, some in Europe, and some are located in Asia. Most of these institutions are run by the government. For example, the Federal Reserve Bank of New York is run in a very large and very centralized manner. And, in fact, there are a lot of different central bank

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: 'What is the Federal Reserve' -> Answer: 'What is the Federal Reserve System?

The Federal reserve system is a central bank that is run by the federal government. The Federal government is responsible for the monetary policy of the United States.
. Federal money is used to pay for government services. It is not used for any purpose other than to buy money. In fact, the money used in the banking system was used by banks to purchase and hold money, and to make loans. This money was also used as a currency for money and other currencies. Money is also known as money in circulation. Currency is money that can be exchanged for other goods and services, such as currency. A currency is called a unit of measurement. For example, a dollar is an ounce of gold. An ounce is equal to one ounce. If a pound of silver were equal in weight to a gram of oil, it would be equal,, to an acre of land. Therefore, if a gallon of water were to be divided into two parts, one'


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: 'What are Investment strategies' -> Answer: 'What are Investment strategies for investing in the future?

The investment strategy for the the investment in future is a strategy that is designed to provide investors with a high return on investment. The investment is intended to be a return to the market.
. It is not intended for a a short term investment, but for an extended period of time. This is the period when the investor is expected to invest in a new product or service. In the case of a long term, the investors are expected not to reinvest in any new products or services. They are not expected, however, to make any investments in new businesses. Investment decisions are made by the government. Investors are required to take into account the risks of the financial system. These risks are often referred to as risk factors. Risk factors include:
- the nature of financial markets
 - the size of stocks and bonds
, - whether the risk is likely to occur
 and the extent to which

In [None]:
# Add up the element count of every parameter tensor in the model.
total_params = sum(map(torch.Tensor.numel, model.parameters()))
print(f"Total number of parameters: {total_params}")

Total number of parameters: 124439808
