In [7]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from ChatData import ChatData
from torch.optim import Adam
from torch.utils.data import DataLoader
import tqdm
import torch

import pandas as pd

### Whole Script

In [2]:
def train(chatData, model, optim):

    epochs = 12

    for i in tqdm.tqdm(range(epochs)):
        for X, a in chatData:
            X = X.to(device)
            a = a.to(device)
            optim.zero_grad()
            loss = model(X, attention_mask=a, labels=X).loss
            loss.backward()
            optim.step()
        torch.save(model.state_dict(), "model_state.pt")
        print(infer("hello how are you"))

In [3]:
def infer(inp):
    inp = "<startofstring> "+inp+" <bot>: "
    inp = tokenizer(inp, return_tensors="pt")
    X = inp["input_ids"].to(device)
    a = inp["attention_mask"].to(device)
    output = model.generate(X, attention_mask=a)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return output

In [4]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({"pad_token": "<pad>", 
                                "bos_token": "<startofstring>",
                                "eos_token": "<endofstring>"})
tokenizer.add_tokens(["<bot>:"])

model = GPT2LMHeadModel.from_pretrained("gpt2")
model.resize_token_embeddings(len(tokenizer))

model = model.to(device)

In [5]:
# print(tokenizer.decode(model.generate(**tokenizer("hey i was good at basketball but ",
#                          return_tensors="pt"))[0]))

chatData = ChatData("./Hackathon - Dataset - Sheet1.csv", tokenizer)
chatData =  DataLoader(chatData, batch_size=128)

model.train()

optim = Adam(model.parameters(), lr=1e-3)

<startofstring> The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage. <bot>: Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy. Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources. Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both envi

In [13]:
chatData.dataset.X

["<startofstring> The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage. <bot>: Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy. Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources. Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both en

In [7]:
device

'cuda'

In [8]:
train(chatData, model, optim)

427

In [8]:
df = pd.read_csv("./Hackathon - Dataset - Sheet1.csv")

In [18]:
df['Problem'][5]

'Businesses worldwide expend substantial financial resources on paper-based transaction evidence like printed receipts. This not only adds to operational costs but also contributes to environmental degradation due to paper wastage and lack of recycling.'

### TinyLLama

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from torch.optim import Adam
from torch.utils.data import DataLoader
import tqdm
import torch
from ChatData import ChatData


def train(chatData, model, optim, epochs):

    for epoch in range(epochs):
        for X, a in tqdm.tqdm(chatData):
            X = X.to(device)
            a = a.to(device)
            optim.zero_grad()
            loss = model(X, attention_mask=a, labels=X).loss
            # X = X.cpu().detach()
            print(loss.item())
            loss.backward()
            optim.step()
        torch.save(model.state_dict(), f"model_state_epoch_{epoch}.pt")

def infer(inp):
    inp = "<startofstring> " + inp + " <bot>: "
    inp = tokenizer(inp, return_tensors="pt")
    X = inp["input_ids"].to(device)
    a = inp["attention_mask"].to(device)
    output = model.generate(X, attention_mask=a, max_length=200, num_beams=5, no_repeat_ngram_size=2)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return output

# device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model.resize_token_embeddings(len(tokenizer))

model = model.to(device)

chatData = ChatData("./Hackathon - Dataset - Sheet1.csv", tokenizer)
chatData = DataLoader(chatData, batch_size=8)

model.train()

optim = Adam(model.parameters(), lr=1e-3)

  from .autonotebook import tqdm as notebook_tqdm
Downloading generation_config.json: 100%|██████████| 124/124 [00:00<?, ?B/s] 
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


<startofstring> The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage. <bot>: Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy. Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources. Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both envi

In [2]:
train(chatData, model, optim, epochs=3)  # You can adjust the number of epochs as needed

  0%|          | 0/6 [00:00<?, ?it/s]

3.3277106285095215


 17%|█▋        | 1/6 [01:42<08:32, 102.51s/it]

11.077624320983887


 33%|███▎      | 2/6 [04:05<08:24, 126.12s/it]

13.548771858215332


 50%|█████     | 3/6 [06:39<06:56, 138.82s/it]

18.784494400024414


 67%|██████▋   | 4/6 [09:15<04:51, 145.86s/it]

19.23782730102539


 83%|████████▎ | 5/6 [12:05<02:34, 154.61s/it]

15.849577903747559


100%|██████████| 6/6 [14:49<00:00, 148.27s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

10.3925199508667


 17%|█▋        | 1/6 [02:09<10:46, 129.32s/it]


KeyboardInterrupt: 

In [3]:
infer("Businesses worldwide expend substantial financial resources on paper-based transaction evidence like printed receipts. This not only adds to operational costs but also contributes to environmental degradation due to paper wastage and lack of recycling.")

'<startofstring> Businesses worldwide expend substantial financial resources on paper-based transaction evidence like printed receipts. This not only adds to operational costs but also contributes to environmental degradation due to paper wastage and lack of recycling. <bot>: ofof>>of devicesof>:of energyofesof likeof significantof productionof massiveof toofbotof shortof plof oftenofienceof particularlyof clof sustof fashionof electronicofasticofLAofacof industryofagedof haveof theirof eof clothesof issueofuresof greatofributesof computeroficof pressof Essof replacementofatedofovenofallyof environmentof significantlyof Parkof areof reduceof productoferbof curof environmentalof factof Electronof productsofodaofutionof recof highofyof discof theof characterofoldofayof electronof Thisof attemptof increasingof lifeof'

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"

def generate_response(prompt, max_length=300, num_return_sequences=1):
    # Tokenize and encode the prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Move input_ids to the same device as the model
    input_ids = input_ids.to(device)

    # Move model to the same device as input_ids
    model.to(input_ids.device)

    # Generate response
    output_ids = model.generate(input_ids, max_length=max_length, num_return_sequences=num_return_sequences, no_repeat_ngram_size=2)

    # Decode and return the generated response
    generated_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    return generated_response

# Example usage
prompt = "Propose a solution for this problem: Businesses worldwide expend substantial financial resources on paper-based transaction evidence like printed receipts. This not only adds to operational costs but also contributes to environmental degradation due to paper wastage and lack of recycling."
response = generate_response(prompt)
print("Generated Response:", response)


Generated Response: Propose a solution for this problem: Businesses worldwide expend substantial financial resources on paper-based transaction evidence like printed receipts. This not only adds to operational costs but also contributes to environmental degradation due to paper wastage and lack of recycling.

Solution:
1. Implement a digital payment system that uses blockchain technology to store transaction data securely. The system can be integrated with existing payment systems to provide a seamless payment experience. 
2. Use biometric authentication to secure transactions. Biometric data can include fingerprints, facial recognition, or iris scans. These methods can reduce the risk of fraud and ensure that only authorized users can access sensitive information.  
3. Provide a mobile app that allows users to track their transactions and view their transaction history. Users can also receive alerts when their payment is pending or when a transaction is declined.   
4. Incentivize use

In [7]:
torch.save(model.state_dict(), "tinyllama_untrained.pth")

In [8]:
model = AutoModelForCausalLM()
model.load_state_dict(torch.load("./tinyllama_untrained.pth"))
model.eval()

OSError: AutoModelForCausalLM is designed to be instantiated using the `AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path)` or `AutoModelForCausalLM.from_config(config)` methods.

	Problem
1	The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage.