In [1]:
pip install datasets

Note: you may need to restart the kernel to use updated packages.


In [2]:
from datasets import load_dataset

ds = load_dataset("knkarthick/dialogsum")

In [3]:
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [4]:
ds['train'][1]['dialogue']

"#Person1#: Hello Mrs. Parker, how have you been?\n#Person2#: Hello Dr. Peters. Just fine thank you. Ricky and I are here for his vaccines.\n#Person1#: Very well. Let's see, according to his vaccination record, Ricky has received his Polio, Tetanus and Hepatitis B shots. He is 14 months old, so he is due for Hepatitis A, Chickenpox and Measles shots.\n#Person2#: What about Rubella and Mumps?\n#Person1#: Well, I can only give him these for now, and after a couple of weeks I can administer the rest.\n#Person2#: OK, great. Doctor, I think I also may need a Tetanus booster. Last time I got it was maybe fifteen years ago!\n#Person1#: We will check our records and I'll have the nurse administer and the booster as well. Now, please hold Ricky's arm tight, this may sting a little."

In [5]:
ds['train'][1]['summary']

'Mrs Parker takes Ricky for his vaccines. Dr. Peters checks the record and then gives Ricky a vaccine.'

In [6]:
# 1. USING THE MODEL WITHOUT FINE TUNING
# LOADING THE BART MODEL

In [9]:
pip install tf-keras


Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install transformers==4.25.1


Collecting transformers==4.25.1
  Using cached transformers-4.25.1-py3-none-any.whl.metadata (93 kB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.25.1)
  Using cached tokenizers-0.13.3.tar.gz (314 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Using cached transformers-4.25.1-py3-none-any.whl (5.8 MB)
Building wheels for collected packages: tokenizers
  Building wheel for tokenizers (pyproject.toml): started
  Building wheel for tokenizers (pyproject.toml): finished with status 'error'
Failed to build tokenizers
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × Building wheel for tokenizers (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [49 lines of output]
      running bdist_wheel
      running build
      running build_py
      creating build\lib.win-amd64-cpython-312\tokenizers
      copying py_src\tokenizers\__init__.py -> build\lib.win-amd64-cpython-312\tokenizers
      creating build\lib.win-amd64-cpython-312\tokenizers\models
      copying py_src\tokenizers\models\__init__.py -> build\lib.win-amd64-cpython-312\tokenizers\models
      creating build\lib.win-amd64-cpython-312\tokenizers\decoders
      copying py_src\tokenizers\decoders\__init__.py -> build\lib.win-amd64-cpython-312\tokenizers\decoders
      creating build\lib.win-amd64-cpython-312\tokenizers\normalizers
      copying py_src\tokenizers\normalizers\__init__.py -> build\lib.win-amd64-cpython-312\tokenizers\normalizers
      creating build\lib.win-amd64-cpython-312\tokenizers\pre_tokenizers
      copying py_src

In [11]:
from transformers import pipeline

text_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")






In [12]:
article_1 = ds['train'][1]['dialogue']

text_summarizer(article_1, max_length=20, min_length=10, do_sample=False)


[{'summary_text': 'Ricky has received his Polio, Tetanus and Hepatitis B shots.'}]

In [13]:
# 2. FINE - TUNING MODEL

In [14]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import TrainingArguments, Trainer


In [15]:
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")

In [16]:
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")

In [17]:
#tokenization

def preprocess_function(batch):
    source = batch['dialogue']
    target = batch["summary"]
    source_ids = tokenizer(source, truncation=True, padding="max_length", max_length=128)
    target_ids = tokenizer(target, truncation=True, padding="max_length", max_length=128)

    # Replace pad token id with -100 for labels to ignore padding in loss computation
    labels = target_ids["input_ids"]
    labels = [[(label if label != tokenizer.pad_token_id else -100) for label in labels_example] for labels_example in labels]

    return {
        "input_ids": source_ids["input_ids"],
        "attention_mask": source_ids["attention_mask"],
        "labels": labels
    }

ds_source = ds.map(preprocess_function, batched=True)

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [18]:
pip install transformers[torch]


Note: you may need to restart the kernel to use updated packages.


In [19]:
pip install "accelerate>=0.26.0"


Note: you may need to restart the kernel to use updated packages.


In [20]:
pip install torch


Note: you may need to restart the kernel to use updated packages.


In [31]:
pip install accelerate>={ACCELERATE_MIN_VERSION}

Note: you may need to restart the kernel to use updated packages.


In [21]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="/content",  # Replace with output directory
    per_device_train_batch_size=8,
    num_train_epochs=2,  # Adjust number of epochs as needed
    remove_unused_columns=False
)

In [22]:
# Import necessary classes from the transformers library
from transformers import Trainer, AutoModelForSeq2SeqLM, TrainingArguments

# Define batch size and number of epochs
batch_size = 16  # Reduced batch size
epochs = 3  # Reduced number of epochs

# Define or load the model here
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="steps",
    gradient_accumulation_steps=2,
    fp16=True
)

# Use a smaller subset of the dataset for quick experimentation (optional)
small_train_dataset = ds_source["train"].shuffle().select(range(1000))
small_eval_dataset = ds_source["test"].shuffle().select(range(200))

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset
)

# Start the training process
trainer.train()


  0%|          | 0/93 [00:00<?, ?it/s]

{'loss': 3.9052, 'grad_norm': 16.444107055664062, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.32}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 3.9689178466796875, 'eval_runtime': 51.1004, 'eval_samples_per_second': 3.914, 'eval_steps_per_second': 0.254, 'epoch': 0.32}
{'loss': 3.5547, 'grad_norm': 13.320690155029297, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.63}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 3.2028982639312744, 'eval_runtime': 56.7786, 'eval_samples_per_second': 3.522, 'eval_steps_per_second': 0.229, 'epoch': 0.63}
{'loss': 3.0716, 'grad_norm': 7.045828819274902, 'learning_rate': 3e-06, 'epoch': 0.95}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 2.5633087158203125, 'eval_runtime': 46.8271, 'eval_samples_per_second': 4.271, 'eval_steps_per_second': 0.278, 'epoch': 0.95}
{'loss': 2.6231, 'grad_norm': 5.743558883666992, 'learning_rate': 4.000000000000001e-06, 'epoch': 1.27}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 2.220773696899414, 'eval_runtime': 47.973, 'eval_samples_per_second': 4.169, 'eval_steps_per_second': 0.271, 'epoch': 1.27}
{'loss': 2.3279, 'grad_norm': 4.204470157623291, 'learning_rate': 5e-06, 'epoch': 1.59}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.9882372617721558, 'eval_runtime': 53.3837, 'eval_samples_per_second': 3.746, 'eval_steps_per_second': 0.244, 'epoch': 1.59}
{'loss': 2.2118, 'grad_norm': 4.325822353363037, 'learning_rate': 6e-06, 'epoch': 1.9}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.9239627122879028, 'eval_runtime': 51.9493, 'eval_samples_per_second': 3.85, 'eval_steps_per_second': 0.25, 'epoch': 1.9}
{'loss': 2.1067, 'grad_norm': 3.9269888401031494, 'learning_rate': 7.000000000000001e-06, 'epoch': 2.22}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.8955981731414795, 'eval_runtime': 55.7938, 'eval_samples_per_second': 3.585, 'eval_steps_per_second': 0.233, 'epoch': 2.22}
{'loss': 2.0219, 'grad_norm': 3.8437230587005615, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.54}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.8628790378570557, 'eval_runtime': 48.2171, 'eval_samples_per_second': 4.148, 'eval_steps_per_second': 0.27, 'epoch': 2.54}
{'loss': 2.0203, 'grad_norm': 3.527355194091797, 'learning_rate': 9e-06, 'epoch': 2.86}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.8505313396453857, 'eval_runtime': 52.0527, 'eval_samples_per_second': 3.842, 'eval_steps_per_second': 0.25, 'epoch': 2.86}




{'train_runtime': 3953.6264, 'train_samples_per_second': 0.759, 'train_steps_per_second': 0.024, 'train_loss': 2.6277683319584018, 'epoch': 2.95}


TrainOutput(global_step=93, training_loss=2.6277683319584018, metrics={'train_runtime': 3953.6264, 'train_samples_per_second': 0.759, 'train_steps_per_second': 0.024, 'total_flos': 225602489548800.0, 'train_loss': 2.6277683319584018, 'epoch': 2.9523809523809526})

In [23]:
# Evaluate the model
eval_results = trainer.evaluate()

# Print evaluation results
print(eval_results)

  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.8509255647659302, 'eval_runtime': 29.7683, 'eval_samples_per_second': 6.719, 'eval_steps_per_second': 0.437, 'epoch': 2.9523809523809526}


In [24]:
# 1. Evaluating the Model
import torch
from torch.utils.data import DataLoader

def evaluate_model(model, dataloader, loss_fn, device):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient computation
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item() * inputs.size(0)  # Accumulate loss
            total_samples += inputs.size(0)  # Count samples

    avg_loss = total_loss / total_samples
    return avg_loss


In [25]:
dataloader = DataLoader(small_train_dataset, batch_size=64, shuffle=False)  # Adjust batch size and use small_train_dataset

In [26]:
# Optimizing Evaluation Speed
from torch.cuda.amp import autocast

def evaluate_model_with_mixed_precision(model, dataloader, loss_fn, device):
    model.eval()
    total_loss = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            with autocast():  # Use mixed precision
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
            total_loss += loss.item() * inputs.size(0)
            total_samples += inputs.size(0)

    avg_loss = total_loss / total_samples
    return avg_loss


In [27]:
# Tracking Metrics
import torch
from torch.utils.data import DataLoader

def evaluate_model(model, dataloader, loss_fn, device):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient computation
        for data in dataloader: # Changed from 'for inputs, labels in dataloader:'
            # Assuming the first two elements are inputs and labels
            inputs, labels = data[0], data[1] # Added this line to unpack the first two elements
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item() * inputs.size(0)  # Accumulate loss
            total_samples += inputs.size(0)  # Count samples

    avg_loss = total_loss / total_samples
    return avg_loss

In [28]:
import torch
from torch.utils.data import DataLoader

def evaluate_model(model, dataloader, loss_fn, device):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient computation
        for data in dataloader:
            # Check if data is a list or tuple before unpacking
            if isinstance(data, (list, tuple)) and len(data) >= 2:
                inputs, labels = data[0], data[1]
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                total_loss += loss.item() * inputs.size(0)  # Accumulate loss
                total_samples += inputs.size(0)  # Count samples
            else:
                # Handle cases where data is not in the expected format
                print("Unexpected data format:", data)

    avg_loss = total_loss / total_samples
    return avg_loss

In [29]:
import torch
from torch.utils.data import DataLoader

def evaluate_model(model, dataloader, loss_fn, device):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient computation
        for data in dataloader:
            # Check if data is a list or tuple before unpacking
            if isinstance(data, (list, tuple)) and len(data) >= 2:
                inputs, labels = data[0], data[1]
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                total_loss += loss.item() * inputs.size(0)  # Accumulate loss
                total_samples += inputs.size(0)  # Count samples
            else:
                # Handle cases where data is not in the expected format
                print("Unexpected data format:", data)

    # Check if total_samples is zero to avoid division by zero
    if total_samples == 0:
        print("No samples were processed. Check your dataloader and data format.")
        return None # or return 0, or raise an exception, depending on how you want to handle this case.

    avg_loss = total_loss / total_samples
    return avg_loss

In [23]:
def complete_evaluation(model, dataloader, loss_fn, device, epoch):
    avg_loss = evaluate_model_with_mixed_precision(model, dataloader, loss_fn, device)
    log_metrics(epoch, avg_loss)
    print(f'Epoch: {epoch}, Eval Loss: {avg_loss:.4f}')


In [30]:
# Evaluate the model
eval_results = trainer.evaluate()

# Print evaluation results
print(eval_results)

  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.8509255647659302, 'eval_runtime': 31.3694, 'eval_samples_per_second': 6.376, 'eval_steps_per_second': 0.414, 'epoch': 2.9523809523809526}


In [31]:
from transformers import GenerationConfig

# Define the generation config
gen_config = GenerationConfig(
    early_stopping=True,
    num_beams=4,
    no_repeat_ngram_size=3,
    forced_bos_token_id=0
)

# Save it to the model folder
gen_config.save_pretrained("./fine_tuned_bart")


In [32]:
# SAVING THE MODEL

In [33]:
# Save the model and tokenizer after training
model.save_pretrained("./fine_tuned_bart")
tokenizer.save_pretrained("./fine_tuned_bart")

('./fine_tuned_bart\\tokenizer_config.json',
 './fine_tuned_bart\\special_tokens_map.json',
 './fine_tuned_bart\\vocab.json',
 './fine_tuned_bart\\merges.txt',
 './fine_tuned_bart\\added_tokens.json',
 './fine_tuned_bart\\tokenizer.json')

In [34]:
from transformers import BartTokenizer, BartForConditionalGeneration, AutoConfig, GenerationConfig

# Specify the pre-trained model name explicitly
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")  # Or use "facebook/bart-large" if you fine-tuned a large model

# Load the configuration separately
config = AutoConfig.from_pretrained("./fine_tuned_bart")

# Load the model with the configuration
model = BartForConditionalGeneration.from_pretrained(
    "./fine_tuned_bart",
    config=config,
)

# Load the generation config separately
gen_config = GenerationConfig.from_pretrained("./fine_tuned_bart")

# Now you can use gen_config with model.generate() like this:
# outputs = model.generate(input_ids, generation_config=gen_config)



In [35]:
# SUMMARIZING THE CUSTOM DATA USING SAVED MODEL AND TOKENIZER

In [36]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the trained model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_bart")
model = AutoModelForSeq2SeqLM.from_pretrained("./fine_tuned_bart")

# Function to summarize a blog post
def summarize(blog_post):
    # Tokenize the input blog post
    inputs = tokenizer(blog_post, max_length=1024, truncation=True, return_tensors="pt")

    # Generate the summary
    summary_ids = model.generate(inputs["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)

    # Decode the summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Example blog post
blog_post = """
As Yogi Berra famously said, it’s tough to make predictions, especially about the future. But had the baseball legend spent any time observing the UN climate negotiations, he could have safely predicted that climate finance will prove to be a key sticking point at COP29 in Baku at the end of this year.

‘Who will pay and how much?’ are perennial questions at the climate talks, but this year, the discussions about climate finance will be especially prominent. At COP29, Parties to the Paris Agreement must negotiate a new climate finance goal, to replace the existing commitment from 2009 for developed countries to provide US
5.8-5.9 trillion is required to implement Nationally Determined Contributions (NDCs) in developing countries up to 2030. They will require US
1.5 trillion in renewable energy are required worldwide every year up until 2030, according to IRENA.

But these sums are also affordable and beneficial for developed countries. They should be seen in the context of ongoing investments in energy and other infrastructure: around US
1.74 trillion was in clean energy. These investments will generate strong returns for their investors and reduce the costs for energy consumers.

And, crucially, they should also be seen in the context of the alternative. The latest research estimates that the world economy is already set to face a 19% income reduction within the next 26 years based on the levels of warming we have already locked in. The more we delay and the more the planet heats, the greater the economic costs will be.

Laying the foundations for a new finance goal

While financial resources are beginning to flow, they are not flowing fast enough, and certainly not flowing to those developing countries where need is greatest and access to finance is most challenging.

The UN climate framework provides mechanisms that can enable those flows of climate finance. Back in 2015, parties at the climate talks agreed to establish a “new collective quantified goal” (NCQG) for climate finance. They agreed that the NCQG would be set prior to 2025.

The  ultimate size of the NCQG will be a product of the negotiations, but Parties have agreed it must be a significant increase from the floor of US$100 billion annually. For WWF, it must be needs-based and sufficiently ambitious to meet the scale of the challenge we face, and immediately accessible to help countries that are already facing the chaos of a destabilised climate system.

While developed countries are expected to provide financial and technical support, developing countries also have a role to play. Parties are due to submit revised NDCs in 2025, presenting how they plan to reduce emissions and adapt to climate change. Developing countries have the opportunity to use their NDCs to set out how international climate finance can support them and increase their ambition. To do this, they need to know the finance will be forthcoming.
"""

# Get the summary
summary = summarize(blog_post)
print("Summary:", summary)


Summary: As Yogi Berra famously said, it’s tough to make predictions, especially about the future. But had the baseball legend spent any time observing the UN climate negotiations, he could have safely predicted that climate finance will prove to be a key sticking point at COP29 in Baku at the end of this year.


In [37]:
# Example blog post
blog_post = """
As Someone famously said, it’s tough to make predictions, especially about the future. But had the baseball legend spent any time observing the UN climate negotiations, he could have safely predicted that climate finance will prove to be a key sticking point at COP29 in Baku at the end of this year.

‘Who will pay and how much?’ are perennial questions at the climate talks, but this year, the discussions about climate finance will be especially prominent. At COP29, Parties to the Paris Agreement must negotiate a new climate finance goal, to replace the existing commitment from 2009 for developed countries to provide US
5.8-5.9 trillion is required to implement Nationally Determined Contributions (NDCs) in developing countries up to 2030. They will require US
1.5 trillion in renewable energy are required worldwide every year up until 2030, according to IRENA.

But these sums are also affordable and beneficial for developed countries. They should be seen in the context of ongoing investments in energy and other infrastructure: around US
1.74 trillion was in clean energy. These investments will generate strong returns for their investors and reduce the costs for energy consumers.

And, crucially, they should also be seen in the context of the alternative. The latest research estimates that the world economy is already set to face a 19% income reduction within the next 26 years based on the levels of warming we have already locked in. The more we delay and the more the planet heats, the greater the economic costs will be.

Laying the foundations for a new finance goal

While financial resources are beginning to flow, they are not flowing fast enough, and certainly not flowing to those developing countries where need is greatest and access to finance is most challenging.

The UN climate framework provides mechanisms that can enable those flows of climate finance. Back in 2015, parties at the climate talks agreed to establish a “new collective quantified goal” (NCQG) for climate finance. They agreed that the NCQG would be set prior to 2025.

The  ultimate size of the NCQG will be a product of the negotiations, but Parties have agreed it must be a significant increase from the floor of US$100 billion annually. For WWF, it must be needs-based and sufficiently ambitious to meet the scale of the challenge we face, and immediately accessible to help countries that are already facing the chaos of a destabilised climate system.

While developed countries are expected to provide financial and technical support, developing countries also have a role to play. Parties are due to submit revised NDCs in 2025, presenting how they plan to reduce emissions and adapt to climate change. Developing countries have the opportunity to use their NDCs to set out how international climate finance can support them and increase their ambition. To do this, they need to know the finance will be forthcoming.
"""

# Get the summary
summary = summarize(blog_post)
print("Summary:", summary)

Summary: As Someone famously said, it’s tough to make predictions, especially about the future. But had the baseball legend spent any time observing the UN climate negotiations, he could have safely predicted that climate finance will prove to be a key sticking point at COP29 in Baku at the end of this year.


In [38]:
# Example blog post
blog_post = """
As Wasim Alareqi from yemen has done his bachelor degree in MIS in YEMEN , That time his age was 24..
Now he is 28, he is performing msc degree in data science in india..so he has experiences in data science and mis.
Also has experienced in both countries.

He said, The  ultimate size of the NCQG will be a product of the negotiations, but Parties have agreed it must be a significant increase from the floor of US$100 billion annually. For WWF, it must be needs-based and sufficiently ambitious to meet the scale of the challenge we face, and immediately accessible to help countries that are already facing the chaos of a destabilised climate system.

While developed countries are expected to provide financial and technical support, developing countries also have a role to play. Parties are due to submit revised NDCs in 2025, presenting how they plan to reduce emissions and adapt to climate change. Developing countries have the opportunity to use their NDCs to set out how international climate finance can support them and increase their ambition. To do this, they need to know the finance will be forthcoming.
"""

# Get the summary
summary = summarize(blog_post)
print("Summary:", summary)

Summary: As Wasim Alareqi from yemen has done his bachelor degree in MIS in YEMEN , That time his age was 24..Now he is 28, he is performing msc degree in data science in india..so he has experiences in big data science and mis.He has experience in international climate finance.He said the NCQG will be a product of the negotiations, but it must be needs-based and sufficiently ambitious to meet the scale of the challenge.


In [39]:
pip install streamlit





In [36]:
import streamlit as st
from transformers import BartTokenizer, BartForConditionalGeneration

# Load fine-tuned model
tokenizer = BartTokenizer.from_pretrained("./fine_tuned_bart")
model = BartForConditionalGeneration.from_pretrained("./fine_tuned_bart")

# Streamlit App Interface
st.title("Dialogue Summarization with BART")

# Input box for user to enter dialogue
input_text = st.text_area("Enter the dialogue here:")

if st.button("Summarize"):
    # Tokenize input and generate summary
    inputs = tokenizer([input_text], max_length=512, return_tensors="pt", truncation=True)
    summary_ids = model.generate(inputs["input_ids"], max_length=128, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)

    # Decode the summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    st.write("Summary:")
    st.write(summary)


2024-10-26 06:00:37.065 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-10-26 06:00:37.104 Session state does not function when running a script without `streamlit run`


In [37]:
# RUNNING THE APP

In [43]:
%%writefile app.py
import streamlit as st

st.title("Hello, Streamlit!")
st.write("This is a test Streamlit text summarizer app.")


Overwriting app.py


In [44]:
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.192.33.53:8501[0m
[0m
[34m  Stopping...[0m
^C


In [46]:
!nohup streamlit run app.py --server.port 8501 &


nohup: appending output to 'nohup.out'


In [48]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [49]:
from pyngrok import ngrok
!pip install pyngrok
public_url = ngrok.connect(port='8501')
print(public_url)
!streamlit run app.py




ERROR:pyngrok.process.ngrok:t=2024-10-26T06:36:57+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2024-10-26T06:36:57+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2024-10-26T06:36:57+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [39]:
ls


[0m[01;34mfine_tuned_bart[0m/  [01;34mlogs[0m/  [01;34mresults[0m/  [01;34msample_data[0m/  [01;34mwandb[0m/


In [40]:
pwd


'/content'

In [41]:
ls -1


[0m[01;34mfine_tuned_bart[0m/
[01;34mlogs[0m/
[01;34mresults[0m/
[01;34msample_data[0m/
[01;34mwandb[0m/


In [None]:
!find / -name "app.py"

find: ‘/proc/69/task/69/net’: Invalid argument
find: ‘/proc/69/net’: Invalid argument
/usr/local/lib/python3.10/dist-packages/tensorboard/compat/tensorflow_stub/app.py
/usr/local/lib/python3.10/dist-packages/panel/tests/ui/io/app.py
/usr/local/lib/python3.10/dist-packages/tensorflow/python/platform/app.py
/usr/local/lib/python3.10/dist-packages/absl/app.py
/usr/local/lib/python3.10/dist-packages/ipyparallel/engine/app.py
/usr/local/lib/python3.10/dist-packages/ipyparallel/controller/app.py
/usr/local/lib/python3.10/dist-packages/ipyparallel/cluster/app.py
/usr/local/lib/python3.10/dist-packages/jupyter_console/app.py
/usr/local/lib/python3.10/dist-packages/prompt_toolkit/filters/app.py
/usr/local/lib/python3.10/dist-packages/community/app.py
/usr/local/lib/python3.10/dist-packages/flask/app.py
/content/app.py
/tools/google-cloud-sdk/lib/googlecloudsdk/command_lib/tasks/app.py
/tools/google-cloud-sdk/platform/bq/third_party/google/apputils/app.py
/tools/google-cloud-sdk/platform/bq/thir

In [None]:
!sudo apt-get install nano

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
nano is already the newest version (6.2-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
!nano app.py

[?1l>[?2004lReceived SIGHUP or SIGTERM


In [None]:
import streamlit as st
from transformers import BartTokenizer, BartForConditionalGeneration

# Load fine-tuned model
tokenizer = BartTokenizer.from_pretrained("./fine_tuned_bart")
model = BartForConditionalGeneration.from_pretrained("./fine_tuned_bart")

# Streamlit App Interface
st.title("Dialogue Summarization with BART")

# Input box for user to enter dialogue
input_text = st.text_area("Enter the dialogue here:")

if st.button("Summarize"):
    # Tokenize input and generate summary
    inputs = tokenizer([input_text], max_length=512, return_tensors="pt", truncation=True)
    summary_ids = model.generate(inputs["input_ids"], max_length=128, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)

    # Decode the summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    st.write("Summary:")
    st.write(summary)


2024-10-20 14:18:42.361 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-10-20 14:18:42.373 Session state does not function when running a script without `streamlit run`


In [None]:
pwd

'/content'

In [None]:
!python app.py

2024-10-20 16:16:52.119 
  command:

    streamlit run app.py [ARGUMENTS]
2024-10-20 16:16:52.120 Session state does not function when running a script without `streamlit run`


In [None]:
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.190.148.70:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m


In [None]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:
!ngrok config add-authtoken 2ni2BMOEQXoQuBJDLBLl18Mu7Pp_7CP2LVxERRrs4uf5RNYzp

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
!ngrok authtoken 2ni2BMOEQXoQuBJDLBLl18Mu7Pp_7CP2LVxERRrs4uf5RNYzp

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
!ngrok http http://localhost:8080


^C


In [None]:
from pyngrok import ngrok, conf

# Set your authtoken
ngrok.set_auth_token("2ni2BMOEQXoQuBJDLBLl18Mu7Pp_7CP2LVxERRrs4uf5RNYzp") # replace with actual authtoken

# Connect
# Change: Provide the port number directly as the 'addr' argument
public_url = ngrok.connect(8080)
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://9fc3-35-190-148-70.ngrok-free.app" -> "http://localhost:8080"
