In [None]:
from cad import CAD
from datasets import load_dataset
import numpy as np
import os
import random
import sys
import torch
import torch.nn.functional as F
import transformers

# Print the interpreter and library versions next to the outputs so that a
# run of this notebook can be matched to the environment that produced it.
print(f"Python Version : {sys.version}")
print(f"Torch Version : {torch.__version__}")
print(f"Transformers Version : {transformers.__version__}")

In [2]:
def set_seed(random_seed):
    """Seed every random number generator this notebook uses.

    Applies the same seed to Python's ``random`` module, NumPy's global
    generator and PyTorch (CPU, the current CUDA device and all CUDA
    devices), then sets the cuDNN flags to deterministic mode with
    benchmarking turned off.

    Args:
        random_seed: Seed value handed unchanged to each generator.

    Returns:
        None.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers multi-GPU setups
    )
    for seed_fn in seeders:
        seed_fn(random_seed)

    # cuDNN: disable benchmarking and request deterministic behaviour.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed once, up front, for the rest of the notebook.
set_seed(1002)

In [None]:
# Build the CAD model wrapper used by every generation cell below.
# NOTE(review): `device=3` is hard-coded and presumably a GPU index — confirm
# what `CAD` expects and that the host actually exposes that device.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
DEVICE = 3
cad_model = CAD(model_name=MODEL_NAME, device=DEVICE)

In [None]:
# Toy example: the context states the facts and the prompt asks for them.
# Alternative two-item batch kept for reference:
# contexts = ['Write a quote that ends in the word "early":', "Meow"]
# prompts = ['Better late than', "Haw"]
contexts = ['Argentina won world cups in 1978, 1986, 2022.']
prompts = ['Argentina has won FIFA world cups in years:']

# Decoding configuration for this run.
max_length = 50
decoding_strategy = 'greedy'
# NOTE(review): top_p_value is passed although decoding is greedy — confirm
# whether `generate` reads it for this strategy.
top_p_value = 0.9
use_repetition_penalty = True
repetition_penalty_value = 1.5
method = 'adacad'
alpha = 1.0

# Collect the arguments in one place so the call itself stays short.
generation_kwargs = dict(
    prompts=prompts,
    contexts=contexts,
    max_length=max_length,
    decoding_strategy=decoding_strategy,
    top_p_value=top_p_value,
    method=method,
    alpha=alpha,
    use_repetition_penalty=use_repetition_penalty,
    repetition_penalty_value=repetition_penalty_value,
)
outputs = cad_model.generate(**generation_kwargs)

# Decode the generated sequences (special tokens dropped) and show each one.
decoded_output = cad_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)
for i, output in enumerate(decoded_output):
    print(f"Output {i}: {output}")

In [None]:
import json

from experiments.generation.workshop.experiment_utils import preprocess_function, evaluate

# Load the CLERC generation dataset and pick one training record to work with.
data_dir = "bm25_oracle_passages_oracle_documents"
clerc_dataset = load_dataset("ylkhayat/CLERC-generation-workshop", data_dir=data_dir)
record_example = clerc_dataset["train"][10]

# Preprocess the same record twice; the two calls differ only in
# `use_instruction`, so the common arguments are shared.
shared_preprocess_kwargs = dict(top_k=3, tokenizer=cad_model.tokenizer)
new_record_processed = preprocess_function(
    record_example, use_instruction=False, **shared_preprocess_kwargs
)
new_instructed_record_processed = preprocess_function(
    record_example, use_instruction=True, **shared_preprocess_kwargs
)

# Dump both variants (plain first, instructed second) for side-by-side inspection.
for processed in (new_record_processed, new_instructed_record_processed):
    print(json.dumps(processed, indent=4))

In [None]:
# The context is taken from the instructed variant of the record while the
# prompt is taken from the plain variant. Alternatives kept for reference:
#   context: f"{new_record_processed['context_prefix']}{new_record_processed['context']}"
#   prompt:  new_instructed_record_processed["prompt"]
contexts = [
    f"{new_instructed_record_processed['context_prefix']}\n\n{new_instructed_record_processed['context']}",
]
prompts = [new_record_processed["prompt"]]

# Decoding configuration for this run.
max_length = 200
decoding_strategy = 'greedy'
use_repetition_penalty = False
repetition_penalty_value = 1.5
method = 'cad'
alpha = 0.5
temperature = 1.0

generation_kwargs = dict(
    prompts=prompts,
    contexts=contexts,
    max_length=max_length,
    decoding_strategy=decoding_strategy,
    method=method,
    alpha=alpha,
    use_repetition_penalty=use_repetition_penalty,
    repetition_penalty_value=repetition_penalty_value,
    temperature=temperature,
)
outputs = cad_model.generate(**generation_kwargs)

# NOTE(review): special tokens are kept here, so they also end up in the text
# handed to `evaluate` as 'gen' — confirm that this is intended.
decoded_output = cad_model.tokenizer.batch_decode(outputs, skip_special_tokens=False)

for i, output in enumerate(decoded_output):
    print(f"Output {i}: {output}")
    # Every generation is scored against the same record's previous/gold text.
    results = {
        'meta': {
            'previous_text': new_record_processed['meta']['previous_text'],
            'gold_text': new_record_processed['meta']['gold_text'],
        },
        'gen': output,
    }
    scores = evaluate([results], 0)
    print(json.dumps(scores, indent=4))
    print("=====================================")


## Experiment 1: Compare generation with and without context-aware decoding

In [6]:
# contexts = ["""
# Below are reference cases provided for factual accuracy. When generating content, you must reference and cross-check the relevant details with the provided reference texts by their reference IDs (e.g., 114 F.3d 596, 114 F.3d 596, 114 F.3d 596).

# These references take precedence over inferred or assumed information. Your output must clearly align with the facts in these cases.


# 114 F.3d 596
# Relations Act, 29 U.S.C. § 185. The parties filed cross-motions for summary judgment, and the district court enforced the award. The Beacon Journal filed this timely appeal. II. This court reviews the district court’s grant of summary judgment de novo. Rowley v. United States, 76 F.3d 796, 799 (6th Cir.1996). Nevertheless, our scope of review, like the review of the district court, is extremely limited. The Supreme Court has made clear in the Steelworkers’ Trilogy and its progeny that courts must accord an arbitrator’s decision substantial deference because it is the arbitrator’s construction of the agreement, not the court’s construction, to which the parties have agreed. See United Paperworkers Int’l Union v. Misco, 484 U.S. 29, 37-8, 108 S.Ct. 364, 371, 98 L.Ed.2d 286 (1987) (“Because the parties have contracted to have disputes settled by an arbitrator chosen by them rather than by a judge, it is the arbitrator’s view of the facts and of the meaning of the contract that they have agreed to accept.”). Hence, our review is extremely limited. We review the arbitrator’s decision only to determine whether the arbitrator was “arguably construing or applying the contract and acting within the scope of his authority.” Id. at 38, 108 S.Ct. at 371. If the arbitrator’s award “draws its essence from the collective bargaining agreement,” and is not merely the arbitrator’s “own brand of industrial justice,” the award is legitimate. United Steelworkers of Am. v. Enterprise Wheel & Car Co., 363 U.S. 593, 597, 80 S.Ct. 1358, 1361, 4 L.Ed.2d 1424 (1960). Courts will not weigh the merits of the claim or determine whether the claim is supported by language in the written instrument; otherwise, the policy of settling labor disputes through arbitration would be undermined. Misco, 484 U.S. at 36, 108 S.Ct. at 369-70; see also United

# 114 F.3d 596
# any evidence that a member had “to modify or change his/her vacation plans due to the management’s ‘new interpretation of its rights under the vacation and management rights clauses of the labor agreement.” Arbitrator’s Decision, Slip op. at 6. In contrast, management was “vague on the specifics of not being able to meet the necessities of the supervisors and the production needs of the newspaper.” Id. The arbitrator made no further findings, but instead found that the Union’s grievance was justified. He then crafted his own solution, whereby the four new supervisors and the Union employees were thrown into a “seniority pool” for vacation selection purposes. He also provided for a grievance procedure through the Union for employees that believed they were adversely affected by the new procedure. The Beacon Journal refused to comply with the arbitration award and instead instituted this lawsuit under section 801 of the Labor Management Relations Act, 29 U.S.C. § 185. The parties filed cross-motions for summary judgment, and the district court enforced the award. The Beacon Journal filed this timely appeal. II. This court reviews the district court’s grant of summary judgment de novo. Rowley v. United States, 76 F.3d 796, 799 (6th Cir.1996). Nevertheless, our scope of review, like the review of the district court, is extremely limited. The Supreme Court has made clear in the Steelworkers’ Trilogy and its progeny that courts must accord an arbitrator’s decision substantial deference because it is the arbitrator’s construction of the agreement, not the court’s construction, to which the parties have agreed. See United Paperworkers Int’l Union v. Misco, 484 U.S. 29, 37-8, 108 S.Ct. 364, 371, 98 L.Ed.2d 286 (1987) (“Because the parties have contracted to have disputes settled by an arbitrator chosen by them rather than by a judge, it is the arbitrator’s view

# 114 F.3d 596
# of the facts and of the meaning of the contract that they have agreed to accept.”). Hence, our review is extremely limited. We review the arbitrator’s decision only to determine whether the arbitrator was “arguably construing or applying the contract and acting within the scope of his authority.” Id. at 38, 108 S.Ct. at 371. If the arbitrator’s award “draws its essence from the collective bargaining agreement,” and is not merely the arbitrator’s “own brand of industrial justice,” the award is legitimate. United Steelworkers of Am. v. Enterprise Wheel & Car Co., 363 U.S. 593, 597, 80 S.Ct. 1358, 1361, 4 L.Ed.2d 1424 (1960). Courts will not weigh the merits of the claim or determine whether the claim is supported by language in the written instrument; otherwise, the policy of settling labor disputes through arbitration would be undermined. Misco, 484 U.S. at 36, 108 S.Ct. at 369-70; see also United Steelworkers of Am. v. American Mfg. Co., 363 U.S. 564, 568, 80 S.Ct. 1343, 1346, 4 L.Ed.2d 1403 (1960) (“[C]ourts, therefore, have no business weighing the merits of the grievance, considering whether there is equity in a particular claim, or determining whether there is particular language in the written instrument which will support the claim.”). Despite the great amount of deference accorded an arbitrator’s decision, our review is not toothless when an arbitrator’s award disregards the collective bargaining agreement and its terms. See Lattimer-Stevens Co. v. United Steelworkers, 913 F.2d 1166, 1171-72 (6th Cir.1990) (Boggs, J., dissenting) (delineating eases setting aside arbitrator’s decision). Even though arbitrators are not flawless, courts must refrain from reversing an arbitrator simply because the court disagrees with the result or believes the arbitrator made a serious legal or factual error. Misco, 484 U.S. at 38, 108 S.Ct. at 371 (“that a court is convinced [the
# """]

# prompts = ["""
# Continue to write the following case using the style of my write-up. Your response should:
# 1. Be concise and within 100 to 400 words.
# 2. Explicitly cite the reference IDs in the text where applicable to ensure factual consistency.
# 3. Avoid redundant language, assumptions, or information not found in the references.

# BEER, District Judge.
# Alken-Ziegler, Incorporated, (Company) appeals from the district court’s grant of summary judgment affirming an arbitration award in favor of the International Union, United Automobile, Aerospace and Agricultural Implement Workers of America, and Local Union 985 (Union). For the following reasons, we find that, even in light of our deferential review, the arbitrator disregarded the provisions of the labor contract. Therefore, we reverse the district court’s decision and vacate the arbitration award.
# I
# The Company and the Union were parties to a labor contract effective December 15, 1999. In March, 2001, the Company notified the Union that it would be closing its Novi plant and that it would be necessary to terminate all of the employees at the facility. As a result of the plant closing on October 17, 2001, all but one employee was terminated during the calendar year, 2001. The Company refused to pay vacationpay benefits to employees who did not work for the Company on January 1, 2002. The Union filed a grievance.
# Article 16 (61) of the labor agreement sets forth the eligibility requirement for payment of vacation benefits:
# (a) Employees shall be eligible for vacations, time off and vacation pay as set forth below.
# (b) For purposes of eligibility, the vacation year will be considered the calendar year period from January 1st to December 31.
# (c) An employee covered by the agreement who is actually working on January 1st of any year and who has at least six (6) months seniority and has' worked at least eight hundred (800) hours from and after January 1st of the previous year shall be paid the equivalent of two-and-one half (2-1/2) days vacation pay.
# ijs ifc tjc %
# (f) Employees with twelve (12) months or more of seniority who have worked more than eight hundred (800) hours, but less than sixteen hundred (1600) hours, during the vacation year, shall receive a pro-rated vacation pay on the basis of the ratio of their actual hours to sixteen hundred (1600) hours, but not to exceed the full vacation pay to which they were entitled by reason of their seniority and hours worked as set forth above.
# (g) Vacation pay will be computed on a straight time forty (40) hour basis including applicable shift premium. The employee’s hour basis including applicable shift premium. The employee’s hourly rate in effect when vacation is taken will be used to compute vacation pay. If an employee is laid off after six (6) months service, their vacation pay will be pro-rated same as above.
# Pursuant to Article 5 of the labor contract, the parties arbitrated the grievance. At the arbitration the Union asserted that because it was not the employees’ fault that they were unable to work the full year, the employees were entitled to their vacation pay. The arbitrator granted the grievance, allowing all plaintiffs, who, but for being laid off, would have been able to continue employment and thereby qualify for vacation benefits. The arbitrator reasoned that “[i]t would be unreasonable to cause such forfeitures particularly where an employee has no control over the situation.”
# The Company filed a complaint in the district court asserting that the arbitrator’s award contradicted the clear, mandatory commands of the labor contract, which required that an employee be “actually working” for the Company as of January 1, 2002, to receive vacation pay. The district court granted the Union’s motion for summary judgment and upheld the arbitrator’s award. The Company appealed.
# II
# """]

In [7]:
# import json
# from datasets import load_dataset
# from workshop.experiment_utils import preprocess_function, evaluate

# data_dir = "bm25_oracle_passages_oracle_documents"
# clerc_dataset = load_dataset("ylkhayat/CLERC-generation-workshop", data_dir=data_dir)


# def build_context_prompt(prev_text, retrieved_docs, retrieved_ids):
#     ref_text = '\n\n'.join(retrieved_docs)
#     context = (
#         'Below will be provided some reference cases, which you can use and must mention their reference ids, i.e. ' + ', '.join(retrieved_ids) + '.\n\n' +
#         ref_text + '\n\n'
#     )
#     prompt = (
#         'Continue to write the following case using the style of my write up. Your answer contains from 100 to 400 words. Make your answer concise, relevant and avoid redundant language.\n\n' +
#         prev_text
#     )
#     return context, prompt

# record_example = clerc_dataset["train"][0]

# old_record_processed = preprocess_function(record_example, 3, build_context_prompt)
# print(json.dumps(old_record_processed, indent=4))

In [None]:
# NOTE(review): this cell imports `preprocess_function` and `evaluate` from
# `workshop.experiment_utils`, rebinding any objects with the same names that
# were imported from another module path earlier in the kernel — confirm which
# module is the intended one.
import json
from datasets import load_dataset
from workshop.experiment_utils import preprocess_function, evaluate

# Load the CLERC generation dataset from the given data sub-directory.
data_dir = "bm25_oracle_passages_oracle_documents"
clerc_dataset = load_dataset("ylkhayat/CLERC-generation-workshop", data_dir=data_dir)


def build_context_prompt(prev_text, retrieved_docs, retrieved_ids):
    """Assemble the context block and the continuation prompt for one record.

    Args:
        prev_text: Text of the case written so far; appended verbatim after
            the numbered writing instructions.
        retrieved_docs: Iterable of reference passages (strings); joined with
            a single newline and placed after the context instructions.
        retrieved_ids: Iterable of reference IDs (strings); joined with ", "
            and listed inside the context instructions. IDs are used as
            given, so repeated IDs appear repeatedly.

    Returns:
        A ``(context, prompt)`` tuple of strings.
    """
    reference_block = '\n'.join(retrieved_docs)
    id_list = ', '.join(retrieved_ids)

    context_instructions = (
        "Below are reference cases provided for factual accuracy. "
        "When generating content, you must reference and cross-check the relevant details "
        "with the provided reference texts by their reference IDs (e.g., "
        f"{id_list}).\n"
        "These references take precedence over inferred or assumed information. "
        "Your output must clearly align with the facts in these cases.\n\n"
    )
    writing_instructions = (
        "Continue to write the following case using the style of my write-up. Your response should:\n"
        "1. Be concise and within 100 to 400 words.\n"
        "2. Explicitly cite the reference IDs in the text where applicable to ensure factual consistency.\n"
        "3. Avoid redundant language, assumptions, or information not found in the references.\n\n"
    )

    context = context_instructions + reference_block
    prompt = writing_instructions + prev_text
    return context, prompt

# Pick a single training record to inspect.
record_example = clerc_dataset["train"][8]

# Preprocess the record, passing `build_context_prompt` as the template builder.
# NOTE(review): the positional `3` is presumably a top-k / number-of-references
# setting — confirm against the signature of `preprocess_function`.
# NOTE(review): this rebinds `new_record_processed`, a name that other cells in
# this notebook also assign and read — confirm the rebinding is intended.
new_record_processed = preprocess_function(record_example, 3, build_context_prompt)
print(json.dumps(new_record_processed, indent=4))

# Old prompt template

In [9]:
# import json
# from datasets import load_dataset
# from workshop.experiment_utils import preprocess_function, evaluate

# data_dir = "bm25_oracle_passages_oracle_documents"
# clerc_dataset = load_dataset("ylkhayat/CLERC-generation-workshop", data_dir=data_dir)


# def build_context_prompt(prev_text, retrieved_docs, retrieved_ids):
#     ref_text = '\n\n'.join(retrieved_docs)
#     context = (
#         'Below will be provided some reference cases, which you can use and must mention their reference ids, i.e. ' + ', '.join(retrieved_ids) + '.\n\n' +
#         ref_text + '\n\n'
#     )
#     prompt = (
#         'Continue to write the following case using the style of my write up. Your answer contains from 100 to 400 words. Make your answer concise, relevant and avoid redundant language.\n\n' +
#         prev_text
#     )
#     return context, prompt

# record_example = clerc_dataset["train"][0]

# record_processed = preprocess_function(record_example, 3, build_context_prompt)
# contexts = record_processed["context"]
# prompts = record_processed["prompt"]
# print(json.dumps(record_processed, indent=4))

# max_length = 200
# decoding_strategy = 'greedy'
# use_repetition_penalty = True
# repetition_penalty_value = 1.5
# k = 10
# method = 'knnlm'
# lamba_strategy = 'constant'
# lamba = 0.5

# outputs = knnlm_model.generate(
#                             prompts=prompts,
#                             contexts=contexts,
#                             max_length=max_length,
#                             decoding_strategy=decoding_strategy,
#                             k=k,
#                             lamba_strategy=lamba_strategy,
#                             lamba=lamba,
#                             use_repetition_penalty=use_repetition_penalty,
#                             repetition_penalty_value=repetition_penalty_value,
#                             )
# decoded_output = knnlm_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)
# for i, output in enumerate(decoded_output):
#     print(f"Output {i}: {output}")
# results = {}
# results['meta'] = {}
# results['meta']['previous_text'] = record_processed['previous_text']
# results['meta']['gold_text'] = record_processed['gold_text']
# results['gen'] = decoded_output[0]
# scores = evaluate([results], 0)
# print(json.dumps(scores, indent=4))

# New prompt template

In [10]:
# import json
# from datasets import load_dataset
# from workshop.experiment_utils import preprocess_function, evaluate

# data_dir = "bm25_oracle_passages_oracle_documents"
# clerc_dataset = load_dataset("ylkhayat/CLERC-generation-workshop", data_dir=data_dir)


# def build_context_prompt(prev_text, retrieved_docs, retrieved_ids):
#     ref_text = '\n'.join(retrieved_docs)
#     context = (
#         "Below are reference cases provided for factual accuracy. When generating content, you must reference and cross-check the relevant details with the provided reference texts by their reference IDs (e.g., " + ', '.join(retrieved_ids) + ").\n" +
#         "These references take precedence over inferred or assumed information. Your output must clearly align with the facts in these cases.\n\n" +
#         ref_text
#     )
#     prompt = (
#         "Continue to write the following case using the style of my write-up. Your response should:\n1. Be concise and within 100 to 400 words.\n2. Explicitly cite the reference IDs in the text where applicable to ensure factual consistency.\n3. Avoid redundant language, assumptions, or information not found in the references.\n\n" +
#         prev_text
#     )
#     return context, prompt

# record_example = clerc_dataset["train"][0]

# record_processed = preprocess_function(record_example, 3, build_context_prompt)
# contexts = record_processed["context"]
# prompts = record_processed["prompt"]
# print(json.dumps(record_processed, indent=4))

# max_length = 200
# decoding_strategy = 'greedy'
# use_repetition_penalty = True
# repetition_penalty_value = 1.5
# k = 10
# method = 'knnlm'
# lamba_strategy = 'constant'
# lamba = 0.5

# outputs = knnlm_model.generate(
#                             prompts=prompts,
#                             contexts=contexts,
#                             max_length=max_length,
#                             decoding_strategy=decoding_strategy,
#                             k=k,
#                             lamba_strategy=lamba_strategy,
#                             lamba=lamba,
#                             use_repetition_penalty=use_repetition_penalty,
#                             repetition_penalty_value=repetition_penalty_value,
#                             )
# decoded_output = knnlm_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)
# for i, output in enumerate(decoded_output):
#     print(f"Output {i}: {output}")
# results = {}
# results['meta'] = {}
# results['meta']['previous_text'] = record_processed['previous_text']
# results['meta']['gold_text'] = record_processed['gold_text']
# results['gen'] = decoded_output[0]
# scores = evaluate([results], 0)
# print(json.dumps(scores, indent=4))

# Prompt Engineering

In [11]:
# contexts = [
#     # Old context
#     """
#     Below will be provided some reference cases, which you can use and must mention their reference ids, i.e. 114 F.3d 596, 114 F.3d 596, 114 F.3d 596.
    
#     114 F.3d 596
#     Relations Act, 29 U.S.C. \u00a7 185. The parties filed cross-motions for summary judgment, and the district court enforced the award. The Beacon Journal filed this timely appeal. II. This court reviews the district court\u2019s grant of summary judgment de novo. Rowley v. United States, 76 F.3d 796, 799 (6th Cir.1996). Nevertheless, our scope of review, like the review of the district court, is extremely limited. The Supreme Court has made clear in the Steelworkers\u2019 Trilogy and its progeny that courts must accord an arbitrator\u2019s decision substantial deference because it is the arbitrator\u2019s construction of the agreement, not the court\u2019s construction, to which the parties have agreed. See United Paperworkers Int\u2019l Union v. Misco, 484 U.S. 29, 37-8, 108 S.Ct. 364, 371, 98 L.Ed.2d 286 (1987) (\u201cBecause the parties have contracted to have disputes settled by an arbitrator chosen by them rather than by a judge, it is the arbitrator\u2019s view of the facts and of the meaning of the contract that they have agreed to accept.\u201d). Hence, our review is extremely limited. We review the arbitrator\u2019s decision only to determine whether the arbitrator was \u201carguably construing or applying the contract and acting within the scope of his authority.\u201d Id. at 38, 108 S.Ct. at 371. If the arbitrator\u2019s award \u201cdraws its essence from the collective bargaining agreement,\u201d and is not merely the arbitrator\u2019s \u201cown brand of industrial justice,\u201d the award is legitimate. United Steelworkers of Am. v. Enterprise Wheel & Car Co., 363 U.S. 593, 597, 80 S.Ct. 1358, 1361, 4 L.Ed.2d 1424 (1960). Courts will not weigh the merits of the claim or determine whether the claim is supported by language in the written instrument; otherwise, the policy of settling labor disputes through arbitration would be undermined. Misco, 484 U.S. at 36, 108 S.Ct. at 369-70; see also United
    
#     114 F.3d 596
#     any evidence that a member had \u201cto modify or change his/her vacation plans due to the management\u2019s \u2018new interpretation of its rights under the vacation and management rights clauses of the labor agreement.\u201d Arbitrator\u2019s Decision, Slip op. at 6. In contrast, management was \u201cvague on the specifics of not being able to meet the necessities of the supervisors and the production needs of the newspaper.\u201d Id. The arbitrator made no further findings, but instead found that the Union\u2019s grievance was justified. He then crafted his own solution, whereby the four new supervisors and the Union employees were thrown into a \u201cseniority pool\u201d for vacation selection purposes. He also provided for a grievance procedure through the Union for employees that believed they were adversely affected by the new procedure. The Beacon Journal refused to comply with the arbitration award and instead instituted this lawsuit under section 801 of the Labor Management Relations Act, 29 U.S.C. \u00a7 185. The parties filed cross-motions for summary judgment, and the district court enforced the award. The Beacon Journal filed this timely appeal. II. This court reviews the district court\u2019s grant of summary judgment de novo. Rowley v. United States, 76 F.3d 796, 799 (6th Cir.1996). Nevertheless, our scope of review, like the review of the district court, is extremely limited. The Supreme Court has made clear in the Steelworkers\u2019 Trilogy and its progeny that courts must accord an arbitrator\u2019s decision substantial deference because it is the arbitrator\u2019s construction of the agreement, not the court\u2019s construction, to which the parties have agreed. See United Paperworkers Int\u2019l Union v. Misco, 484 U.S. 29, 37-8, 108 S.Ct. 364, 371, 98 L.Ed.2d 286 (1987) (\u201cBecause the parties have contracted to have disputes settled by an arbitrator chosen by them rather than by a judge, it is the arbitrator\u2019s view
    
#     114 F.3d 596
#     of the facts and of the meaning of the contract that they have agreed to accept.\u201d). Hence, our review is extremely limited. We review the arbitrator\u2019s decision only to determine whether the arbitrator was \u201carguably construing or applying the contract and acting within the scope of his authority.\u201d Id. at 38, 108 S.Ct. at 371. If the arbitrator\u2019s award \u201cdraws its essence from the collective bargaining agreement,\u201d and is not merely the arbitrator\u2019s \u201cown brand of industrial justice,\u201d the award is legitimate. United Steelworkers of Am. v. Enterprise Wheel & Car Co., 363 U.S. 593, 597, 80 S.Ct. 1358, 1361, 4 L.Ed.2d 1424 (1960). Courts will not weigh the merits of the claim or determine whether the claim is supported by language in the written instrument; otherwise, the policy of settling labor disputes through arbitration would be undermined. Misco, 484 U.S. at 36, 108 S.Ct. at 369-70; see also United Steelworkers of Am. v. American Mfg. Co., 363 U.S. 564, 568, 80 S.Ct. 1343, 1346, 4 L.Ed.2d 1403 (1960) (\u201c[C]ourts, therefore, have no business weighing the merits of the grievance, considering whether there is equity in a particular claim, or determining whether there is particular language in the written instrument which will support the claim.\u201d). Despite the great amount of deference accorded an arbitrator\u2019s decision, our review is not toothless when an arbitrator\u2019s award disregards the collective bargaining agreement and its terms. See Lattimer-Stevens Co. v. United Steelworkers, 913 F.2d 1166, 1171-72 (6th Cir.1990) (Boggs, J., dissenting) (delineating eases setting aside arbitrator\u2019s decision). Even though arbitrators are not flawless, courts must refrain from reversing an arbitrator simply because the court disagrees with the result or believes the arbitrator made a serious legal or factual error. Misco, 484 U.S. at 38, 108 S.Ct. at 371 (\u201cthat a court is convinced [the
#     """,
#     # New context
#     """
#     Below are reference cases provided for factual accuracy. When generating content, you must reference and cross-check the relevant details with the provided reference texts by their reference IDs (e.g., 114 F.3d 596, 114 F.3d 596, 114 F.3d 596).
    
#     These references take precedence over inferred or assumed information. Your output must clearly align with the facts in these cases.
    
#     114 F.3d 596
#     Relations Act, 29 U.S.C. \u00a7 185. The parties filed cross-motions for summary judgment, and the district court enforced the award. The Beacon Journal filed this timely appeal. II. This court reviews the district court\u2019s grant of summary judgment de novo. Rowley v. United States, 76 F.3d 796, 799 (6th Cir.1996). Nevertheless, our scope of review, like the review of the district court, is extremely limited. The Supreme Court has made clear in the Steelworkers\u2019 Trilogy and its progeny that courts must accord an arbitrator\u2019s decision substantial deference because it is the arbitrator\u2019s construction of the agreement, not the court\u2019s construction, to which the parties have agreed. See United Paperworkers Int\u2019l Union v. Misco, 484 U.S. 29, 37-8, 108 S.Ct. 364, 371, 98 L.Ed.2d 286 (1987) (\u201cBecause the parties have contracted to have disputes settled by an arbitrator chosen by them rather than by a judge, it is the arbitrator\u2019s view of the facts and of the meaning of the contract that they have agreed to accept.\u201d). Hence, our review is extremely limited. We review the arbitrator\u2019s decision only to determine whether the arbitrator was \u201carguably construing or applying the contract and acting within the scope of his authority.\u201d Id. at 38, 108 S.Ct. at 371. If the arbitrator\u2019s award \u201cdraws its essence from the collective bargaining agreement,\u201d and is not merely the arbitrator\u2019s \u201cown brand of industrial justice,\u201d the award is legitimate. United Steelworkers of Am. v. Enterprise Wheel & Car Co., 363 U.S. 593, 597, 80 S.Ct. 1358, 1361, 4 L.Ed.2d 1424 (1960). Courts will not weigh the merits of the claim or determine whether the claim is supported by language in the written instrument; otherwise, the policy of settling labor disputes through arbitration would be undermined. Misco, 484 U.S. at 36, 108 S.Ct. at 369-70; see also United
    
#     114 F.3d 596
#     any evidence that a member had \u201cto modify or change his/her vacation plans due to the management\u2019s \u2018new interpretation of its rights under the vacation and management rights clauses of the labor agreement.\u201d Arbitrator\u2019s Decision, Slip op. at 6. In contrast, management was \u201cvague on the specifics of not being able to meet the necessities of the supervisors and the production needs of the newspaper.\u201d Id. The arbitrator made no further findings, but instead found that the Union\u2019s grievance was justified. He then crafted his own solution, whereby the four new supervisors and the Union employees were thrown into a \u201cseniority pool\u201d for vacation selection purposes. He also provided for a grievance procedure through the Union for employees that believed they were adversely affected by the new procedure. The Beacon Journal refused to comply with the arbitration award and instead instituted this lawsuit under section 801 of the Labor Management Relations Act, 29 U.S.C. \u00a7 185. The parties filed cross-motions for summary judgment, and the district court enforced the award. The Beacon Journal filed this timely appeal. II. This court reviews the district court\u2019s grant of summary judgment de novo. Rowley v. United States, 76 F.3d 796, 799 (6th Cir.1996). Nevertheless, our scope of review, like the review of the district court, is extremely limited. The Supreme Court has made clear in the Steelworkers\u2019 Trilogy and its progeny that courts must accord an arbitrator\u2019s decision substantial deference because it is the arbitrator\u2019s construction of the agreement, not the court\u2019s construction, to which the parties have agreed. See United Paperworkers Int\u2019l Union v. Misco, 484 U.S. 29, 37-8, 108 S.Ct. 364, 371, 98 L.Ed.2d 286 (1987) (\u201cBecause the parties have contracted to have disputes settled by an arbitrator chosen by them rather than by a judge, it is the arbitrator\u2019s view
    
#     114 F.3d 596
#     of the facts and of the meaning of the contract that they have agreed to accept.\u201d). Hence, our review is extremely limited. We review the arbitrator\u2019s decision only to determine whether the arbitrator was \u201carguably construing or applying the contract and acting within the scope of his authority.\u201d Id. at 38, 108 S.Ct. at 371. If the arbitrator\u2019s award \u201cdraws its essence from the collective bargaining agreement,\u201d and is not merely the arbitrator\u2019s \u201cown brand of industrial justice,\u201d the award is legitimate. United Steelworkers of Am. v. Enterprise Wheel & Car Co., 363 U.S. 593, 597, 80 S.Ct. 1358, 1361, 4 L.Ed.2d 1424 (1960). Courts will not weigh the merits of the claim or determine whether the claim is supported by language in the written instrument; otherwise, the policy of settling labor disputes through arbitration would be undermined. Misco, 484 U.S. at 36, 108 S.Ct. at 369-70; see also United Steelworkers of Am. v. American Mfg. Co., 363 U.S. 564, 568, 80 S.Ct. 1343, 1346, 4 L.Ed.2d 1403 (1960) (\u201c[C]ourts, therefore, have no business weighing the merits of the grievance, considering whether there is equity in a particular claim, or determining whether there is particular language in the written instrument which will support the claim.\u201d). Despite the great amount of deference accorded an arbitrator\u2019s decision, our review is not toothless when an arbitrator\u2019s award disregards the collective bargaining agreement and its terms. See Lattimer-Stevens Co. v. United Steelworkers, 913 F.2d 1166, 1171-72 (6th Cir.1990) (Boggs, J., dissenting) (delineating eases setting aside arbitrator\u2019s decision). Even though arbitrators are not flawless, courts must refrain from reversing an arbitrator simply because the court disagrees with the result or believes the arbitrator made a serious legal or factual error. Misco, 484 U.S. at 38, 108 S.Ct. at 371 (\u201cthat a court is convinced [the
#     """
#     ]

# prompts = [
#     # Old prompt
#     """
#     Continue to write the following case using the style of my write up. Your answer contains from 100 to 400 words. Make your answer concise, relevant and avoid redundant language.
    
#     BEER, District Judge.
#     Alken-Ziegler, Incorporated, (Company) appeals from the district court\u2019s grant of summary judgment affirming an arbitration award in favor of the International Union, United Automobile, Aerospace and Agricultural Implement Workers of America, and Local Union 985 (Union). For the following reasons, we find that, even in light of our deferential review, the arbitrator disregarded the provisions of the labor contract. Therefore, we reverse the district court\u2019s decision and vacate the arbitration award.
#     I
#     The Company and the Union were parties to a labor contract effective December 15, 1999. In March, 2001, the Company notified the Union that it would be closing its Novi plant and that it would be necessary to terminate all of the employees at the facility. As a result of the plant closing on October 17, 2001, all but one employee was terminated during the calendar year, 2001. The Company refused to pay vacationpay benefits to employees who did not work for the Company on January 1, 2002. The Union filed a grievance.
#     Article 16 (61) of the labor agreement sets forth the eligibility requirement for payment of vacation benefits:
#     (a) Employees shall be eligible for vacations, time off and vacation pay as set forth below.
#     (b) For purposes of eligibility, the vacation year will be considered the calendar year period from January 1st to December 31.
#     (c) An employee covered by the agreement who is actually working on January 1st of any year and who has at least six (6) months seniority and has' worked at least eight hundred (800) hours from and after January 1st of the previous year shall be paid the equivalent of two-and-one half (2-1/2) days vacation pay.
#     ijs ifc tjc %
#     (f) Employees with twelve (12) months or more of seniority who have worked more than eight hundred (800) hours, but less than sixteen hundred (1600) hours, during the vacation year, shall receive a pro-rated vacation pay on the basis of the ratio of their actual hours to sixteen hundred (1600) hours, but not to exceed the full vacation pay to which they were entitled by reason of their seniority and hours worked as set forth above.
#     (g) Vacation pay will be computed on a straight time forty (40) hour basis including applicable shift premium. The employee\u2019s hour basis including applicable shift premium. The employee\u2019s hourly rate in effect when vacation is taken will be used to compute vacation pay. If an employee is laid off after six (6) months service, their vacation pay will be pro-rated same as above.
#     Pursuant to Article 5 of the labor contract, the parties arbitrated the grievance. At the arbitration the Union asserted that because it was not the employees\u2019 fault that they were unable to work the full year, the employees were entitled to their vacation pay. The arbitrator granted the grievance, allowing all plaintiffs, who, but for being laid off, would have been able to continue employment and thereby qualify for vacation benefits. The arbitrator reasoned that \u201c[i]t would be unreasonable to cause such forfeitures particularly where an employee has no control over the situation.\u201d
#     The Company filed a complaint in the district court asserting that the arbitrator\u2019s award contradicted the clear, mandatory commands of the labor contract, which required that an employee be \u201cactually working\u201d for the Company as of January 1, 2002, to receive vacation pay. The district court granted the Union\u2019s motion for summary judgment and upheld the arbitrator\u2019s award. The Company appealed.
#     II
#     """,
#     # New prompt
#     """
#     Continue to write the following case using the style of my write-up. 
#     Your response should:
#     - Be concise and within 100 to 400 words.
#     - Explicitly cite the reference IDs in the text where applicable to ensure factual consistency.
#     - Avoid redundant language, assumptions, or information not found in the references.
    
#     BEER, District Judge.
#     Alken-Ziegler, Incorporated, (Company) appeals from the district court\u2019s grant of summary judgment affirming an arbitration award in favor of the International Union, United Automobile, Aerospace and Agricultural Implement Workers of America, and Local Union 985 (Union). For the following reasons, we find that, even in light of our deferential review, the arbitrator disregarded the provisions of the labor contract. Therefore, we reverse the district court\u2019s decision and vacate the arbitration award.
#     I
#     The Company and the Union were parties to a labor contract effective December 15, 1999. In March, 2001, the Company notified the Union that it would be closing its Novi plant and that it would be necessary to terminate all of the employees at the facility. As a result of the plant closing on October 17, 2001, all but one employee was terminated during the calendar year, 2001. The Company refused to pay vacationpay benefits to employees who did not work for the Company on January 1, 2002. The Union filed a grievance.
#     Article 16 (61) of the labor agreement sets forth the eligibility requirement for payment of vacation benefits:
#     (a) Employees shall be eligible for vacations, time off and vacation pay as set forth below.
#     (b) For purposes of eligibility, the vacation year will be considered the calendar year period from January 1st to December 31.
#     (c) An employee covered by the agreement who is actually working on January 1st of any year and who has at least six (6) months seniority and has' worked at least eight hundred (800) hours from and after January 1st of the previous year shall be paid the equivalent of two-and-one half (2-1/2) days vacation pay.
#     ijs ifc tjc %
#     (f) Employees with twelve (12) months or more of seniority who have worked more than eight hundred (800) hours, but less than sixteen hundred (1600) hours, during the vacation year, shall receive a pro-rated vacation pay on the basis of the ratio of their actual hours to sixteen hundred (1600) hours, but not to exceed the full vacation pay to which they were entitled by reason of their seniority and hours worked as set forth above.
#     (g) Vacation pay will be computed on a straight time forty (40) hour basis including applicable shift premium. The employee\u2019s hour basis including applicable shift premium. The employee\u2019s hourly rate in effect when vacation is taken will be used to compute vacation pay. If an employee is laid off after six (6) months service, their vacation pay will be pro-rated same as above.
#     Pursuant to Article 5 of the labor contract, the parties arbitrated the grievance. At the arbitration the Union asserted that because it was not the employees\u2019 fault that they were unable to work the full year, the employees were entitled to their vacation pay. The arbitrator granted the grievance, allowing all plaintiffs, who, but for being laid off, would have been able to continue employment and thereby qualify for vacation benefits. The arbitrator reasoned that \u201c[i]t would be unreasonable to cause such forfeitures particularly where an employee has no control over the situation.\u201d
#     The Company filed a complaint in the district court asserting that the arbitrator\u2019s award contradicted the clear, mandatory commands of the labor contract, which required that an employee be \u201cactually working\u201d for the Company as of January 1, 2002, to receive vacation pay. The district court granted the Union\u2019s motion for summary judgment and upheld the arbitrator\u2019s award. The Company appealed.
#     II
#     """
#     ]

# max_length = 200
# decoding_strategy = 'greedy'
# use_repetition_penalty = True
# repetition_penalty_value = 1.5
# k = 10
# method = 'knnlm'
# lamba_strategy = 'constant'
# lamba = 0.5

# outputs = knnlm_model.generate(
#                             prompts=prompts,
#                             contexts=contexts,
#                             max_length=max_length,
#                             decoding_strategy=decoding_strategy,
#                             k=k,
#                             lamba_strategy=lamba_strategy,
#                             lamba=lamba,
#                             use_repetition_penalty=use_repetition_penalty,
#                             repetition_penalty_value=repetition_penalty_value,
#                             )
# decoded_output = knnlm_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)
# for i, output in enumerate(decoded_output):
#     print(f"Output {i}: {output}")
#     results = {}
#     results['meta'] = {}
#     results['meta']['previous_text'] = new_record_processed['previous_text']
#     results['meta']['gold_text'] = new_record_processed['gold_text']
#     results['gen'] = output
#     scores = evaluate([results], 0)
#     print(json.dumps(scores, indent=4))

In [None]:
# Show the "context" entry of the processed record; the following cell passes this same value to cad_model.generate.
new_record_processed["context"]

In [None]:
# Generate a continuation for one processed record with `cad_model` and print the decoded text.

# One-element batch built from the "new" record; the "old" record entries stay
# commented out so they can be switched back in for comparison.
contexts = [
    new_record_processed["context"],
    # old_record_processed["context"],
]
prompts = [
    new_record_processed["prompt"],
    # old_record_processed["prompt"],
]

# Generation settings; every value below is forwarded by keyword to cad_model.generate.
method = "cad"
alpha = 0.5  # NOTE(review): presumably the context-contrast weight -- confirm in cad.CAD
decoding_strategy = "greedy"
top_p_value = 0.9  # NOTE(review): likely ignored under greedy decoding -- confirm in CAD.generate
max_length = 50
use_repetition_penalty = True
repetition_penalty_value = 1.5

outputs = cad_model.generate(
    prompts=prompts,
    contexts=contexts,
    max_length=max_length,
    decoding_strategy=decoding_strategy,
    top_p_value=top_p_value,
    method=method,
    alpha=alpha,
    use_repetition_penalty=use_repetition_penalty,
    repetition_penalty_value=repetition_penalty_value,
)

# Decode the generated outputs to text (special tokens skipped) and print one line per batch item.
decoded_output = cad_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)
for i, output in enumerate(decoded_output):
    print(f"Output {i}: {output}")
    # Disabled evaluation: would bundle the generation with the record's
    # previous/gold text, call evaluate(), and print the scores as JSON.
    # results = {}
    # results['meta'] = {}
    # results['meta']['previous_text'] = new_record_processed['previous_text']
    # results['meta']['gold_text'] = new_record_processed['gold_text']
    # results['gen'] = output
    # scores = evaluate([results], 0)
    # print(json.dumps(scores, indent=4))


In [None]:

# rep_extension = f'_rep_{f"{use_repetition_penalty}_rep_value_{repetition_penalty_value}" if use_repetition_penalty else use_repetition_penalty}'
# filename = f"../basement/cad_generations/output_{method}_{decoding_strategy}_rep_{rep_extension}_{max_length}.txt"
# os.makedirs(os.path.dirname(filename), exist_ok=True)
# with open(filename, 'w') as file:
#     file.write("Prompt:\n")
#     file.write(prompts[0])
#     file.write("\n\nContext:\n")
#     file.write(contexts[0])
#     file.write("\n\nAnswer:\n")
#     file.write(cad_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])