In [1]:
from massw import api_gpt as api
import pandas as pd
import jsonlines as jl
import json

# Load every annotation record from the JSON Lines file into memory.
with jl.open("../data/annotation_0531.jsonl") as f:
    annotated_instances = list(f)
print(len(annotated_instances))

# Process the annotated instances: one row per annotation record, indexed by "id".
# The same id may appear on several rows, hence the de-duplication further down.
df_annotated = pd.DataFrame(annotated_instances).set_index("id")

# Output column -> key inside label_annotations["Multi-aspect Summary"].
# Note the annotation key for the key idea is spelled "Key idea" (lower-case "i").
aspect_keys = {
    "Context": "Context",
    "Key Idea": "Key idea",
    "Method": "Method",
    "Outcome": "Outcome",
    "Future Impact": "Future Impact",
}
for column, key in aspect_keys.items():
    # Bind `key` as a default argument so each lambda keeps its own aspect key.
    df_annotated[column] = df_annotated["label_annotations"].apply(
        lambda x, key=key: x["Multi-aspect Summary"][key])
df_annotated = df_annotated.drop(columns=["label_annotations", "span_annotations"])

ids = df_annotated.index.unique()
# Map each id to the "displayed_text" of its first row. The de-duplication is done
# once here instead of being recomputed for every id.
first_rows = df_annotated[~df_annotated.index.duplicated(keep="first")]
texts = first_rows["displayed_text"].to_dict()
# Batch client from the project's massw.api_gpt module, built with a default AzureConfig.
# NOTE(review): `tpm` is presumably a tokens-per-minute budget — confirm against massw.api_gpt.
azure_config = api.AzureConfig()
batch = api.Batch(azure=azure_config, tpm=40000)

# The system prompt is kept in a text file next to the notebook.
with open("prompt.txt") as f:
    system_prompt = f.read()

# Few-shot example, user side: a paper title and abstract.
example_prompt = """
Title: Attention Is All You Need
Abstract: The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.
"""

# Few-shot example, assistant side: the expected JSON answer for the abstract above.
# The "Outcome" scores follow the abstract: 28.4 BLEU is the English-to-German result
# and 41.8 BLEU is the English-to-French result.
example_output = """{
  "Context": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing sequence transduction models connect the encoder and decoder through an attention mechanism.",
  "Key Idea": "The authors propose a simple network architecture called Transformer based solely on attention mechanisms and dispenses with recurrence and convolutions.",
  "Method": "The authors perform experiments on the WMT 2014 English-to-German and English-to-French translation task. The authors apply the proposed model to English constituency parsing both with large and limited training data.",
  "Outcome": "The proposed model achieves a BLEU score of 28.4 on the WMT 2014 English-to-German translation task. The proposed model achieves a BLEU score of 41.8 on the WMT 2014 English-to-French translation task after training for 3.5 days on 8 GPUs.",
  "Future Impact": "N/A"
}"""


240


In [None]:

for i in ids:
    await batch.add(
        "chat.completions.create",
        model="gpt-35-turbo",
        # response_format={ "type": "json_object" },
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_prompt},
            {"role": "assistant", "content": example_output},
            {"role": "user", "content": texts[i]},
        ],
        metadata={"id": i},
    )

results = await batch.run()

In [6]:
# id2result: maps each response's "id" to its parsed JSON summary.
# Responses that are not valid JSON are printed (id, then cleaned text) and skipped.
id2result = {}
for _, result in results.iterrows():
    content = result["result"]["choices"][0]["message"]["content"]
    # Strip wrappers around the JSON payload: Markdown code fences (```json ... ```),
    # which the model may add, and the triple-double-quote form handled originally.
    for wrapper in ("```json", "```", '"""json', '"""'):
        content = content.replace(wrapper, "")
    try:
        id2result[result["id"]] = json.loads(content)
    except json.JSONDecodeError:
        print(result["id"])
        print(content)

9821d5f7-72b0-4841-a54f-d2af4a04ea3a
{
  "Context": "The paper addresses the problem of inverse rendering where estimating the spherical harmonic illumination coefficients and texture parameters in a specular invariant colour subspace is challenging.",
  "Key Idea": "The authors propose a novel approach for inverse rendering based on a linear basis approximation of surface texture, which can account for non-Lambertian specular reflectance and complex illumination of the same light source colour.",
  "Method": "The proposed approach involves fitting a 3D morphable model to a single colour image of faces through the solution of bilinear equations in a specular invariant colour subspace. This approach recovers statistical texture model parameters without relying on computationally expensive analysis-by-synthesis techniques.",
  "Outcome": "The proposed approach recovers texture model parameters with an accuracy comparable to that of more computationally expensive methods, while requiring 

In [7]:
# Save the parsed GPT-3.5 summaries as JSON Lines, one record per id with the
# summary fields merged in next to "id".
with jl.open("../data/gpt35_0531.jsonl", "w") as f:
    gpt35_records = []
    for paper_id, summary in id2result.items():
        gpt35_records.append({"id": paper_id, **summary})
    f.write_all(gpt35_records)

In [17]:
# Mixtral baselines
import ssl
import os
import urllib.request


def allowSelfSignedHttps(allowed):
    """Optionally switch off server-certificate verification for HTTPS clients.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable is
    empty or unset, and ``ssl`` exposes ``_create_unverified_context``, the module's
    default HTTPS context factory is replaced by the unverified one. Otherwise
    nothing is changed. The replacement is process-wide: it patches the ``ssl``
    module itself.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    # Bypass the server certificate verification on client side
    if getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context


# Enabling self-signed certificates if required. This can switch off certificate
# verification for every HTTPS request made later in this kernel.
allowSelfSignedHttps(allowed=True)


def mixtral_request(messages, api_key=None):
    """Send a chat history to the Mixtral scoring endpoint and return its output.

    Args:
        messages: Chat history, sent under ``input_data.input_string`` in the
            JSON request body.
        api_key: Bearer token for the endpoint. When omitted, the
            ``MIXTRAL_API_KEY`` environment variable is used.

    Returns:
        The value stored under the ``"output"`` key of the JSON response.

    Raises:
        RuntimeError: If no API key is passed and ``MIXTRAL_API_KEY`` is unset
            or empty.
        urllib.error.HTTPError: If the endpoint answers with an HTTP error. The
            status code, headers and body are printed before it is re-raised.
    """
    # Credentials must not live in the notebook. The key that used to be
    # hard-coded here should be treated as leaked and rotated.
    if api_key is None:
        api_key = os.environ.get("MIXTRAL_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Mixtral API key missing: pass api_key=... or set the "
            "MIXTRAL_API_KEY environment variable.")

    data = {
        "input_data": {
            "input_string": messages,
            "parameters": {
                "temperature": 0.6,
                "top_p": 0.9,
                "do_sample": True,
                "max_new_tokens": 500,
                "return_full_text": True
            }
        }
    }
    url = 'https://xingjian-ml-apqyj.eastus.inference.ml.azure.com/score'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': ('Bearer ' + api_key),
        'azureml-model-deployment': 'mixtralai-mixtral-8x7b-instru-7'
    }
    body = json.dumps(data).encode("utf-8")

    req = urllib.request.Request(url, body, headers)
    try:
        # The context manager closes the HTTP response even if reading fails.
        with urllib.request.urlopen(req) as response:
            result = response.read()
    except urllib.error.HTTPError as error:
        print("The request failed with status code: " + str(error.code))

        # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
        print(error.info())
        print(error.read().decode("utf8", 'ignore'))
        # Re-raise: there is no result to return. The original fell through to
        # `return response` and failed with an unrelated UnboundLocalError.
        raise
    return json.loads(result)["output"]


def get_mixtral_result(text):
    """Ask Mixtral to summarise ``text`` using the shared few-shot conversation.

    The conversation is: the system prompt sent as a user turn, a fixed assistant
    acknowledgement, the worked example (user prompt and assistant answer), and
    finally ``text`` as the last user turn. Returns whatever ``mixtral_request``
    returns for that conversation.
    """
    turns = (
        ("user", system_prompt),
        ("assistant", "I understand. Please give me futher information."),
        ("user", example_prompt),
        ("assistant", example_output),
        ("user", text),
    )
    messages = [{"role": role, "content": content} for role, content in turns]
    return mixtral_request(messages)

In [18]:
from tqdm.notebook import tqdm

# Query Mixtral once per paper id. Each response is stored under its id as soon as
# it arrives, with a progress bar over the ids.
id2result_mixtral = {}
for paper_id in tqdm(ids):
    paper_text = texts[paper_id]
    id2result_mixtral[paper_id] = get_mixtral_result(paper_text)

  0%|          | 0/120 [00:00<?, ?it/s]

In [21]:
# Sanity check: number of stored responses, then the second response as returned.
mixtral_outputs = list(id2result_mixtral.values())
print(len(id2result_mixtral))
print(mixtral_outputs[1])

120
 {
  "Context": "Non-pharmacological interventions, such as reminiscence and biographical cognitive stimulation practices, are common and effective for people with dementia. However, obtaining and maintaining biographical or personalized materials can be challenging.",
  "Key Idea": "The authors created a web platform that supports the work of psychologists in collecting and managing biographical materials for use in reminiscence and other biographical cognitive stimulation practices.",
  "Method": "The authors conducted a case study with one psychologist and three patients, using the platform for a period of two weeks.",
  "Outcome": "The results of the case study showed improvements in the collection of meaningful data about a person and in maintaining awareness of the therapy as a whole.",
  "Future Impact": "The platform has the potential to be widely adopted in the field of dementia care, improving the quality and efficiency of non-pharmacological interventions."
}


In [22]:
def parse_json_garbage(s):
    """Parse the JSON value that starts at the first ``{`` or ``[`` in ``s``.

    Text before that opening bracket and text after the end of the JSON value
    are ignored.

    Args:
        s: String expected to contain one JSON object or array.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If ``s`` contains no ``{`` or ``[``, or if the text
            starting there is not valid JSON (for example a truncated response).
    """
    start = next((idx for idx, c in enumerate(s) if c in "{["), None)
    if start is None:
        # Raise a JSON error rather than leaking a bare StopIteration from next().
        raise json.JSONDecodeError("No JSON object or array found", s, 0)
    # raw_decode stops at the end of the first complete JSON value, so trailing
    # text is dropped without a second parse.
    value, _end = json.JSONDecoder().raw_decode(s, start)
    return value

# Parse each raw Mixtral response into a dict. Responses that cannot be parsed are
# printed (id, then raw text) and left out of id2json_mixtral.
id2json_mixtral = {}
for paper_id, raw_response in id2result_mixtral.items():
    try:
        parsed_summary = parse_json_garbage(raw_response)
    except Exception:
        print(paper_id)
        print(raw_response)
    else:
        id2json_mixtral[paper_id] = parsed_summary

adfd8058-64b3-4062-953c-034b732e2fa0
 {
"Context": "In video compression, P and B-frames are typically coded with complex recurrent or convolutional neural networks, while I-frames are coded with H.2


In [23]:
# Save the parsed Mixtral summaries as JSON Lines, one record per id with the
# summary fields merged in next to "id".
with jl.open("../data/mixtral_0531.jsonl", "w") as f:
    mixtral_records = []
    for paper_id, summary in id2json_mixtral.items():
        mixtral_records.append({"id": paper_id, **summary})
    f.write_all(mixtral_records)

In [26]:
# Fix ad hoc: re-query Mixtral for one specific paper id and display the raw response.
# NOTE(review): the response is only displayed here. Nothing in the visible cells
# stores it in id2json_mixtral or rewrites ../data/mixtral_0531.jsonl, so confirm the
# saved file was patched.
retry_id = "adfd8058-64b3-4062-953c-034b732e2fa0"
get_mixtral_result(texts[retry_id])

' {\n"Context": "Current video compression schemes are based on complex algorithms such as H.264, which may not be efficient in coding certain types of video sequences.",\n"Key Idea": "The authors propose a video compression scheme based on texture synthesis through Directional Empirical Mode Decomposition (DEMD) algorithm. The proposed scheme decompose P and B-frames into Intrinsic Mode Function (IMF) image and its residue, and only the first level IMF image for P and B frames are coded.",\n"Method": "The authors perform wavelet decomposition over residual image and use energy level at the HH band as a decision criterion for number of decomposition to be performed for optimum synthesis. The authors also demonstrate the effectiveness of the algorithm in multi-resolution parametric modeling of image data and scalable coding of IMF parameters.",\n"Outcome": "The proposed scheme demonstrates significant compression with acceptable quality.",\n"Future Impact": "N/A"\n}'