In [39]:
import io
import os
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from langchain import HuggingFaceHub, LLMChain,PromptTemplate
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from PIL import Image
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration

from helper_functions import run_request

In [2]:
# Read the Hugging Face API token from the environment instead of embedding it
# in the notebook, where it leaks through version control and shared copies.
# Export HUGGINGFACEHUB_API_TOKEN before starting the kernel.
# NOTE(review): the token that used to be hardcoded in this cell should be
# treated as compromised and revoked.
hf_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")
if not hf_key:
    # Warn rather than raise so the cells that do not call the Hub still run.
    warnings.warn(
        "HUGGINGFACEHUB_API_TOKEN is not set; the Hugging Face Hub calls "
        "in this notebook need it."
    )

In [None]:
# Hugging Face Hub repo id of the model that backs the pandas dataframe agent
# (passed as `repo_id` when `llm_code` is built).
model_type_code = "codellama/CodeLlama-34b-Instruct-hf"

In [None]:
# Hub-hosted LLM used by the dataframe agent; a low temperature and a
# 500-token generation cap are passed through as model kwargs.
llm_code = HuggingFaceHub(
    repo_id=model_type_code,
    huggingfacehub_api_token=hf_key,
    model_kwargs=dict(temperature=0.1, max_new_tokens=500),
)

In [None]:
# The agent needs the dataframe it will reason about. Load it here so this cell
# does not rely on `df` being created by a cell further down the notebook
# (which raises NameError on a fresh top-to-bottom run).
# NOTE(review): this is the same CSV the later plotting cell reads — confirm it
# is the frame the agent is meant to inspect.
df = pd.read_csv('data/physionet_cleaned/sleep.csv')
agent = create_pandas_dataframe_agent(llm_code, df, verbose=True)

In [None]:
# Ask the agent to infer a dtype for every column and reply with a conversion
# script.
# NOTE(review): this is a regular (non-raw) string, so the '\n' at the end of
# the prompt reaches the model as an actual newline character, not as the two
# characters backslash + n — confirm that is what was intended.
answer = agent.run(
    "Figure out what should be the type of each column of the dataframe, "
    "and answer with a Python script to convert the columns to the correct type. "
    "To separate each line of code, use '\n'."
)

In [3]:
# Load the processor and the conditional-generation model from the
# `google/deplot` checkpoint; both are reused by the inference cells below.
processor = Pix2StructProcessor.from_pretrained('google/deplot')
model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot')

# Sample chart hosted in the ChartQA GitHub repository.
url = "https://raw.githubusercontent.com/vis-nlp/ChartQA/main/ChartQA%20Dataset/val/png/5090.png"
# Download with a timeout and an explicit status check so a hung or failed
# request surfaces here instead of as an opaque PIL decoding error. Reading
# `response.content` (rather than the undecoded `.raw` stream) also lets
# requests handle any transfer/content encoding and release the connection.
response = requests.get(url, timeout=30)
response.raise_for_status()
image = Image.open(io.BytesIO(response.content))

In [4]:
import pandas as pd
import plotly.express as px
# Load the sleep data and draw average sleep duration over time as a line
# chart, using Plotly's qualitative G10 colour sequence.
df = pd.read_csv('data/physionet_cleaned/sleep.csv')
fig = px.line(
    df,
    x="date",
    y="avgdurationasleep",
    color_discrete_sequence=px.colors.qualitative.G10,
)

In [5]:
# Render the Plotly figure to PNG bytes in memory, then wrap them in a PIL
# image so the figure can be handed to the image processor.
img_bytes = fig.to_image(format="png")
img = Image.open(fp=io.BytesIO(img_bytes))

In [14]:
# Run the chart image through the model: build the tensors, generate up to
# 512 new tokens, and decode the first sequence into plain text.
inputs = processor(
    images=img,
    text="Generate information from the figure below:",
    return_tensors="pt",
)
predictions = model.generate(max_new_tokens=512, **inputs)
answer = processor.decode(predictions[0], skip_special_tokens=True)
print(answer)

TITLE |  <0x0A> date | avgdurationasleep <0x0A> Apr 15<0x0A>2018 | 45 <0x0A> Apr 29 | 38 <0x0A> May 13 | 46 <0x0A> May 27 | 43 <0x0A> Jun 10 | 91 <0x0A> Jun 24 | 119


In [40]:
# Hugging Face Hub repo id of the chat model that turns the chart description
# into insights (used as `repo_id` when `llm` is built).
model_type = "HuggingFaceH4/zephyr-7b-beta"

In [16]:
# Prompt = fixed instruction, a blank line, then the decoded chart description.
question_to_ask = (
    "Based on the following description of a figure, give the key insights or trends.\n\n"
    + answer
)

In [18]:
# Hub-hosted LLM that summarises the chart description; near-zero temperature
# and a 500-token generation cap are passed through as model kwargs.
llm = HuggingFaceHub(
    huggingfacehub_api_token=hf_key,
    repo_id=model_type,
    model_kwargs={"temperature": 0.01, "max_new_tokens": 500},
)
# `question_to_ask` contains model-generated text. Using it as the template
# itself would make any literal "{" or "}" in that text be parsed as a template
# variable and break formatting, so it is passed in as the *value* of a single
# variable instead. The rendered prompt is the same for brace-free text.
llm_prompt = PromptTemplate.from_template("{question}")
llm_chain = LLMChain(llm=llm, prompt=llm_prompt)
llm_response = llm_chain.predict(question=question_to_ask)

In [24]:
# Show the second blank-line-separated paragraph of the response. When the
# response has no blank line there is no second paragraph, so fall back to the
# whole response instead of raising IndexError.
llm_response.split('\n\n')[1] if '\n\n' in llm_response else llm_response

"The data provided in the description shows a consistent pattern of sleep duration for an individual, with an average of 111 minutes (1 hour and 51 minutes) for each of the 30 recorded dates. This indicates a stable sleep pattern for the individual over the course of several months. The longest duration of sleep recorded was 119 minutes on June 24, while the shortest duration was 38 minutes on April 29. However, it is noteworthy that all other sleep durations were identical, suggesting a highly regular sleep cycle. This data could be useful for understanding the individual's sleep habits and potentially identifying any underlying sleep disorders or issues."

In [25]:
# Rebuild the line chart, this time with a title and readable axis titles.
fig = (
    px.line(
        df,
        x="date",
        y="avgdurationasleep",
        title="How much time do I sleep on average ?",
    )
    .update_xaxes(title="Date")
    .update_yaxes(title="Average Duration Asleep")
)
# Trailing expression so the notebook renders the finished figure.
fig

In [46]:
# Fixed preamble of the insights prompt; the chart description is appended to
# it in a later cell.
query = (
    "You are an helpful and friendly data analyst assistant.\n"
    "You will be given a description of a figure and you will have to generate key insights and trends from it.\n"
    "Answer with sentences in a friendly and helpful manner.\n"
    "Here is the description: "
)

# Render the titled figure to an in-memory PNG and open it as a PIL image.
img_bytes = fig.to_image(format="png")
img = Image.open(fp=io.BytesIO(img_bytes))
inputs = processor(
    images=img,
    text="Generate informations from the figure below:",
    return_tensors="pt",
)

# Generate up to 1024 new tokens and decode the first sequence into text.
predictions = model.generate(max_new_tokens=1024, **inputs)
answer = processor.decode(predictions[0], skip_special_tokens=True)

In [47]:
# Append the decoded chart description and a blank line to the prompt.
# Re-running this cell on its own appends the description again, because it
# extends whatever `query` currently holds.
query = "".join((query, answer, "\n\n"))

In [48]:
# Show the full prompt (preamble + chart description) before it is sent to
# the model.
print(query)

You are an helpful and friendly data analyst assistant.
You will be given a description of a figure and you will have to generate key insights and trends from it.
Answer with sentences in a friendly and helpful manner.
Here is the description: TITLE | How much time do I sleep on average?<0x0A>Date | Average Duration Assleep<0x0A>Year <0x0A> Apr 15<0x0A>2018 | 40 <0x0A> Apr 29 | 38 <0x0A> May 13 | 64 <0x0A> May 27 | 144 <0x0A> Jun 10 | 90 <0x0A> Jun 24 | 118




In [49]:
# Send the assembled prompt to the insights model. The repo id comes from
# `model_type` rather than a repeated string literal, so this call and the
# LLMChain cell above cannot drift apart.
# NOTE(review): `run_request` comes from helper_functions; its arguments are
# assumed to be (prompt, model repo id, API token) from this call — confirm.
insights = run_request(query, model_type, hf_key)

In [51]:
# Show the value returned by `run_request` for the prompt above.
print(insights)

Based on the given data, we can see that the average duration of sleep per night varies from 38 to 144 minutes. The highest average sleep duration was recorded on May 27, while the lowest was recorded on April 15. Overall, the average sleep duration for the given time period is approximately 80 minutes.

From this data, we can also observe that there seems to be a trend of longer sleep durations in the month of May. This could potentially indicate that the individual's sleep patterns change based on the season or other external factors.

Additionally, we can see that there is some variability in sleep duration within each month. For example, in April, the sleep duration ranged from 38 to 40 minutes, while in May, it ranged from 64 to 144 minutes. This variability could be due to factors such as stress, diet, or exercise habits.

Overall, these insights can help the individual better understand their sleep patterns and potentially make adjustments to improve the quality and duration of 