In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime
import unicodedata

base_url = "https://capitol.texas.gov"
urls = {
    "House": f"{base_url}/Committees/MeetingsUpcoming.aspx?Chamber=H",
    "Senate": f"{base_url}/Committees/MeetingsUpcoming.aspx?Chamber=S",
}

# Function to clean text
def normalize_text(text):
    return unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('utf-8')

# Function to extract weekday
def extract_weekday(date_text):
    try:
        match = re.search(r"([A-Za-z]+ \d{1,2}, \d{4})", date_text)
        if match:
            return datetime.strptime(match.group(1), "%B %d, %Y").strftime("%A")
    except ValueError:
        pass
    return "Unknown"

# Scrape data
data = []
for chamber, url in urls.items():
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    meeting_links = [base_url + a["href"] for a in soup.find_all("a", href=True) if a["href"].endswith(".HTM")]

    for link in meeting_links:
        meeting_response = requests.get(link)
        meeting_soup = BeautifulSoup(meeting_response.text, "html.parser")

        committee_name = next((p.text.split(":")[-1].strip() for p in meeting_soup.find_all("p") if "COMMITTEE:" in p.text), "Unknown Committee")
        committee_name = re.sub(r"\s+", " ", committee_name).strip()  # Clean extra spaces

        meeting_day = next((extract_weekday(p.text) for p in meeting_soup.find_all("p") if "TIME & DATE:" in p.text), "Unknown")

        for td in meeting_soup.find_all("td"):
            bill_link = td.find("a")
            if bill_link and "Bill=" in bill_link["href"]:
                bill_number = bill_link.text.strip()
                full_text = td.get_text("\n").strip()
                text_parts = list(filter(None, full_text.split("\n")))

                bill_author = text_parts[1].strip() if len(text_parts) > 1 else "Unknown"
                if len(text_parts) > 2 and not text_parts[2].startswith("Relating to"):
                    bill_author += " " + text_parts[2].strip()
                bill_author = re.sub(r"\s+", " ", bill_author).strip()

                caption_start = 2 if bill_author != "Unknown" else 1
                caption = " ".join(text_parts[caption_start:]).strip()

                bill_author_cleaned = re.escape(bill_author.replace(",", "").strip())
                caption = re.sub(rf"^\s*{bill_author_cleaned}\s*", "", caption).strip()

                first_name = bill_author.split()[0]
                caption = re.sub(rf"^\s*{re.escape(first_name)}\s*", "", caption).strip()

                caption = re.sub(r"\s+", " ", caption)
                caption = caption.replace("\xa0", " ").strip()

                caption = caption.replace("Relating to Relating to", "Relating to").strip()

                data.append([chamber, meeting_day, committee_name, bill_number, bill_author, caption])


df = pd.DataFrame(data, columns=["Chamber", "Day", "Committee Name", "Bill Number", "Bill Author", "Caption"])
df["Stance"] = ""
df["Action"] = ""

for i in range(len(df)):
    n = df['Bill Author'][i]
    if " " in n:
        name = n.split()[1:]
        for x in name:
            if x in df['Caption'][i]:
                df['Caption'][i] = df['Caption'][i].replace(x, " ").strip()
                
df.to_csv("bills.csv", index=False)


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['Caption'][i] = df['Caption'][i].replace(x, " ").strip()
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the o

In [None]:
import dash
from dash import dcc, html, Input, Output, ctx, dash_table, no_update

import pandas as pd
import plotly.express as px
import os

DATA_FILE = "bills.csv"


def load_data():
    if os.path.exists(DATA_FILE) and os.path.getsize(DATA_FILE) > 0:
        df = pd.read_csv(DATA_FILE)
        required_columns = ["Chamber", "Day", "Committee Name", "Bill Number", "Bill Author", "Caption", "Stance", "Action"]
        
        if all(col in df.columns for col in required_columns):
            return df
        else:
            print("⚠️ Warning: CSV file is missing required columns!")
            return pd.DataFrame(columns=required_columns)
    
    else:
        print("⚠️ Warning: No CSV file found! Returning empty dataframe.")
        return pd.DataFrame(columns=["Chamber", "Day", "Committee Name", "Bill Number", "Bill Author", "Caption", "Stance", "Action"])


df = load_data()

sunburst_fig = px.sunburst(df, path=['Chamber', 'Day', 'Committee Name'])
sunburst_fig.update_layout(
    height=800,
    margin=dict(l=50, r=50, t=50, b=50)
)

app = dash.Dash(__name__)
server = app.server 


app.layout = html.Div([
    html.H1("Texas Legislature Bills Dashboard", style={'textAlign': 'center'}),

    dcc.Graph(
        id="sunburst-chart",
        figure=sunburst_fig,
        style={"height": "800px", "width": "100%"}
    ),


    html.Div([
        dcc.Dropdown(
            id="chamber-filter",
            options=[{"label": ch, "value": ch} for ch in df["Chamber"].unique()] if not df.empty else [],
            placeholder="Select Chamber",
            multi=True,
            style={'width': '50%', 'margin': 'auto'}
        ),
        dcc.Dropdown(
            id="day-filter",
            options=[{"label": day, "value": day} for day in df["Day"].unique()] if not df.empty else [],
            placeholder="Select Day",
            multi=True,
            style={'width': '50%', 'margin': 'auto', 'marginTop': '10px'}
        ),
        dcc.Dropdown(
            id="committee-filter",
            options=[{"label": com, "value": com} for com in df["Committee Name"].unique()] if not df.empty else [],
            placeholder="Select Committee",
            multi=True,
            style={'width': '50%', 'margin': 'auto', 'marginTop': '10px'}
        )
    ], style={'textAlign': 'center', 'marginBottom': '20px'}),

    html.Div([
        dash_table.DataTable(
            id="bill-table",
            columns=[{"name": col, "id": col, "editable": True} for col in df.columns],
            data=df.to_dict("records"),
            style_table={'overflowX': 'auto', 'width': '95%', 'margin': 'auto'},
            editable=True,
            row_selectable="multi",
            filter_action="native",
            sort_action="native",
            page_size=20,
            style_cell={
                'textAlign': 'left',
                'whiteSpace': 'normal', 
                'height': 'auto',
            },
            style_data_conditional=[
                {"if": {"column_id": "Caption"}, "whiteSpace": "normal", "textAlign": "left"},
                {"if": {"column_id": "Stance"}, "whiteSpace": "normal", "textAlign": "left"},
                {"if": {"column_id": "Action"}, "whiteSpace": "normal", "textAlign": "left"}
            ]
        )
    ], style={'textAlign': 'left', 'marginLeft': '20px'}),

    # Save Button
    html.Div([
        html.Button("Save CSV", id="save-button", n_clicks=0, style={'marginTop': 20})
    ], style={'textAlign': 'center'}),

    # Download Link
    dcc.Download(id="download-dataframe-csv"),

    "Demo Designed by Quantitative Edge LLC"
])

# Callbacks
@app.callback(
    Output("bill-table", "data"),
    [Input("chamber-filter", "value"),
     Input("day-filter", "value"),
     Input("committee-filter", "value")]
)
def update_table(selected_chambers, selected_days, selected_committees):
    df = load_data()
    if df.empty:
        return []

    filtered_df = df.copy()

    if selected_chambers:
        filtered_df = filtered_df[filtered_df["Chamber"].isin(selected_chambers)]
    
    if selected_days:
        filtered_df = filtered_df[filtered_df["Day"].isin(selected_days)]

    if selected_committees:
        filtered_df = filtered_df[filtered_df["Committee Name"].isin(selected_committees)]
    
    return filtered_df.to_dict("records")

@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("save-button", "n_clicks"),
    Input("bill-table", "data"),
    prevent_initial_call=True
)
def save_csv(n_clicks, table_data):
    if n_clicks:
        if not table_data:
            print("⚠️ No data to save!")
            return no_update

        updated_df = pd.DataFrame(table_data)

        print(f"✅ Saving {len(updated_df)} rows to CSV.")  # Debugging line

        return dcc.send_data_frame(updated_df.to_csv, "custom_bills.csv", index=False)

if __name__ == '__main__':
    app.run_server(debug=False, host="0.0.0.0", port=8080)
