# Executive Order Tagging

Use the clusters to figure out tags.

In [125]:
import json
import pandas
from typing import List
from langchain.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel, Field
from concurrent.futures import ThreadPoolExecutor, as_completed

## Combine Datasets

Combine some datasets so we can understand which presidents and parties are in each cluster.

In [93]:
clusters = pandas.read_csv('data/executive_orders/clusters.csv')
clusters['name'] = clusters['path'].apply(lambda x: x.split('/')[-1].split('.')[0])

sentiment = pandas.read_csv('data/executive_orders/sentiment.csv')
presidents = pandas.read_csv('data/executive_orders/presidents.csv')

# add president to the clusters
combined = clusters.merge(sentiment, on='name')
# join with president
combined = combined.merge(presidents, on='president')

# save
combined.to_csv('data/executive_orders/combined.csv', index=False)

In [94]:
combined = pandas.read_csv('data/executive_orders/combined.csv')

# group by cluster and create a column of array of unique presidents and parties
grouped = combined.groupby('cluster').agg({
    'president': lambda x: list(set(x)),
    'party': lambda x: list(set(x))
}).reset_index()
grouped

Unnamed: 0,cluster,president,party
0,0,"[Chester A. Arthur, Andrew Johnson, Rutherford...","[Democratic, Republican, Whig, No Party]"
1,1,"[Harry S Truman, George W. Bush, Gerald R. For...","[Democratic, Republican]"
2,2,"[Theodore Roosevelt, Woodrow Wilson, Calvin Co...","[Democratic, Republican]"
3,3,"[Harry S Truman, Dwight D. Eisenhower, Gerald ...","[Democratic, Republican]"
4,4,"[Harry S Truman, Theodore Roosevelt, Woodrow W...","[Democratic, Republican]"
5,5,"[Harry S Truman, George W. Bush, Donald J. Tru...","[Democratic, Republican]"
6,6,"[Chester A. Arthur, Donald J. Trump (2nd Term)...","[Democratic, Republican]"
7,7,"[Harry S Truman, George W. Bush, Theodore Roos...","[Democratic, Republican]"
8,8,"[George W. Bush, Donald J. Trump (2nd Term), U...","[Democratic, Republican]"
9,9,"[Franklin D. Roosevelt, Herbert Hoover]","[Democratic, Republican]"


## Build Tags

In [132]:
gemini = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.1,
    max_retries=3
)

class Tags(BaseModel):
    analysis: str = Field(description='Analysis of the texts.')
    tags: List[str] = Field(description='Tags assigned to the cluster..')
    title: str = Field(description='Title of the cluster.')

def build_tags_pipeline(model, n_texts: int):
    prompt = [
        'Your task is to understand why the given documents were assigned to the same cluster of documents of the same type.',
        '- First analyze the documents for common themes, topics, and sentiment.',
        '- Then assign tags that would apply to all given documents and unique to this cluster based on your analysis.',
        '- Finally give the cluster a title based on the analysis and derived tags to distinguish it from the other clusters.',
        '- Do not use tags or titles that describe the document type, such as "Executive Order" or "Presidential Orders".',
        '- Do not use generic phrases like "Policies", "Regulations", "Orders", or "Actions" in titles, be specific about what is in common in the documents.'
    ]
    for i in range(n_texts):
        prompt.extend([
            f'# Document {i+1}',
            f'{{document_{i}}}',
        ])

    return ChatPromptTemplate.from_template(
        '\n'.join(prompt)
    ) | model.with_structured_output(Tags, include_raw=True)

def sample_in_cluster(cluster: int, n_texts: int):
    population = combined[combined['cluster'] == cluster]['path']
    sampled = population.sample(min(n_texts, population.shape[0]))

    documents = []
    for p in sampled:
        with open('data/executive_orders/raw/' + p) as f:
            eo = json.load(f)
            documents.append('\n'.join(eo["content"]))

    return documents

def sample_all_clusters(sample_size: int, sample_i: int):
    results = [f'Sample size: {sample_size}']
    for i in range(grouped.shape[0]):
        sampled = sample_in_cluster(i, sample_size)
        response = build_tags_pipeline(gemini, len(sampled))\
            .invoke({f'document_{d_i}': d for d_i, d in enumerate(sampled)})
        
        result = response['parsed'].model_dump() | {
            'request': {
                'cluster': i,
                'sample_size': sample_size
            },
            'stats': {
                'input_tokens': response['raw'].usage_metadata['input_tokens'],
                'output_tokens': response['raw'].usage_metadata['output_tokens'],
            }
        }

        with open(f'data/executive_orders/tags/{i}_{sample_size}_{sample_i}.json', 'w') as f:
            json.dump(result, f, indent=4)

        results.append(f'{str(i)}: {response['parsed'].title}')

    return results

In [None]:
sample_sizes = [10, 25, 50]
base_samples = 2

sample_params = []
for sample_size in sample_sizes:
    for i in range(base_samples * 50//sample_size):
        sample_params.append((sample_size, i))

with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(sample_all_clusters, *params) for params in sample_params]

        for future in as_completed(futures):
            print('\n'.join(future.result()))


Sample size: 10
0: Military Appointments and Operational Directives
1: Executive Orders Amending Presidential Committees and Commissions
2: Alaskan Land Management Modifications
3: Authorizing Inspection of Tax Returns for Governmental Oversight
4: Federal Land Management and Boundary Adjustments
5: Delegating Authority to Government Entities
6: Government Intervention and Policy Implementation
7: Defense Zones and Restricted Airspace
8: Safeguarding National Interests Through Executive Authority
9: Extending Service of Government Employees
10: Adjustments to Land Reservations and Withdrawals
11: Amendments, Holidays, and Wartime Labor
12: Revoking and Amending Prior Directives
13: Wartime Production and Resource Management
14: California Land Management Directives
15: Order of Succession in Federal Offices
16: Transportation Labor Dispute Intervention
17: Land Reservations and Boundary Adjustments
18: Recognition of Service and Achievement
19: Government Employee Retirement Extension


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<loc

ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).