In [None]:

from users import *
import polars as pl

# Import `os` explicitly rather than relying on `from users import *` to
# re-export it; a wildcard import only exposes what the module chooses to.
import os

# The hub credential is read from the environment so it is never stored in
# the notebook. os.getenv returns None when WARPCAST_HUB_KEY is unset.
warpcast_hub_key = os.getenv("WARPCAST_HUB_KEY")

# Raw user records consumed by the cells below.
# NOTE(review): get_all_users_from_warpcast is presumably provided by the
# `users` wildcard import — confirm.
users = get_all_users_from_warpcast(warpcast_hub_key)


In [None]:
# Define the column names and data types for the DataFrame
users_cols = {
    'fid': pl.UInt32,
    'username': pl.Utf8,
    'display_name': pl.Utf8,
    'verified': pl.Boolean,
    'pfp_url': pl.Utf8,
    'follower_count': pl.UInt32,
    'following_count': pl.UInt32,
    'bio_text': pl.Utf8,
    'location_place_id': pl.Utf8,
}


def extract_warpcast_user_data(user):
    """Flatten one Warpcast user record into a row keyed like ``users_cols``.

    Args:
        user: Mapping for a single user. ``fid``, ``username``,
            ``displayName``, ``followerCount`` and ``followingCount`` are
            required top-level keys. ``pfp`` and ``profile`` (with nested
            ``bio`` / ``location``) are optional and may be absent or None.

    Returns:
        dict with the keys of ``users_cols``. When the optional parts are
        missing: ``verified`` is False, ``pfp_url`` is ``''``, ``bio_text``
        is None and ``location_place_id`` is None.

    Raises:
        KeyError: if one of the required top-level keys is missing.
    """
    # `or {}` folds "key missing" and "key present but None" into one case so
    # the nested lookups below never index into None.
    pfp = user.get('pfp') or {}
    profile = user.get('profile') or {}
    bio = profile.get('bio') or {}
    location = profile.get('location') or {}

    return {
        'fid': user['fid'],
        'username': user['username'],
        'display_name': user['displayName'],
        'verified': pfp.get('verified', False),
        'pfp_url': pfp.get('url', ''),
        'follower_count': user['followerCount'],
        'following_count': user['followingCount'],
        'bio_text': bio.get('text'),
        # An empty or missing placeId is stored as null, mirroring
        # get_warpcast_location, which skips falsy place ids.
        'location_place_id': location.get('placeId') or None,
    }


# Build the rows only after the helper above is defined, so this cell runs
# top-to-bottom on a fresh kernel.
warpcast_data = [extract_warpcast_user_data(u) for u in users]

users_df = pl.DataFrame(warpcast_data, schema=users_cols)
users_df.write_parquet('users.parquet')


In [None]:
# Schema of the locations table: both columns are strings.
locations_col = dict(
    place_id=pl.Utf8,
    description=pl.Utf8,
)

def get_warpcast_location(user) -> Optional[dict]:
    """Extract the location of a Warpcast user record, if it has a place id.

    Args:
        user: Mapping for a single user. ``profile`` and the nested
            ``location`` are optional and may be absent or None.

    Returns:
        ``{'place_id': ..., 'description': ...}`` when the location carries a
        non-empty ``placeId`` (``description`` is None if absent); otherwise
        None.
    """
    # `or {}` treats a missing key and an explicit None the same way, so a
    # record without profile/location yields None instead of raising.
    profile = user.get('profile') or {}
    location = profile.get('location') or {}

    place_id = location.get('placeId')
    if not place_id:
        return None
    return {'place_id': place_id, 'description': location.get('description')}
# One entry per user; None for users without a usable location.
user_locations = [get_warpcast_location(u) for u in users]

# Keep a single row per place_id (first description seen wins). Many users can
# share a place, and duplicate place_id rows would multiply the matches of any
# later join on place_id.
locations_by_place_id = {}
for loc in user_locations:
    if loc is not None:
        locations_by_place_id.setdefault(loc['place_id'], loc)
locations = list(locations_by_place_id.values())

# Print a short summary instead of dumping the full list into the output.
print(f"{len(locations)} distinct locations; first few: {locations[:5]}")

locations_df = pl.DataFrame(locations, schema=locations_col)
locations_df.write_parquet('locations.parquet')


In [5]:
import duckdb
# Join every user to their location row and keep those whose location
# description contains "Japan". The query reads the two parquet files by path.
japan_users_sql = '''
SELECT *
FROM 'users.parquet' u
JOIN 'locations.parquet' l ON u.location_place_id = l.place_id
WHERE l.description LIKE '%Japan%'
'''
japan_users = duckdb.query(japan_users_sql)

# Materialise the query result via .pl() (a polars DataFrame, judging by the
# printed output).
df = japan_users.pl()

print(df)

shape: (19, 11)
┌───────┬────────────┬────────────┬──────────┬───┬────────────┬────────────┬───────────┬───────────┐
│ fid   ┆ username   ┆ display_na ┆ verified ┆ … ┆ bio_text   ┆ location_p ┆ place_id  ┆ descripti │
│ ---   ┆ ---        ┆ me         ┆ ---      ┆   ┆ ---        ┆ lace_id    ┆ ---       ┆ on        │
│ u32   ┆ str        ┆ ---        ┆ bool     ┆   ┆ str        ┆ ---        ┆ str       ┆ ---       │
│       ┆            ┆ str        ┆          ┆   ┆            ┆ str        ┆           ┆ str       │
╞═══════╪════════════╪════════════╪══════════╪═══╪════════════╪════════════╪═══════════╪═══════════╡
│ 10993 ┆ minatofund ┆ MinatoFund ┆ false    ┆ … ┆ MinatoFund ┆ ChIJXSModo ┆ ChIJXSMod ┆ Tokyo,    │
│       ┆            ┆            ┆          ┆   ┆            ┆ WLGGARILWi ┆ oWLGGARIL ┆ Japan     │
│       ┆            ┆            ┆          ┆   ┆            ┆ Cfeu2M0    ┆ WiCfeu2M0 ┆           │
│ 10726 ┆ nishavtvl  ┆ Nisha      ┆ false    ┆ … ┆ cofounder  ┆ ChIJ4eIGNF 