In [5]:
# connect to test.db sqlite, create session, then run "get all users" with session.Query
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from models import Base, User, Location, ExternalAddress, Cast
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

engine = create_engine(os.getenv("PLANETSCALE_URL"))

# sqlite data.db
engine = create_engine('sqlite:///data-with-casts.db')

with sessionmaker(bind=engine)() as session:
    result = session.execute(text("SELECT * FROM casts LIMIT 100;"))

    # dump to df
    df = pd.DataFrame(result.fetchall())
    df = df.drop(columns=['replies_count', 'reactions_count', 'recasts_count', 'watches_count'])

    casts = []
    for index, row in df.iterrows():
        casts.append(Cast(**row.to_dict()))
    
    mysql_engine = create_engine(os.getenv("PLANETSCALE_URL"))
    with sessionmaker(bind=mysql_engine)() as mysql_session:
        mysql_session.bulk_save_objects(casts)

In [11]:
engine = create_engine(os.getenv("PLANETSCALE_URL"))
with sessionmaker(bind=engine)() as session:

    # get cast with hash 0x7cba6057e7bc7a352f1128b91a918d15b792ca382be13958cd20dfc2ee1952c8
    cast = session.query(Cast).filter(Cast.hash == '0x0ee0f23e018fcda0f861317226db8fb3d6db6a7602aeb5f3cd0b352c3d8f3335').first()
    print(cast.children)


AttributeError: 'Cast' object has no attribute 'children'

In [None]:
"""
Place all models in the notebook so Copilot knows what it's working with

Here are all the available models:
parent_association = Table('parent_association', Base.metadata,
                           Column('parent_hash', String,
                                  ForeignKey('casts.hash')),
                           Column('cast_hash', String,
                                  ForeignKey('casts.hash'))
                           )


# figure out ancestor hashes later
class Cast(Base):
    __tablename__ = 'casts'
    hash = Column(String, primary_key=True)
    thread_hash = Column(String, ForeignKey(
        'casts.hash'))
    parent_hash = Column(String, ForeignKey(
        'casts.hash'), nullable=True)
    text = Column(String)
    timestamp = Column(BigInteger)
    author_fid = Column(BigInteger, ForeignKey(
        'users.fid'))
    author = relationship('User', back_populates='casts')
    reactions = relationship('Reaction', back_populates='target')
    children_hashes = relationship("Cast", secondary=parent_association,
                                   primaryjoin=(
                                       hash == parent_association.c.parent_hash),
                                   secondaryjoin=(
                                       hash == parent_association.c.cast_hash),
                                   backref="parent_casts")


class Reaction(Base):
    __tablename__ = 'reactions'
    hash = Column(String, primary_key=True)
    reaction_type = Column(String)  # like & recast
    timestamp = Column(BigInteger)
    target_hash = Column(String, ForeignKey('casts.hash'))
    author_fid = Column(BigInteger, ForeignKey('users.fid'))
    target = relationship('Cast', back_populates='reactions')


class Location(Base):
    __tablename__ = 'locations'
    place_id = Column(String, primary_key=True)
    description = Column(String)
    users = relationship('User', backref='location')


class User(Base):
    __tablename__ = 'users'
    fid = Column(BigInteger, primary_key=True)
    username = Column(String)
    display_name = Column(String)
    verified = Column(Boolean, default=False)
    pfp_url = Column(String, nullable=True)
    follower_count = Column(BigInteger)
    following_count = Column(BigInteger)
    bio_text = Column(String, nullable=True)
    location_place_id = Column(String, ForeignKey(
        'locations.place_id'), nullable=True)
    casts = relationship('Cast', back_populates='author')

"""

In [None]:
# get all users where they have location with description that contains "USA"
# description_location is from the Location model
# location_place_id is from the User model

locations = session.query(Location).filter(
    Location.description.contains("USA")).all()

# print users with locations
users = session.query(User).filter(
    User.location_place_id.in_([location.place_id for location in locations])).all()

df = pd.DataFrame([user.__dict__ for user in users])
# print(df)

# make df with username, location column

new_df = pd.DataFrame(columns=["username", "location"])
for user in users:
    new_df = pd.concat([new_df, pd.DataFrame({"username": [user.username], "location": [user.location.description]})], ignore_index=True)

print(new_df)

In [None]:
# get all users who are in the UK

uk_locations = session.query(Location).filter(
    Location.description.contains("UK")).all()

uk_users = session.query(User).filter(
    User.location_place_id.in_([location.place_id for location in uk_locations])).all()

# just print the usernames
print(len(uk_users))
for user in uk_users:
    print(user.username)

# from all these uk users, sort by follower

uk_users.sort(key=lambda user: user.follower_count, reverse=True)

# seaborn plot

sns.set_theme(style="whitegrid")
ax = sns.barplot(x=[user.follower_count for user in uk_users[:10]], y=[user.username for user in uk_users[:10]])

In [None]:
# get all users with bio that contains "coinbase"

coinbase_users = session.query(User).filter(
    User.bio_text.contains("coinbase")).all()

for user in coinbase_users:
    print(user.username)