In [None]:
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}

%load_ext autoreload
# "1" means: always reload modules marked with "%aimport"
# "2" means: always reload all modules except those marked with "%aimport"
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Seaborn theme: thicker plot lines and a larger font for the notebook context.
sns.set(rc={'lines.linewidth': 2})
sns.set_context(context='notebook', font_scale=1.3)

# Default size (in inches) for every matplotlib figure created below.
plt.rcParams['figure.figsize'] = (8.0, 6.0)

# Let pandas show up to 100 rows/columns; keep numpy output compact.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, 100)
np.set_printoptions(precision=4, linewidth=100)

In [None]:
# imports from app.py
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path
from time import sleep

import numpy as np
import pytz
from dotenv import load_dotenv
from twython import (
    Twython,
    TwythonAuthError,
    TwythonError,
    TwythonRateLimitError,
    TwythonStreamer,
)
from thisishappening.utils.cluster_utils import cluster_activity
from thisishappening.utils.data_base import Events, RecentTweets, session_factory
from thisishappening.utils.data_utils import compare_activity_kde, get_grid_coords
from thisishappening.utils.tweet_utils import (
    check_tweet,
    date_string_to_datetime,
    get_event_info,
    get_place_bounding_box,
    get_tweet_info,
)

# "{"-style log records: timestamp, level name, then the message.
_log_format = "{asctime} : {levelname} : {message}"
logging.basicConfig(format=_log_format, style="{")
logger = logging.getLogger("happeninglogger")

In [None]:
# additional imports
import folium
from folium import plugins

from thisishappening.utils.tweet_utils import TweetInfo
from thisishappening.utils.tweet_utils import get_tokens_to_tweet
from thisishappening.utils.tweet_utils import get_coords
from thisishappening.utils.data_utils import get_coords_min_max

In [None]:
# When IS_PROD is unset, settings are loaded from a .env file: the parent of
# the working directory is tried first, then the working directory itself.
IS_PROD = os.getenv("IS_PROD", default=None)

if IS_PROD is None:
    for env_dir in (Path.cwd().parent, Path.cwd()):
        env_path = env_dir / ".env"
        if env_path.exists():
            load_dotenv(dotenv_path=env_path)
            break
    else:
        # Neither location holds a .env file.
        raise OSError(".env file not found. Did you set it up?")

# DEBUG_RUN must spell true/false (case-insensitive); anything else aborts.
DEBUG_RUN = os.getenv("DEBUG_RUN", default="False").casefold()
if DEBUG_RUN not in ("true", "false"):
    raise ValueError(f"DEBUG_RUN must be True or False, current value: {DEBUG_RUN}")
DEBUG_RUN = DEBUG_RUN == "true"

if not DEBUG_RUN:
    # Normal run: posting is opt-in via the environment, logging and purging
    # are on, and the retention limits are optional numbers.
    logger.setLevel(logging.INFO)
    POST_EVENT = os.getenv("POST_EVENT", default="False").casefold() == "true"
    POST_DAILY_EVENTS = (
        os.getenv("POST_DAILY_EVENTS", default="False").casefold() == "true"
    )
    LOG_TWEETS = LOG_EVENTS = PURGE_OLD_DATA = True
    # Unset or empty values become None; otherwise they are cast to int/float.
    (
        RECENT_TWEETS_ROWS_TO_KEEP,
        EVENTS_ROWS_TO_KEEP,
        RECENT_TWEETS_DAYS_TO_KEEP,
        EVENTS_DAYS_TO_KEEP,
    ) = (
        cast(raw) if raw else None
        for cast, raw in (
            (int, os.getenv("RECENT_TWEETS_ROWS_TO_KEEP", default=None)),
            (int, os.getenv("EVENTS_ROWS_TO_KEEP", default=None)),
            (float, os.getenv("RECENT_TWEETS_DAYS_TO_KEEP", default=None)),
            (float, os.getenv("EVENTS_DAYS_TO_KEEP", default=None)),
        )
    )
else:
    # Debug run: verbose logging, every posting/logging/purging switch off,
    # and no retention limits.
    logger.setLevel(logging.DEBUG)
    POST_EVENT = POST_DAILY_EVENTS = False
    LOG_TWEETS = LOG_EVENTS = PURGE_OLD_DATA = False
    RECENT_TWEETS_ROWS_TO_KEEP = EVENTS_ROWS_TO_KEEP = None
    RECENT_TWEETS_DAYS_TO_KEEP = EVENTS_DAYS_TO_KEEP = None

# ECHO is off in both modes.
ECHO = False

# Twitter API credentials (used for OAuth1 "user auth" when the client is built).
APP_KEY = os.getenv("API_KEY", default=None)
APP_SECRET = os.getenv("API_SECRET", default=None)
OAUTH_TOKEN = os.getenv("ACCESS_TOKEN", default=None)
OAUTH_TOKEN_SECRET = os.getenv("ACCESS_TOKEN_SECRET", default=None)

DATABASE_URL = os.getenv("DATABASE_URL", default=None)
if DATABASE_URL is None:
    # Fail with a clear message instead of an AttributeError on None.startswith.
    raise ValueError("DATABASE_URL must be set (environment variable or .env file)")
# Rewrite the legacy "postgres://" scheme; count=1 keeps the change to the prefix.
# NOTE(review): presumably required by the database layer behind session_factory.
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql://", 1)

MY_SCREEN_NAME = os.getenv("MY_SCREEN_NAME", default=None)
assert MY_SCREEN_NAME is not None, "MY_SCREEN_NAME must be set"
LANGUAGE = os.getenv("LANGUAGE", default="en")

# Four comma-separated floats; the map cell reads indices 0/2 as longitudes
# and 1/3 as latitudes.
BOUNDING_BOX = os.getenv("BOUNDING_BOX", default=None)
BOUNDING_BOX = (
    [float(coord) for coord in BOUNDING_BOX.split(",")] if BOUNDING_BOX else []
)
assert len(BOUNDING_BOX) == 4, "BOUNDING_BOX must hold exactly four numbers"
# Numeric tuning parameters; each falls back to its default when unset.
TEMPORAL_GRANULARITY_HOURS = int(os.environ.get("TEMPORAL_GRANULARITY_HOURS", "1"))
EVENT_MIN_TWEETS = int(os.environ.get("EVENT_MIN_TWEETS", "5"))
KM_START = float(os.environ.get("KM_START", "0.05"))
KM_STOP = float(os.environ.get("KM_STOP", "0.3"))
KM_STEP = int(os.environ.get("KM_STEP", "9"))
MIN_N_CLUSTERS = int(os.environ.get("MIN_N_CLUSTERS", "1"))
TWEET_MAX_LENGTH = int(os.environ.get("TWEET_MAX_LENGTH", "280"))
TWEET_URL_LENGTH = int(os.environ.get("TWEET_URL_LENGTH", "23"))

# Boolean switches: only the text "true" (any case) enables them.
TWEET_LAT_LON = os.environ.get("TWEET_LAT_LON", "False").casefold() == "true"
SHOW_TWEETS_ON_EVENT = (
    os.environ.get("SHOW_TWEETS_ON_EVENT", "True").casefold() == "true"
)
TWEET_GEOTAG = os.environ.get("TWEET_GEOTAG", "True").casefold() == "true"

# Use docs/index.html to render words and map of tweets
BASE_EVENT_URL = os.environ.get(
    "BASE_EVENT_URL", "https://USERNAME.github.io/thisishappening/?"
)

# Comma-separated lists from the environment. Each one is de-duplicated through
# a set, so the order of the resulting list is arbitrary.
_place_types_raw = os.getenv(
    "VALID_PLACE_TYPES", default="admin, city, neighborhood, poi"
)
if _place_types_raw:
    VALID_PLACE_TYPES = list(set(x.strip() for x in _place_types_raw.split(",")))
else:
    VALID_PLACE_TYPES = []

# Ignored words are wrapped in \b...\b so they can serve as whole-word patterns.
_ignore_words_raw = os.getenv("IGNORE_WORDS", default=None)
if _ignore_words_raw:
    IGNORE_WORDS = list(
        set(rf"\b{x.strip()}\b" for x in _ignore_words_raw.split(","))
    )
else:
    IGNORE_WORDS = []

_ignore_names_raw = os.getenv("IGNORE_USER_SCREEN_NAMES", default=None)
if _ignore_names_raw:
    _ignore_names = set(rf"{x.strip()}" for x in _ignore_names_raw.split(","))
else:
    _ignore_names = set()
_ignore_names.add(MY_SCREEN_NAME)  # Ignore tweets from own screen name
IGNORE_USER_SCREEN_NAMES = list(_ignore_names)

_ignore_ids_raw = os.getenv("IGNORE_USER_ID_STR", default=None)
if _ignore_ids_raw:
    IGNORE_USER_ID_STR = list(set(x.strip() for x in _ignore_ids_raw.split(",")))
else:
    IGNORE_USER_ID_STR = []

# Thresholds and switches for filtering tweets by their author and type.
MIN_FRIENDS_COUNT = int(os.environ.get("MIN_FRIENDS_COUNT", "1"))
MIN_FOLLOWERS_COUNT = int(os.environ.get("MIN_FOLLOWERS_COUNT", "1"))
IGNORE_POSSIBLY_SENSITIVE = (
    os.environ.get("IGNORE_POSSIBLY_SENSITIVE", "False").casefold() == "true"
)
IGNORE_QUOTE_STATUS = (
    os.environ.get("IGNORE_QUOTE_STATUS", "False").casefold() == "true"
)
IGNORE_REPLY_STATUS = (
    os.environ.get("IGNORE_REPLY_STATUS", "False").casefold() == "true"
)

# Coordinates to ignore, written as "a,b;c,d": pairs are separated by ";" and
# the two numbers of a pair by ",". Duplicates are dropped via a set.
_ignore_lon_lat_raw = os.environ.get("IGNORE_LON_LAT")
if _ignore_lon_lat_raw:
    _ignore_pairs = [entry.split(",") for entry in _ignore_lon_lat_raw.split(";")]
    IGNORE_LON_LAT = list(
        set(
            (float(pair[0].strip()), float(pair[1].strip()))
            for pair in _ignore_pairs
        )
    )
else:
    IGNORE_LON_LAT = []

# Token selection settings.
TOKEN_COUNT_MIN = int(os.environ.get("TOKEN_COUNT_MIN", "2"))
REDUCE_TOKEN_COUNT_MIN = (
    os.environ.get("REDUCE_TOKEN_COUNT_MIN", "True").casefold() == "true"
)
REMOVE_USERNAME_AT = os.environ.get("REMOVE_USERNAME_AT", "True").casefold() == "true"

# Grid resolution, KDE bandwidth, and the day/hour activity thresholds.
GRID_RESOLUTION = int(os.environ.get("GRID_RESOLUTION", "128"))
BW_METHOD = float(os.environ.get("BW_METHOD", "0.3"))
ACTIVITY_THRESHOLD_DAY = float(os.environ.get("ACTIVITY_THRESHOLD_DAY", "30.0"))
ACTIVITY_THRESHOLD_HOUR = float(os.environ.get("ACTIVITY_THRESHOLD_HOUR", "300.0"))
# Weighting switch for the activity comparison.
WEIGHTED = os.environ.get("WEIGHTED", "False").casefold() == "true"

# Coordinates whose weight is reduced, written as "a,b;c,d". Each number is
# stored as a string rounded to five decimals; duplicates are dropped via a set.
_reduce_weight_raw = os.environ.get("REDUCE_WEIGHT_LON_LAT")
if _reduce_weight_raw:
    _reduce_pairs = [entry.split(",") for entry in _reduce_weight_raw.split(";")]
    REDUCE_WEIGHT_LON_LAT = list(
        set(
            (f"{float(pair[0].strip()):.5f}", f"{float(pair[1].strip()):.5f}")
            for pair in _reduce_pairs
        )
    )
else:
    REDUCE_WEIGHT_LON_LAT = []

# Optional weight factors: None when unset or empty, otherwise a float.
WEIGHT_FACTOR_LON_LAT, WEIGHT_FACTOR_USER, WEIGHT_FACTOR_NO_COORDS = (
    float(raw) if raw else None
    for raw in (
        os.environ.get("WEIGHT_FACTOR_LON_LAT"),
        os.environ.get("WEIGHT_FACTOR_USER"),
        os.environ.get("WEIGHT_FACTOR_NO_COORDS"),
    )
)

# True when enabled, otherwise None (never False).
QUERY_HAS_COORDS_ONLY = (
    os.environ.get("QUERY_HAS_COORDS_ONLY", "False").casefold() == "true"
) or None

# Which tweet kinds the recent-tweets queries include.
QUERY_INCLUDE_QUOTE_STATUS = (
    os.environ.get("QUERY_INCLUDE_QUOTE_STATUS", "True").casefold() == "true"
)
QUERY_INCLUDE_REPLY_STATUS = (
    os.environ.get("QUERY_INCLUDE_REPLY_STATUS", "False").casefold() == "true"
)
QUERY_INCLUDE_DELETED_STATUS = (
    os.environ.get("QUERY_INCLUDE_DELETED_STATUS", "False").casefold() == "true"
)

In [None]:
# Evaluation grid for the configured bounding box and resolution.
grid_coords, x_flat, y_flat = get_grid_coords(
    bounding_box=BOUNDING_BOX,
    grid_resolution=GRID_RESOLUTION,
)

# Extents of the same box, used as the image extent when plotting.
xmin, xmax, ymin, ymax = get_coords_min_max(bounding_box=BOUNDING_BOX)

In [None]:
def plot_activity_diff(activity_prev, activity_curr, z_diff, bounding_box):
    """Show the activity difference grid with both tweet sets drawn on top.

    Args:
        activity_prev: Tweets of the earlier window, in any form accepted by
            ``get_coords``; drawn as red dots.
        activity_curr: Tweets of the later window; drawn as black dots.
        z_diff: Difference grid rendered with the coolwarm colormap.
        bounding_box: Box handed to ``get_coords_min_max`` to obtain the
            image extent.

    The aspect ratio is taken from the notebook-level ``x_flat`` / ``y_flat``
    grid axes, so the grid cell must have been run first.
    """
    cmap = plt.cm.coolwarm

    # Derive the extent from the argument instead of relying on globals.
    xmin, xmax, ymin, ymax = get_coords_min_max(bounding_box=bounding_box)

    fig, ax = plt.subplots()
    im = ax.imshow(
        z_diff,
        # np.ptp() instead of ndarray.ptp(): the method was removed in NumPy 2.0.
        aspect=np.ptp(x_flat) / np.ptp(y_flat),
        cmap=cmap,
        extent=[xmin, xmax, ymin, ymax],
    )
    plt.colorbar(im)

    for activity, marker in ((activity_prev, 'r.'), (activity_curr, 'k.')):
        lons, lats = get_coords(activity)
        # reshape keeps the array two-dimensional even when there are no tweets,
        # so the column indexing below cannot raise IndexError.
        lon_lat = np.array(list(zip(lons, lats))).reshape(-1, 2)
        ax.plot(lon_lat[:, 0], lon_lat[:, 1], marker, markersize=5)

    # Rotates and right-aligns the x tick labels.
    fig.autofmt_xdate()

    plt.show()

In [None]:
# Twitter client, authenticated with OAuth1 ("user auth") credentials.
_twitter_credentials = {
    "app_key": APP_KEY,
    "app_secret": APP_SECRET,
    "oauth_token": OAUTH_TOKEN,
    "oauth_token_secret": OAUTH_TOKEN_SECRET,
}
twitter = Twython(**_twitter_credentials)

# Database session; ECHO is forwarded as the factory's echo argument.
session = session_factory(DATABASE_URL, echo=ECHO)

In [None]:
# Current time as a timezone-aware UTC datetime. datetime.utcnow() is
# deprecated (Python 3.12+); datetime.now(tz=...) produces the aware value
# directly.
timestamp_curr = datetime.now(tz=pytz.UTC)

print(timestamp_curr)

# Placeholder tweet: only created_at is set. The query cell reads
# tweet_info.created_at as its reference time.
tweet_info = TweetInfo(
    status_id_str=None,
    user_screen_name=None,
    user_id_str=None,
    created_at=timestamp_curr,
    deleted_at=None,
    tweet_body=None,
    tweet_language=None,
    is_quote_status=None,
    is_reply_status=None,
    possibly_sensitive=None,
    has_coords=None,
    longitude=None,
    latitude=None,
    place_id=None,
    place_name=None,
    place_full_name=None,
    place_country=None,
    place_country_code=None,
    place_type=None,
)


In [None]:
# Filters shared by all four queries below.
recent_tweets_filters = dict(
    place_type=VALID_PLACE_TYPES,
    has_coords=QUERY_HAS_COORDS_ONLY,
    place_type_or_coords=True,
    include_quote_status=QUERY_INCLUDE_QUOTE_STATUS,
    include_reply_status=QUERY_INCLUDE_REPLY_STATUS,
    include_deleted_status=QUERY_INCLUDE_DELETED_STATUS,
)
reference_time = tweet_info.created_at

# Daily windows: the 24 hours up to the reference time, and the 24 hours
# up to one day before it.
activity_curr_day = RecentTweets.get_recent_tweets(
    session, timestamp=reference_time, hours=24, **recent_tweets_filters
)
activity_prev_day = RecentTweets.get_recent_tweets(
    session,
    timestamp=reference_time - timedelta(days=1),
    hours=24,
    **recent_tweets_filters,
)

# Hourly windows: same scheme with TEMPORAL_GRANULARITY_HOURS as the width.
activity_curr_hour = RecentTweets.get_recent_tweets(
    session,
    timestamp=reference_time,
    hours=TEMPORAL_GRANULARITY_HOURS,
    **recent_tweets_filters,
)
activity_prev_hour = RecentTweets.get_recent_tweets(
    session,
    timestamp=reference_time - timedelta(hours=TEMPORAL_GRANULARITY_HOURS),
    hours=TEMPORAL_GRANULARITY_HOURS,
    **recent_tweets_filters,
)

for window_label, window_tweets in (
    ('prev day', activity_prev_day),
    ('curr day', activity_curr_day),
    ('prev hour', activity_prev_hour),
    ('curr hour', activity_curr_hour),
):
    print(window_label, len(window_tweets))

In [None]:
# Centre the map on the bounding box: indices 0/2 are longitudes, 1/3 latitudes.
lon_center = (BOUNDING_BOX[0] + BOUNDING_BOX[2]) / 2
lat_center = (BOUNDING_BOX[1] + BOUNDING_BOX[3]) / 2

zoom_start = 10
min_opacity = 0.25

m = folium.Map(
    [lat_center, lon_center],
    tiles='OpenStreetMap',
    zoom_start=zoom_start,
    width='40%',
    height='70%',
)

# Hourly windows are mapped; swap in activity_prev_day / activity_curr_day
# below to look at the daily windows instead.
time_format = '%Y-%m-%d %H:%M:%S'

lat_lon_prev = [[t.latitude, t.longitude] for t in activity_prev_hour]
lat_lon_curr = [[t.latitude, t.longitude] for t in activity_curr_hour]

tweet_prev = [
    [t.created_at.strftime(time_format), t.user_screen_name, t.place_name, t.tweet_body]
    for t in activity_prev_hour
]
tweet_curr = [
    [t.created_at.strftime(time_format), t.user_screen_name, t.place_name, t.tweet_body]
    for t in activity_curr_hour
]

# One circle per tweet: red for the previous window, black for the current.
# The popup carries (lat, lon, created_at, screen name, place name, body).
for points, tweets, color in (
    (lat_lon_prev, tweet_prev, "red"),
    (lat_lon_curr, tweet_curr, "black"),
):
    for point, tweet in zip(points, tweets):
        folium.CircleMarker(
            location=(point[0], point[1]),
            radius=3,
            color=color,
            fill=False,
            weight=3,
            opacity=0.5,
            popup=(*point, *tweet),
        ).add_to(m)

# One toggleable heat-map layer per window.
for layer_name, points in (('Previous', lat_lon_prev), ('Current', lat_lon_curr)):
    heat_map = folium.plugins.HeatMap(points, min_opacity=min_opacity)
    heat_map.add_to(folium.FeatureGroup(name=layer_name).add_to(m))

folium.LayerControl().add_to(m)

m

In [None]:
# # debug

# ACTIVITY_THRESHOLD_DAY = 2.0
# ACTIVITY_THRESHOLD_HOUR = 10.0

# WEIGHTED = True
# WEIGHTED = False

# WEIGHT_FACTOR_USER = 1.0
# WEIGHT_FACTOR_USER = 2.0

# WEIGHT_FACTOR_LON_LAT = 1.0
# WEIGHT_FACTOR_LON_LAT = 2.0

# WEIGHT_FACTOR_NO_COORDS = 0.0
# WEIGHT_FACTOR_NO_COORDS = 1.0

# BW_METHOD = 0.3
# BW_METHOD = 1.0
# BW_METHOD = 'scott'
# BW_METHOD = 'silverman'

In [None]:
# Echo the settings that drive event detection, one "NAME value" line each.
for setting_name, setting_value in (
    ('ACTIVITY_THRESHOLD_DAY', ACTIVITY_THRESHOLD_DAY),
    ('ACTIVITY_THRESHOLD_HOUR', ACTIVITY_THRESHOLD_HOUR),
    ('WEIGHTED', WEIGHTED),
    ('WEIGHT_FACTOR_USER', WEIGHT_FACTOR_USER),
    ('WEIGHT_FACTOR_LON_LAT', WEIGHT_FACTOR_LON_LAT),
    ('WEIGHT_FACTOR_NO_COORDS', WEIGHT_FACTOR_NO_COORDS),
    ('BW_METHOD', BW_METHOD),
):
    print(setting_name, setting_value)

In [None]:
# Size of the previous hourly window, then its first 100 tweets as
# (longitude, latitude, has_coords, created_at) tuples.
print(len(activity_prev_hour))
[
    (
        tweet.longitude,
        tweet.latitude,
        tweet.has_coords,
        tweet.created_at.strftime('%Y-%m-%d %H:%M:%S'),
    )
    for tweet in activity_prev_hour
][:100]

In [None]:
# Size of the current hourly window, then its first 100 tweets as
# (longitude, latitude, has_coords, created_at) tuples.
print(len(activity_curr_hour))
[
    (
        tweet.longitude,
        tweet.latitude,
        tweet.has_coords,
        tweet.created_at.strftime('%Y-%m-%d %H:%M:%S'),
    )
    for tweet in activity_curr_hour
][:100]

In [None]:
# Decide whether an event occurred.
# A pair of windows is compared only when both hold more than one tweet. An
# event is flagged when at least one grid cell of the difference returned by
# compare_activity_kde exceeds the window's threshold.
event_day = False
event_hour = False

# Options shared by the daily and hourly comparisons.
kde_options = dict(
    bw_method=BW_METHOD,
    weighted=WEIGHTED,
    weight_factor_user=WEIGHT_FACTOR_USER,
    reduce_weight_lon_lat=REDUCE_WEIGHT_LON_LAT,
    weight_factor_lon_lat=WEIGHT_FACTOR_LON_LAT,
    weight_factor_no_coords=WEIGHT_FACTOR_NO_COORDS,
)

if min(len(activity_prev_day), len(activity_curr_day)) > 1:
    z_diff_day, activity_prev_day_w, activity_curr_day_w = compare_activity_kde(
        grid_coords, activity_prev_day, activity_curr_day, **kde_options
    )
    lat_activity_day, lon_activity_day = np.where(z_diff_day > ACTIVITY_THRESHOLD_DAY)
    event_day = (lat_activity_day.size > 0) and (lon_activity_day.size > 0)
    day_detail = f" max diff: {z_diff_day.max():.2f},"
else:
    day_detail = " not enough activity,"

logger.info(
    f"Day event: {event_day}, current: {len(activity_curr_day)},"
    f" previous: {len(activity_prev_day)},"
    f"{day_detail}"
    f" threshold: {ACTIVITY_THRESHOLD_DAY}"
)

if min(len(activity_prev_hour), len(activity_curr_hour)) > 1:
    z_diff_hour, activity_prev_hour_w, activity_curr_hour_w = compare_activity_kde(
        grid_coords, activity_prev_hour, activity_curr_hour, **kde_options
    )
    lat_activity_hour, lon_activity_hour = np.where(
        z_diff_hour > ACTIVITY_THRESHOLD_HOUR
    )
    event_hour = (lat_activity_hour.size > 0) and (lon_activity_hour.size > 0)
    hour_detail = f" max diff: {z_diff_hour.max():.2f},"
else:
    hour_detail = " not enough activity,"

logger.info(
    f"Hour event: {event_hour}, current: {len(activity_curr_hour)},"
    f" previous: {len(activity_prev_hour)},"
    f"{hour_detail}"
    f" threshold: {ACTIVITY_THRESHOLD_HOUR}"
)

# An event requires both the daily and the hourly signal.
print('found event:', event_day and event_hour)

In [None]:
# Plot each difference grid, but only for windows that were actually compared
# (both sides held more than one tweet).
if min(len(activity_prev_day), len(activity_curr_day)) > 1:
    print('day')
    plot_activity_diff(
        activity_prev_day_w, activity_curr_day_w, z_diff_day, BOUNDING_BOX
    )

if min(len(activity_prev_hour), len(activity_curr_hour)) > 1:
    print('hour')
    plot_activity_diff(
        activity_prev_hour_w, activity_curr_hour_w, z_diff_hour, BOUNDING_BOX
    )

In [None]:
# activity_curr_hour_w only exists when the hourly comparison ran, i.e. when
# both hourly windows held more than one tweet. Guard with the same condition
# so a fresh Run All does not stop here with a NameError.
if (len(activity_prev_hour) > 1) and (len(activity_curr_hour) > 1):
    # One line per weighted tweet: user, weight, place, timestamp, body.
    for x in activity_curr_hour_w:
        print(
            x['user_screen_name'], '---',
            f"{x['weight']:.4f}", '---', x['place_name'], '---',
            x['created_at'].strftime('%Y-%m-%d %H:%M:%S'), '---',
            x['tweet_body'],
        )
else:
    print('not enough hourly activity to list weighted tweets')

In [None]:
# Boolean masks of the grid cells whose difference exceeds each threshold.
# A mask is built only for windows that were actually compared.
threshold_masks = []
if (len(activity_prev_day) > 1) and (len(activity_curr_day) > 1):
    threshold_masks.append(('day', z_diff_day > ACTIVITY_THRESHOLD_DAY))
if (len(activity_prev_hour) > 1) and (len(activity_curr_hour) > 1):
    threshold_masks.append(('hour', z_diff_hour > ACTIVITY_THRESHOLD_HOUR))

for mask_label, mask in threshold_masks:
    print(mask_label)
    fig, ax = plt.subplots()
    ax.imshow(
        mask,
        # np.ptp() instead of ndarray.ptp(): the method was removed in NumPy 2.0.
        aspect=np.ptp(x_flat) / np.ptp(y_flat),
        cmap=plt.cm.coolwarm,
        extent=[xmin, xmax, ymin, ymax],
    )
    plt.show()

In [None]:
# lat_activity, lon_activity = np.where(z_diff_day > ACTIVITY_THRESHOLD_DAY)

# print(lon_activity)
# print(lat_activity)

# print(x_flat[lon_activity])
# print(y_flat[lat_activity])

In [None]:
# lat_activity, lon_activity = np.where(z_diff_hour > ACTIVITY_THRESHOLD_HOUR)

# print(lon_activity)
# print(lat_activity)

# print(x_flat[lon_activity])
# print(y_flat[lat_activity])

In [None]:
from sklearn.cluster import DBSCAN

# Divides a distance in km to obtain an angle in radians (see eps below).
KMS_PER_RADIAN = 6371.0088

# The weighted activity lists only exist when the hourly comparison ran (both
# windows held more than one tweet). Fall back to an empty list so a fresh
# Run All does not stop here with a NameError.
if (len(activity_prev_hour) > 1) and (len(activity_curr_hour) > 1):
    activity = activity_curr_hour_w
    # activity = activity_curr_day_w
else:
    activity = []

min_samples = EVENT_MIN_TWEETS

km_start = 0.05
km_stop = 0.33
km_step = 10  # number of radii tried (np.linspace "num"), not a step size
min_n_clusters = 1

# Explicit toggle replacing a weight list that was built and then immediately
# overwritten with None. Set to True to pass per-tweet weights to DBSCAN.
use_sample_weight = False
sample_weight = [x["weight"] for x in activity] if use_sample_weight else None

kms = np.linspace(km_start, km_stop, km_step)

db = None
unique_labels = []
if len(activity) > 0:
    # The haversine metric requires radians AND expects each point as
    # [latitude, longitude]; the previous [lon, lat] order distorted distances.
    # NOTE(review): check the production clustering helper for the same order.
    lons, lats = get_coords(activity)
    X = np.radians([[lat, lon] for lon, lat in zip(lons, lats)])

    # Grow the neighbourhood radius until enough clusters are found.
    for km in kms:
        eps = km / KMS_PER_RADIAN
        logger.info(f'Clustering, max neighbor distance {km:.3f} km')
        db = DBSCAN(eps=eps, min_samples=min_samples, algorithm='ball_tree', metric='haversine')
        db.fit(X, sample_weight=sample_weight)

        # label -1 means not assigned to a cluster
        unique_labels = [x for x in set(db.labels_) if x != -1]
        logger.info(f'Found {len(unique_labels)} clusters')

        if len(unique_labels) >= min_n_clusters:
            break

# Group the tweets by the cluster label of the last fit.
clusters = {}
for k in unique_labels:
    cluster_mask = (db.labels_ == k)
    clusters[k] = {
        'event_tweets': [
            tweet for tweet, in_cluster in zip(activity, cluster_mask) if in_cluster
        ],
    }

In [None]:
print(clusters.keys())

# For every cluster: its tweets' coordinates, the tweets themselves, the
# tokens that would be tweeted, and the assembled event info.
for idx, cluster in clusters.items():
    print("=" * 20)
    print(f"Cluster {idx}")

    event_tweets = cluster['event_tweets']
    print(f"Tweets: {len(event_tweets)}")

    for x in event_tweets:
        print((x['longitude'], x['latitude']))

    for x in event_tweets:
        print(x['user_screen_name'], '---', f"{x['weight']:.4f}", '---', x['place_name'], '---', x['tweet_body'])

    # Use the configured token settings (previously hard-coded to 2 / True)
    # so this preview agrees with the get_event_info call below.
    print(
        get_tokens_to_tweet(
            event_tweets,
            token_count_min=TOKEN_COUNT_MIN,
            reduce_token_count_min=REDUCE_TOKEN_COUNT_MIN,
            remove_username_at=REMOVE_USERNAME_AT,
        )
    )

    # event_info is rebound on every iteration; after the loop it holds the
    # result for the last cluster only.
    event_info = get_event_info(
        twitter,
        event_tweets=event_tweets,
        tweet_max_length=TWEET_MAX_LENGTH,
        tweet_url_length=TWEET_URL_LENGTH,
        base_event_url=BASE_EVENT_URL,
        token_count_min=TOKEN_COUNT_MIN,
        reduce_token_count_min=REDUCE_TOKEN_COUNT_MIN,
        remove_username_at=REMOVE_USERNAME_AT,
        tweet_lat_lon=TWEET_LAT_LON,
        show_tweets_on_event=SHOW_TWEETS_ON_EVENT,
    )