In [1]:
import ast

import pandas as pd
import os
from shapely.geometry import LineString
import geopandas as gpd
import geopy.distance
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import shapely.wkt
import matplotlib as mpl
from matplotlib.ticker import ScalarFormatter
import matplotlib.colors as mplc
from mpl_toolkits.axes_grid1 import make_axes_locatable



import scipy.stats
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import summary_table
from adjustText import adjust_text

from pathlib import Path

In [2]:
# Locate the project root relative to the directory the kernel runs in:
# it is taken to be two levels above the current working directory.
cwd = Path.cwd()
notebook_parent = cwd.parent
parent_dir = notebook_parent.parent

parent_dir

PosixPath('/home/veror/Desktop/Tweet2Geo')

## Read data

In [3]:
AREA = 'SA'

# Folder with the raw inputs read in this cell.
data_dir = parent_dir / 'Data'

# Country geometries (the file name follows the Natural Earth 1:50m admin-0 layer).
# Optional alternative kept from the original notes: reproject to Mollweide
# with gdf.to_crs("ESRI:54009").
gdf = gpd.read_file(data_dir / 'ne_50m_admin_0_countries.shp')

# ISO 2 <-> ISO 3 lookup table for the study area.
iso_2_3_df = pd.read_csv(data_dir / 'iso2-3 south america.csv')

# Right join on 'ADM0_A3': every row of the lookup table is kept (per the
# original note this includes GF and SU), with its geometry when the
# shapefile has a matching entry.
df_country = pd.merge(
    left=gdf,
    right=iso_2_3_df,
    how='right',
    on='ADM0_A3',
)

## Choropleths

### Out-flows

In [4]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.gridspec as gridspec

# --- Directories ---
# Every file in `flows_dir` is read as the flow table of one source country;
# the first two characters of the file name are used as its ISO 2 code.
flows_dir = parent_dir / 'Data' / 'South America' / '0 fluxes 0.5'
save_dir = parent_dir / 'Data analysis' / 'Choroplets' / 'SA' / 'out_flows_0.5'
# savefig does not create missing folders, so make sure the target exists.
save_dir.mkdir(parents=True, exist_ok=True)

# --- Input files ---
# Sorted for a reproducible processing order. Hidden entries (for example
# `.ipynb_checkpoints` or `.DS_Store`) and sub-folders are skipped so they are
# never handed to `pd.read_csv`.
flows_files = sorted(
    entry.name
    for entry in flows_dir.iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)

# --- Plot parameters ---
legend_fontsize = 18
colorsource = 'firebrick'
colorbar_label = 'Tweets out-flows percentage [%]'

# The frames built below reuse the coordinates of `df_country` unchanged, so
# they must carry the same CRS. Declaring 'EPSG:3857' would not reproject
# anything: depending on the geopandas version it either mislabels the
# coordinates or raises a CRS-mismatch error.
country_crs = getattr(df_country, 'crs', None)

# --- One map per source country ---
for flow_file in flows_files:
    country = flow_file[:2]
    print(f"Processing: {country}")

    # Flows leaving `country`; the row pointing back to itself is dropped.
    flows_out = pd.read_csv(flows_dir / flow_file)
    flows_out = flows_out[flows_out['ISO 2'] != country]

    # Left join keeps every country geometry; countries without a flow row get
    # NaN in 'flux_percentage' and are drawn with `missing_kwds`.
    df_flow = gpd.GeoDataFrame(
        pd.merge(df_country, flows_out, on='ISO 2', how='left'),
        geometry='geometry',
        crs=country_crs,
    )
    df_source = gpd.GeoDataFrame(
        df_country[df_country['ISO 2'] == country],
        geometry='geometry',
        crs=country_crs,
    )

    # --- Figure layout: wide map axis plus a narrow colorbar axis ---
    fig = plt.figure(figsize=(7, 6), dpi=250)
    gs = gridspec.GridSpec(1, 2, width_ratios=[0.95, 0.05], wspace=0.04)
    ax = fig.add_subplot(gs[0])
    cax = fig.add_subplot(gs[1])

    # Choropleth of the out-flow percentages.
    df_flow.plot(
        column='flux_percentage',
        legend=True,
        missing_kwds={'color': 'lightgrey'},
        legend_kwds={'label': colorbar_label},
        ax=ax,
        cmap='Blues',
        cax=cax,
        rasterized=True,
    )

    # Country borders.
    df_flow.boundary.plot(color='black', ax=ax, linewidth=0.5)

    # Highlight the source country on top of the choropleth.
    df_source.plot(color=colorsource, alpha=0.5, ax=ax)
    df_source.boundary.plot(color=colorsource, linewidth=0.5, ax=ax)

    # Legend label: use the 'name' column when it exists and the country has a
    # row in `df_country`; otherwise fall back to the ISO 2 code.
    if 'name' in df_source.columns and not df_source.empty:
        source_label = df_source['name'].values[0]
    else:
        source_label = country

    if source_label == 'CO':
        source_label = 'Colombia'

    legend_handles = [
        mlines.Line2D([0], [0], color=colorsource, lw=4, label=f'Source: {source_label}')
    ]
    ax.legend(handles=legend_handles, loc='lower left', fontsize=12, frameon=False)

    # Colorbar formatting: label and ticks on the right-hand side.
    cax.set_ylabel(colorbar_label, rotation=90, fontsize=legend_fontsize)
    cax.tick_params(labelsize=16)
    cax.yaxis.set_ticks_position('right')
    cax.yaxis.set_label_position('right')
    cax.set_xticks([])

    # The map itself is shown without axes.
    ax.axis('off')

    # Save under a dedicated name (the loop variable is left untouched) and
    # close this specific figure so figures do not pile up across iterations.
    out_name = country + '_choroplet.pdf'
    fig.savefig(save_dir / out_name, dpi=250, bbox_inches='tight')
    plt.close(fig)


Processing: PY
Processing: BO
Processing: CO
Processing: VE
Processing: BR
Processing: UY
Processing: CL
Processing: PE
Processing: AR
Processing: EC


### In-flows

In [5]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.gridspec as gridspec

# --- Directories ---
# Every file in `flows_dir` is read as the flow table of one source country;
# the first two characters of the file name are used as its ISO 2 code.
flows_dir = parent_dir / 'Data' / 'South America' / '0 fluxes 0.5'
save_dir = parent_dir / 'Data analysis' / 'Choroplets' / 'SA' / 'in_flows_0.5'
# savefig does not create missing folders, so make sure the target exists.
save_dir.mkdir(parents=True, exist_ok=True)

# Sorted for a reproducible processing order. Hidden entries (for example
# `.ipynb_checkpoints` or `.DS_Store`) and sub-folders are skipped so they are
# never handed to `pd.read_csv`.
flows_files = sorted(
    entry.name
    for entry in flows_dir.iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)

# --- Plot parameters ---
legend_fontsize = 18
colorsource = 'firebrick'
colorbar_label = 'Tweets in-flows percentage [%]'


def adjust_colorbar(cb):
    """Resize the tick labels and the axis label of colorbar `cb`.

    Sizes are derived from the module-level `legend_fontsize`. The loop in
    this cell styles the colorbar axis directly and does not call this helper.
    """
    cb.ax.tick_params(labelsize=legend_fontsize + 4)
    cb.ax.yaxis.label.set_size(legend_fontsize + 6)


# Read every flow table once, paired with the ISO 2 code of its source
# country, instead of re-reading all files for each destination.
flow_tables = [(name[:2], pd.read_csv(flows_dir / name)) for name in flows_files]

# The frames built below reuse the coordinates of `df_country` unchanged, so
# they must carry the same CRS. Declaring 'EPSG:3857' would not reproject
# anything: depending on the geopandas version it either mislabels the
# coordinates or raises a CRS-mismatch error.
country_crs = getattr(df_country, 'crs', None)

# --- One map per destination country ---
for country in [code for code, _ in flow_tables]:
    print(f"Processing {country}")

    # Gather, from every other country's table, the rows that point to `country`.
    inflow_rows = []
    for country1, table in flow_tables:
        if country1 == country:
            continue
        toward_country = table[table['ISO 2'] == country]
        if not toward_country.empty:
            # Relabel ALL matching rows with their origin, so that each value
            # is attached to the geometry of the country the flow comes from.
            inflow_rows.append(toward_country.assign(**{'ISO 2': country1}))

    if not inflow_rows:
        # Nothing to colour; merging a column-less frame on 'ISO 2' would fail.
        print(f"  no in-flows found for {country}, map skipped")
        continue

    df_in_flows = pd.concat(inflow_rows, ignore_index=True)

    # Left join keeps every country geometry; countries sending nothing get
    # NaN in 'flux_percentage' and are drawn with `missing_kwds`.
    df_in_flows_tot = gpd.GeoDataFrame(
        pd.merge(df_country, df_in_flows, on='ISO 2', how='left'),
        geometry='geometry',
        crs=country_crs,
    )
    df_source = gpd.GeoDataFrame(
        df_country[df_country['ISO 2'] == country],
        geometry='geometry',
        crs=country_crs,
    )

    # --- Figure layout: wide map axis plus a narrow colorbar axis ---
    fig = plt.figure(figsize=(7, 6), dpi=250)
    gs = gridspec.GridSpec(1, 2, width_ratios=[0.95, 0.05], wspace=0.04)
    ax = fig.add_subplot(gs[0])
    cax = fig.add_subplot(gs[1])

    # Choropleth of the in-flow percentages.
    df_in_flows_tot.plot(
        column='flux_percentage',
        legend=True,
        missing_kwds={'color': 'lightgrey'},
        legend_kwds={'label': colorbar_label},
        ax=ax,
        cmap='Blues',
        cax=cax,
        rasterized=True,
    )

    # Country borders.
    df_in_flows_tot.boundary.plot(color='black', ax=ax, linewidth=0.5)

    # Highlight the destination country on top of the choropleth.
    df_source.plot(color=colorsource, alpha=0.5, ax=ax)
    df_source.boundary.plot(color=colorsource, linewidth=0.5, ax=ax)

    # Legend label: use the 'country' column when it exists and the country has
    # a row in `df_country`; otherwise fall back to the ISO 2 code.
    # NOTE(review): the out-flows cell reads the 'name' column instead - confirm
    # which column of `df_country` holds the display name.
    if 'country' in df_source.columns and not df_source.empty:
        legend_label = df_source['country'].values[0]
    else:
        legend_label = country

    if legend_label == 'CO':
        legend_label = 'Colombia'

    legend_handles = [
        mlines.Line2D([0], [0], color=colorsource, lw=4, label=f'Target: {legend_label}')
    ]
    ax.legend(handles=legend_handles, loc='lower left', fontsize=12, frameon=False)

    # Colorbar formatting: label and ticks on the right-hand side.
    cax.set_ylabel(colorbar_label, rotation=90, fontsize=legend_fontsize)
    cax.tick_params(labelsize=16)
    cax.yaxis.set_ticks_position('right')
    cax.yaxis.set_label_position('right')
    cax.set_xticks([])

    # The map itself is shown without axes.
    ax.axis('off')

    # Save under a dedicated name and close this specific figure so figures do
    # not pile up across iterations.
    out_name = country + '_choroplet.pdf'
    fig.savefig(save_dir / out_name, dpi=250, bbox_inches='tight')
    plt.close(fig)


Processing PY
Processing BO
Processing CO
Processing VE
Processing BR
Processing UY
Processing CL
Processing PE
Processing AR
Processing EC


### Self-loops

In [7]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

# --- Directories ---
# Every file in `flows_dir` is read as the flow table of one source country;
# the first two characters of the file name are used as its ISO 2 code.
flows_dir = parent_dir / 'Data' / 'South America' / '0 fluxes 0.5'
save_dir = parent_dir / 'Data analysis' / 'Choroplets' / 'SA'
# savefig does not create missing folders, so make sure the target exists.
save_dir.mkdir(parents=True, exist_ok=True)

# Sorted for a reproducible processing order. Hidden entries (for example
# `.ipynb_checkpoints` or `.DS_Store`) and sub-folders are skipped so they are
# never handed to `pd.read_csv`.
flows_files = sorted(
    entry.name
    for entry in flows_dir.iterdir()
    if entry.is_file() and not entry.name.startswith('.')
)
legend_fontsize = 18

# --- Collect the self-loop row (country -> same country) of every table ---
self_loop_rows = []
for flow_file in flows_files:
    country = flow_file[:2]
    df_flow = pd.read_csv(flows_dir / flow_file)
    country_row = df_flow[df_flow['ISO 2'] == country]
    if not country_row.empty:
        self_loop_rows.append(country_row)

if not self_loop_rows:
    # Fail with a clear message instead of a KeyError on 'flux_percentage' below.
    raise ValueError(f"No self-loop rows found in {flows_dir}")

# Attach the geometries in a single merge. The merge is an inner join, so only
# countries that have a self-loop row end up on the map.
# NOTE(review): because of the inner join, `missing_kwds` below only applies to
# NaN values of 'flux_percentage'; countries without a flow file are not drawn
# in grey but left out - confirm this is intended.
# The CRS is taken from `df_country` because its coordinates are reused
# unchanged; declaring 'EPSG:3857' would not reproject anything and, depending
# on the geopandas version, either mislabels the data or raises a mismatch error.
df_self_loops = gpd.GeoDataFrame(
    pd.merge(df_country, pd.concat(self_loop_rows, ignore_index=True), on='ISO 2'),
    geometry='geometry',
    crs=getattr(df_country, 'crs', None),
)

# Colour range spans the observed self-loop percentages.
vmin = df_self_loops['flux_percentage'].min()
vmax = df_self_loops['flux_percentage'].max()

# --- Figure layout: wide map axis plus a narrow colorbar axis ---
fig = plt.figure(figsize=(7, 6), dpi=250)
gs = gridspec.GridSpec(1, 2, width_ratios=[0.95, 0.05], wspace=0.04)
ax = fig.add_subplot(gs[0])
cax = fig.add_subplot(gs[1])

# Choropleth of the self-loop percentages; the colorbar label is set on `cax` below.
df_self_loops.plot(
    column='flux_percentage',
    ax=ax,
    legend=True,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'label': ''},
    vmin=vmin,
    vmax=vmax,
    cmap='GnBu',
    cax=cax,
    rasterized=True,
)

# Country borders.
df_self_loops.boundary.plot(color='black', linewidth=0.5, ax=ax)

# Colorbar formatting: label and ticks on the right-hand side.
cax.set_ylabel('Percentage of self-loops [%]', rotation=90, fontsize=legend_fontsize)
cax.tick_params(labelsize=16)
cax.yaxis.set_ticks_position('right')
cax.yaxis.set_label_position('right')
cax.set_xticks([])

# The map itself is shown without axes.
ax.axis('off')

# Save and close this specific figure.
fig.savefig(save_dir / 'self_loops_0.5_choroplet.pdf', dpi=250, bbox_inches='tight')
plt.close(fig)
