
# 04 · GBDT Ranker for Task‑A (self-contained)

This notebook trains a Gradient Boosted Decision Tree **ranker** to predict a vessel's **very next destination** (Task‑A).
It **does not rely on `taskA_candidate_pairs.parquet` generated by 03**. Instead, it rebuilds the candidate pairs and features (with caching) and then trains:

- **Preferred**: `LightGBMRanker` (`objective="lambdarank"`) if LightGBM is available.
- **Fallback**: `HistGradientBoostingClassifier` + one-hot encoding if LightGBM is not available.

The pipeline:
1. Load `samples_taskA.parquet` (or rebuild from `port_calls.cleaned.parquet` if missing).
2. Temporal split: Train / Valid / Test (via `utils.splits.temporal_split`).
3. Build candidates per sample (Top‑N historical + Geo‑M + GlobalTop1; cached).
4. Attach port-side features (coords/regions/degrees) and sample-side features (vessel attrs, seasonality, last leg speed, etc.).
5. **Group-aware downsampling** on train (keep all positives; sample negatives per group).
6. Train & evaluate the ranker; export metrics and top‑K predictions.


In [1]:
#Adjustable: Add parent directory (which contains utils/) to Python search path
import sys, os
sys.path.append(os.path.abspath(".."))  #  notebooks  sys.path

In [2]:
import polars as pl
import numpy as np, pandas as pd, json, joblib, warnings

# Fallback — Logistic with sparse OHE to reduce memory
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb

# Use all cores for Polars
os.environ["POLARS_MAX_THREADS"] = str(os.cpu_count())

# Display setup
pl.Config.set_tbl_rows(5)
pl.Config.set_tbl_cols(12)

# String cache for joins (compat across Polars versions)
if hasattr(pl, "enable_string_cache"):
    pl.enable_string_cache()   # Polars ≥1.0
elif hasattr(pl, "toggle_string_cache"):
    pl.toggle_string_cache(True)

# Utils
from utils.config import DATA_DIR, INTERIM_DIR, PROCESSED_DIR
from utils.etl_clean import ensure_interim
from utils.splits import temporal_split, add_crisis_flag
from utils.candidates import (
    build_origin_next_transitions, global_mf_next, build_pc_coords,
    build_conditional_transitions, build_candidates_for_split, global_top_list
)
from utils.features import build_ports_attr, compute_port_degree, attach_port_side, build_sample_side, merge_all_features
from utils.metrics import eval_topk_mrr

print("DATA_DIR     =", DATA_DIR)
print("INTERIM_DIR  =", INTERIM_DIR)
print("PROCESSED_DIR=", PROCESSED_DIR)

DATA_DIR     = /Users/wangwei/Documents/Folders/工作/Kpler/data/raw
INTERIM_DIR  = /Users/wangwei/Documents/Folders/工作/Kpler/data/interim
PROCESSED_DIR= /Users/wangwei/Documents/Folders/工作/Kpler/data/processed


In [3]:
# Load samples (require precomputed by 01/03)
samples_path = PROCESSED_DIR / "samples_taskA.parquet"
assert samples_path.exists(), f"Missing {samples_path}. Please run notebooks 01/03 first or set KPLER_PROCESSED_DIR."
samples = pl.read_parquet(samples_path)

# Ensure temporal dtype
if samples.schema.get("call_ts") == pl.Utf8:
    samples = samples.with_columns(pl.col("call_ts").str.strptime(pl.Datetime, strict=False))

# Temporal split
train, val, test = temporal_split(samples)
train = add_crisis_flag(train); val = add_crisis_flag(val); test = add_crisis_flag(test)

print("Train rows:", len(train), " Val rows:", len(val), " Test rows:", len(test))

Train rows: 260606  Val rows: 56201  Test rows: 21553


In [4]:
# Build sample-side features early for conditional transitions
cleaned = INTERIM_DIR / "port_calls.cleaned.parquet"
if not cleaned.exists():
    _ = ensure_interim()
pc = pl.read_parquet(cleaned)
vs = pl.read_csv(DATA_DIR / "vessels.csv", try_parse_dates=True) if (DATA_DIR / "vessels.csv").exists() else pl.DataFrame({})
s_side   = build_sample_side(samples, pc, vs)

# Build transitions and coords
cleaned = INTERIM_DIR / "port_calls.cleaned.parquet"
trans = build_origin_next_transitions(train)
g_top  = global_mf_next(trans)
# g_top3 = global_top_list(trans, K=3)
g_top5 = global_top_list(trans, K=5)

# Prefer cleaned parquet; if missing and raw also missing, derive coords from cached candidates
pc_coords = None
if cleaned.exists():
    pc = pl.read_parquet(cleaned)
    pc_coords = build_pc_coords(pc)
else:
    # Try to synthesize coordinates from cached candidates if available
    cache_train = PROCESSED_DIR / "cand_train_cached.parquet"
    cache_val   = PROCESSED_DIR / "cand_val_cached.parquet"
    cc = []
    if cache_train.exists(): cc.append(pl.read_parquet(cache_train))
    if cache_val.exists():   cc.append(pl.read_parquet(cache_val))
    if cc:
        cc = pl.concat(cc) if len(cc)>1 else cc[0]
        origin_coords = (cc.select(["origin","lat_o","lon_o"]).rename({"origin":"destination","lat_o":"lat","lon_o":"lon"}))
        cand_coords   = (cc.select(["candidate","lat_c","lon_c"]).rename({"candidate":"destination","lat_c":"lat","lon_c":"lon"}))
        pc_coords = pl.concat([origin_coords, cand_coords]).drop_nulls().unique()
    else:
        # As last resort, rebuild cleaned parquet (requires raw files present)
        _ = ensure_interim()
        pc = pl.read_parquet(cleaned)
        pc_coords = build_pc_coords(pc)

# Candidate caches + Conditional transitions (by vessel_type+dwt_bucket)
cache_train = PROCESSED_DIR / "cand_train_cached.parquet"
cache_val   = PROCESSED_DIR / "cand_val_cached.parquet"
cache_test  = PROCESSED_DIR / "cand_test_cached.parquet"
FORCE_REBUILD = True  # set True to ignore cache and regenerate

cond_cols = [c for c in ["vessel_type","dwt_bucket","product_family_dom"] if c in s_side.columns or c in samples.columns]
# Enrich splits with conditional columns
def enrich_with_conditions(split_df: pl.DataFrame) -> pl.DataFrame:
    add_cols = ["sample_port_call_id"] + [c for c in cond_cols if c in s_side.columns]
    out = split_df.join(s_side.select(add_cols), on="sample_port_call_id", how="left")
    # product_family_dom may already be in samples; ensure present
    if "product_family_dom" in samples.columns and "product_family_dom" not in out.columns:
        out = out.join(samples.select(["sample_port_call_id","product_family_dom"]), on="sample_port_call_id", how="left")
    return out
train_en = enrich_with_conditions(train)
val_en   = enrich_with_conditions(val)
test_en  = enrich_with_conditions(test)

# Build conditional transitions on training split
trans_cond = build_conditional_transitions(train_en, condition_cols=[c for c in ["vessel_type","dwt_bucket","product_family_dom"] if c in train_en.columns], top_k=10)

if cache_train.exists() and not FORCE_REBUILD:
    cand_train = pl.read_parquet(cache_train)
else:
    cand_train = build_candidates_for_split(train_en, trans, pc_coords, add_true_label=True, N=25, M=30, global_top1=g_top, global_top_list=g_top5, trans_cond=trans_cond, condition_cols=[c for c in ["vessel_type","dwt_bucket","product_family_dom"] if c in train_en.columns], M_stages=[15,30], max_cands_per_sample=60)
    cand_train.write_parquet(cache_train)

if cache_val.exists() and not FORCE_REBUILD:
    cand_val = pl.read_parquet(cache_val)
else:
    cand_val = build_candidates_for_split(val_en,   trans, pc_coords, add_true_label=True, N=25, M=30, global_top1=g_top, global_top_list=g_top5, trans_cond=trans_cond, condition_cols=[c for c in ["vessel_type","dwt_bucket","product_family_dom"] if c in val_en.columns], M_stages=[20,35,50], max_cands_per_sample=60)
    cand_val.write_parquet(cache_val)

if cache_test.exists() and not FORCE_REBUILD:
    cand_test = pl.read_parquet(cache_test)
else:
    cand_test = build_candidates_for_split(test_en,  trans, pc_coords, add_true_label=False, N=25, M=30, global_top1=g_top, global_top_list=g_top5, trans_cond=trans_cond, condition_cols=[c for c in ["vessel_type","dwt_bucket","product_family_dom"] if c in test_en.columns], M_stages=[20,35,50], max_cands_per_sample=80)
    cand_test.write_parquet(cache_test)

def assert_no_dup_names(df: pl.DataFrame, name: str):
    cols = df.columns
    dups = [c for c in set(cols) if cols.count(c) > 1]
    assert not dups, f"{name} has duplicate columns: {dups}"

assert_no_dup_names(cand_train, "cand_train")
assert_no_dup_names(cand_val,   "cand_val")
assert_no_dup_names(cand_test,  "cand_test")

print("cand_train:", cand_train.shape, "cand_val:", cand_val.shape, "cand_test:", cand_test.shape)

cand_train: (13038518, 9) cand_val: (3365558, 9) cand_test: (1442035, 9)


In [5]:
# Port-side attributes & degrees
ports_attr  = build_ports_attr(pc_coords)
port_degree = compute_port_degree(trans)

# Helper: candidate recall
def candidate_recall(df: pl.DataFrame) -> float:
    return float(df.group_by("sample_port_call_id").agg(pl.col("y").max()).select(pl.col("y").mean()).item())

# Sample-side features (vessel attrs, seasonal, last leg speed, etc.)
vs = pl.read_csv(DATA_DIR / "vessels.csv", try_parse_dates=True) if (DATA_DIR / "vessels.csv").exists() else pl.DataFrame({})
s_side = build_sample_side(samples, pc, vs)

# --- Train ---
cand_train  = attach_port_side(cand_train, ports_attr, port_degree, compute_distance=False)
cand_train  = merge_all_features(cand_train, s_side, train)
assert_no_dup_names(cand_train, "cand_train (post-merge)")
train_out = PROCESSED_DIR / "cand_train_enriched.parquet"
cand_train.write_parquet(train_out)
del cand_train; import gc; gc.collect()

# --- Val ---
cand_val    = attach_port_side(cand_val, ports_attr, port_degree, compute_distance=False)
cand_val    = merge_all_features(cand_val, s_side, val)
assert_no_dup_names(cand_val,   "cand_val (post-merge)")
val_recall = candidate_recall(cand_val)
print(f"VAL candidate recall:  {val_recall:.2%}")
val_out = PROCESSED_DIR / "cand_val_enriched.parquet"
cand_val.write_parquet(val_out)
del cand_val; import gc; gc.collect()

# --- Test ---
cand_test   = attach_port_side(cand_test, ports_attr, port_degree, compute_distance=False)
cand_test   = merge_all_features(cand_test, s_side, test)
assert_no_dup_names(cand_test,  "cand_test (post-merge)")
test_recall = candidate_recall(cand_test)
print(f"TEST candidate recall: {test_recall:.2%}")
test_out = PROCESSED_DIR / "cand_test_enriched.parquet"
cand_test.write_parquet(test_out)
del cand_test; import gc; gc.collect()

# Combine pairs at the end (read from enriched files)
pairs_out = PROCESSED_DIR / "taskA_candidate_pairs.parquet"
cand_train_en = pl.read_parquet(train_out).with_columns(pl.lit("train").alias("split"))
cand_val_en   = pl.read_parquet(val_out).with_columns(pl.lit("valid").alias("split"))
cand_test_en  = pl.read_parquet(test_out).with_columns(pl.lit("test").alias("split"))
pl.concat([cand_train_en, cand_val_en, cand_test_en]).write_parquet(pairs_out)
print("Saved candidate pairs to:", pairs_out)

VAL candidate recall:  100.00%
TEST candidate recall: 87.38%
Saved candidate pairs to: /Users/wangwei/Documents/Folders/工作/Kpler/data/processed/taskA_candidate_pairs.parquet


In [6]:
%%time
# Group-aware negative downsampling (keep all positives per sample)
def downsample_groupwise(df: pl.DataFrame, neg_per_pos:int=15, seed:int=42) -> pl.DataFrame:
    out = []
    rng = np.random.default_rng(seed)
    for sid, sub in df.group_by("sample_port_call_id", maintain_order=True):
        pos = sub.filter(pl.col("y")==1)
        neg = sub.filter(pl.col("y")==0)
        if len(pos) == 0:
            k = min(20, len(neg))
            if k > 0:
                keep_idx = rng.choice(len(neg), size=k, replace=False)
                out.append(neg[keep_idx])
            continue
        k = min(len(neg), neg_per_pos * len(pos))
        if k > 0:
            keep_idx = rng.choice(len(neg), size=k, replace=False)
            out.append(pl.concat([pos, neg[keep_idx]]))
        else:
            out.append(pos)
    return pl.concat(out) if out else df.head(0)

train_keep_cols = ["sample_port_call_id","origin","candidate","label","y"]

# Numeric features
num_cols = [
    "dist_km","is_same_region","in_cnt","out_cnt","age",
    "prev_dist_km","last_leg_knots_est",
    "month_sin","month_cos","dow_sin","dow_cos",
    "is_crisis_time","dist_x_crisis"
]

# Paths to enriched candidate parquet files (written in previous cell)
train_enriched_path = PROCESSED_DIR / "cand_train_enriched.parquet"
val_enriched_path   = PROCESSED_DIR / "cand_val_enriched.parquet"
test_enriched_path  = PROCESSED_DIR / "cand_test_enriched.parquet"

# extra numeric if available
cand_cols = pl.read_parquet(val_enriched_path).columns if val_enriched_path.exists() else []
extra_num = [c for c in [
    "prior_prob_oc","hist_cnt_oc","prior_prob_oc_vtype","prior_prob_oc_dwt",
    "prior_prob_oc_laden","prior_prob_oc_pf",
    "geo_rank_in_sample","log_dist_km","cand_is_waypoint",
    "in_cnt_cand","out_cnt_cand"
] if c in cand_cols]
num_cols = num_cols + extra_num

# Categorical features
cat_cols = [c for c in ["origin","candidate","vessel_type","dwt_bucket","product_family_dom"] if c in cand_cols]

# Build unique keep cols and backfill missing columns across splits
keep_cols = list(dict.fromkeys(train_keep_cols + num_cols + cat_cols))
def ensure_cols(df: pl.DataFrame, cols):
    missing = [c for c in cols if c not in df.columns]
    for c in missing:
        df = df.with_columns((pl.lit(0.0) if c in num_cols else pl.lit("unk")).alias(c))
    return df

def load_unique(path) -> pl.DataFrame:
    df = pl.read_parquet(path)
    df = ensure_cols(df, keep_cols)
    return df.select(keep_cols).unique(subset=["sample_port_call_id","candidate"])

cand_train_u = load_unique(train_enriched_path)
cand_val_u   = load_unique(val_enriched_path)
cand_test_u  = load_unique(test_enriched_path)

# Memory guard: subsample validation/test for demonstration
def subsample(df: pl.DataFrame, n:int=100_000, seed:int=42) -> pl.DataFrame:
    return df if len(df) <= n else df.sample(n=n, seed=seed)
cand_val_u  = subsample(cand_val_u,  n=100_000)
cand_test_u = subsample(cand_test_u, n=100_000)

# Optional pre-sample training set to control memory (similar to 03)
if len(cand_train_u) > 700_000:
    cand_train_u = cand_train_u.sample(n=700_000, seed=42)
cand_train_ds = downsample_groupwise(cand_train_u, neg_per_pos=12, seed=42)

print("Train (unique):", cand_train_u.shape, " -> Downsampled:", cand_train_ds.shape)
print("Val   (unique):", cand_val_u.shape)
print("Test  (unique):", cand_test_u.shape)

# Disable LightGBM in this run to avoid heavy memory/OMP issues; use logistic fallback
use_lgbm = True
print("LightGBM disabled for this run; using logistic fallback.")

def prep_for_lgb(df_pl: pl.DataFrame):
    df_pd = df_pl.select(keep_cols).to_pandas()
    for c in cat_cols:
        df_pd[c] = df_pd[c].astype("category")
    for c in num_cols:
        if c in df_pd.columns:
            df_pd[c] = pd.to_numeric(df_pd[c], errors="coerce").replace([np.inf, -np.inf], np.nan)
    X = df_pd[num_cols + cat_cols]
    y = df_pd["y"].astype(int).values
    group_sizes = df_pd.groupby("sample_port_call_id")["candidate"].size().values.tolist()
    meta = df_pd[["sample_port_call_id","origin","candidate","label"]]
    return X, y, group_sizes, meta

if use_lgbm:
    Xtr, ytr, gtr, mtr = prep_for_lgb(cand_train_ds)
    Xva, yva, gva, mva = prep_for_lgb(cand_val_u)
    Xte, yte, gte, mte = prep_for_lgb(cand_test_u)

    ranker = lgb.LGBMRanker(
        objective="lambdarank",
        metric="map",
        learning_rate=0.1,
        n_estimators=400,
        num_leaves=45,
        max_depth=-1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        n_jobs=2
    )
    ranker.fit(
        Xtr, ytr, group=gtr,
        eval_set=[(Xva, yva)],
        eval_group=[gva],
        eval_at=[1,3,5],
        categorical_feature=cat_cols,
        callbacks=[
            lgb.early_stopping(stopping_rounds=150, verbose=False),
            lgb.log_evaluation(50)
        ]
    )

    def rank_predict_ranker(model, X, meta):
        s = model.predict(X, num_iteration=model.best_iteration_)
        meta2 = meta.copy()
        meta2["score"] = s
        preds, truth = [], []
        for sid, g in meta2.groupby("sample_port_call_id"):
            g2 = g.sort_values("score", ascending=False)
            preds.append(g2["candidate"].tolist())
            truth.append(g2["label"].iloc[0])
        return preds, truth

    preds_val, truth_val = rank_predict_ranker(ranker, Xva, mva)
    preds_te,  truth_te  = rank_predict_ranker(ranker, Xte, mte)

    m_val  = eval_topk_mrr([p[:5] for p in preds_val], truth_val, ks=(1,3,5))
    m_test = eval_topk_mrr([p[:5] for p in preds_te],  truth_te,  ks=(1,3,5))

    print("VAL:",  m_val)
    print("TEST:", m_test)

    # Save model
    outm = PROCESSED_DIR / "model_taskA_lgbm_ranker.txt"
    ranker.booster_.save_model(outm)
    with open(PROCESSED_DIR / "metrics_gbdt.json", "w") as f:
        json.dump({"val": m_val, "test": m_test}, f, indent=2)
    print("Saved model:", outm)

else:
    def to_xy(df_pl: pl.DataFrame):
        df2 = df_pl.select(keep_cols).unique(subset=["sample_port_call_id","candidate"])
        pdf = df2.to_pandas()
        X = pdf[num_cols + cat_cols]
        y = pdf["y"].astype(int).values
        meta = pdf[["sample_port_call_id","origin","candidate","label"]]
        return X, y, meta

    Xtr, ytr, mtr = to_xy(cand_train_ds)
    Xva, yva, mva = to_xy(cand_val_u)
    Xte, yte, mte = to_xy(cand_test_u)

    numeric = Pipeline([("imp", SimpleImputer(strategy="median")),
                        ("std", StandardScaler())])
    preproc = ColumnTransformer(
        transformers=[("num", numeric, num_cols),
                      ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=True), cat_cols)],
        remainder="drop"
    )

    clf = LogisticRegression(max_iter=200, solver="saga", n_jobs=-1, verbose=0)
    pipe = Pipeline([("prep", preproc), ("clf", clf)])
    pipe.fit(Xtr, ytr)

    def rank_predict_clf(pipe, X, meta):
        if hasattr(pipe.named_steps['clf'], 'predict_proba'):
            proba = pipe.predict_proba(X)[:, 1]
        else:
            proba = pipe.decision_function(X)
        meta2 = meta.copy()
        meta2["score"] = proba
        preds, truth = [], []
        for sid, g in meta2.groupby("sample_port_call_id"):
            g2 = g.sort_values("score", ascending=False)
            preds.append(g2["candidate"].tolist())
            truth.append(g2["label"].iloc[0])
        return preds, truth

    preds_val, truth_val = rank_predict_clf(pipe, Xva, mva)
    preds_te,  truth_te  = rank_predict_clf(pipe, Xte, mte)

    m_val  = eval_topk_mrr([p[:5] for p in preds_val], truth_val, ks=(1,3,5))
    m_test = eval_topk_mrr([p[:5] for p in preds_te],  truth_te,  ks=(1,3,5))

    print("VAL:",  m_val)
    print("TEST:", m_test)

    outm = PROCESSED_DIR / "model_taskA_hgb_ranker.joblib"
    joblib.dump(pipe, outm)
    with open(PROCESSED_DIR / "metrics_gbdt.json", "w") as f:
        json.dump({"val": m_val, "test": m_test}, f, indent=2)
    print("Saved model:", outm)

# Dump Top‑5
def dump_topk(meta, preds, split_name, k=5):
    rows = []
    # meta can be pandas DataFrame from prep
    sids = meta["sample_port_call_id"].unique()
    for sid, plist in zip(sids, preds):
        rows.append({"sample_port_call_id": sid, "pred_topk": plist[:k]})
    outp = PROCESSED_DIR / f"rank_top{str(k)}_{split_name}.parquet"
    pl.DataFrame(rows).write_parquet(outp)
    print("Saved:", outp)

dump_topk(mva, preds_val, "val", k=5)
dump_topk(mte, preds_te,  "test", k=5)

Train (unique): (700000, 21)  -> Downsampled: (700000, 21)
Val   (unique): (100000, 21)
Test  (unique): (100000, 21)
LightGBM disabled for this run; using logistic fallback.
[LightGBM] [Info] Total groups: 243349, total data: 700000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011747 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4236
[LightGBM] [Info] Number of data points in the train set: 700000, number of used features: 15
[LightGBM] [Info] Total groups: 46898, total data: 100000
[50]	valid_0's map@1: 0.991684	valid_0's map@3: 0.995252	valid_0's map@5: 0.995367
[100]	valid_0's map@1: 0.991876	valid_0's map@3: 0.995345	valid_0's map@5: 0.995455
[150]	valid_0's map@1: 0.99194	valid_0's map@3: 0.99541	valid_0's map@5: 0.995505
[200]	valid_0's map@1: 0.99194	valid_0's map@3: 0.995369	valid_0's map@5: 0.995502
[250]	valid_0's map@1: 

In [7]:
cat_cols = ["origin", "candidate", "vessel_type", "dwt_bucket", "product_family_dom"]

summary = []
for col in cat_cols:
    # 去重+去空值得到类别列表
    uniq = (
        cand_train_u.select(pl.col(col))
          .drop_nulls()
          .unique()
          .sort(col)
          .get_column(col)
          .to_list()
    )
    summary.append({
        "column": col,
        "n_unique": len(uniq),
        "values": uniq,            
    })

cat_summary = pl.DataFrame(summary)
cat_summary

column,n_unique,values
str,i64,list[str]
"""origin""",1980,"[""AMPCO"", ""ASENG FPSO"", … ""Zueitina""]"
"""candidate""",2038,"[""AMPCO"", ""ASENG FPSO"", … ""Zueitina""]"
"""vessel_type""",13,"[""Asphalt/Bitumen Tanker"", ""Bulk Carrier"", … ""Products Tanker ""]"
"""dwt_bucket""",5,"[""10–50k"", ""120–200k"", … ""<10k""]"
"""product_family_dom""",12,"[""ammonia"", ""chem/bio"", … ""olefins""]"


In [8]:
label_stats = (
    cand_train_u
    .select(pl.col("label"))
    .drop_nulls()
    .unique()
    .sort("label")
)

n_labels = label_stats.height
label_list = label_stats.get_column("label").to_list()

print(f"Distinct labels: {n_labels}")
print(label_list[:10])                 

Distinct labels: 2019
['AMPCO', 'ASENG FPSO', 'Aabenraa', 'Aalborg', 'Aamchit Port', 'Aarhus', 'Aasta Hansteen', 'Aberdeen', 'Aberdeen (WA)', 'Abidjan']
