In [1]:
# show_matching_examples.py
"""
Affiche des exemples clairs de paires/clusters unifi√©s issus de step3_weighted_matching.py.
Id√©al pour valider la qualit√© du matching sur profils marocains.
"""

import json
from pathlib import Path

def truncate(text, length=100):
    if not text:
        return "‚Äî"
    text = str(text).replace('\n', ' ').replace('\r', ' ')
    return (text[:length] + "...") if len(text) > length else text

def main():
    output_dir = Path("output")
    try:
        with open(output_dir / "unified_profiles.json", "r", encoding="utf-8") as f:
            unified = json.load(f)
    except FileNotFoundError:
        print("‚ùå Fichier 'unified_profiles.json' non trouv√©. Ex√©cute d'abord step3_weighted_matching.py.")
        return

    if not unified:
        print("‚ö†Ô∏è Aucune identit√© unifi√©e trouv√©e.")
        return

    print(f"üîç Affichage de 5 clusters unifi√©s parmi {len(unified)} :\n")

    # Afficher les 5 premiers clusters (m√™me si >2 profils)
    for idx, person in enumerate(unified[:5]):
        profiles = person["profiles"]
        print("=" * 90)
        print(f"üî∏ Cluster {idx+1} | ID: {person['unified_id']} | {len(profiles)} profils")
        print("=" * 90)

        for p in profiles:
            plat = p["platform"].capitalize()
            name = p.get("fullName", "‚Äî")
            username = p.get("username", "‚Äî")
            email = p.get("email", "‚Äî")
            location = p.get("location", "‚Äî")
            bio = truncate(p.get("bio") or p.get("headline") or p.get("repo_descriptions", ""))

            print(f"\n‚û°Ô∏è {plat}:")
            print(f"   Nom complet : {name}")
            print(f"   Username    : @{username}")
            print(f"   Email       : {email}")
            print(f"   Localisation: {location}")
            print(f"   Bio         : {bio}")

        # V√©rifier si c'est un vrai triplet (3 plateformes)
        platforms = {p["platform"] for p in profiles}
        if len(platforms) == 3:
            print("\n   üåü Triplet complet d√©tect√© !")
        elif len(platforms) == 2:
            print(f"\n   üîó Paire : {' ‚Üî '.join(sorted(platforms))}")
        print("\n")

    # Statistique rapide
    triplet_count = sum(1 for u in unified if len({p["platform"] for p in u["profiles"]}) == 3)
    pair_count = sum(1 for u in unified if len({p["platform"] for p in u["profiles"]}) == 2)
    print(f"üìä R√©sum√© : {pair_count} paires | {triplet_count} triplets complets")

if __name__ == "__main__":
    main()

üîç Affichage de 5 clusters unifi√©s parmi 471 :

üî∏ Cluster 1 | ID: person_00000 | 83 profils

‚û°Ô∏è Github:
   Nom complet : Omar MHAIMDAT
   Username    : @omarmhaimdat
   Email       : omarmhaimdat@gmail.com
   Localisation: morocco
   Bio         : Data Scientist Software Engineer

‚û°Ô∏è Linkedin:
   Nom complet : Omar Jadiani
   Username    : @omar-jadiani-766aa3229
   Email       : None
   Localisation: other
   Bio         : As a passionate Software Developer with a strong focus on building high performance mobile and web a...

‚û°Ô∏è Github:
   Nom complet : Omar Jadiani
   Username    : @OmarJ9
   Email       : None
   Localisation: morocco
   Bio         : I am a passionate Software Developer

‚û°Ô∏è Linkedin:
   Nom complet : OMAR Ait benhaddi
   Username    : @omaraitbenhaddi
   Email       : None
   Localisation: morocco
   Bio         : Omar is an avid Computer Science Engineering student at the International Academy of Civil Aviation ...

‚û°Ô∏è Github:
   Nom comp