```sas
/*------------------------------------------------------------------------------
  Macro : IV_WoE_Categorical
  Purpose : Weight of Evidence (WoE) and Information Value (IV) contribution of
            every level of a categorical variable against a numeric target.

  Parameters :
    data   - input data set (read only, never modified)
    var    - categorical variable to analyse (character or numeric)
    target - numeric target; its sum per level is used as the "bad" count,
             so it is expected to be coded 0/1

  Outputs (WORK library) :
    recap - one row per level of &var : total (row count), bad (sum of target)
    l     - recap plus default_rate, good, share, bad_rate,
            distribution_good, distribution_bad, woe, iv

  Notes :
    - Missing values of &var are replaced by the most frequent non-missing
      level (ties broken by sort order of the level) in a working copy.
    - Rows whose target is missing are ignored.
    - When a level has no good or no bad observations the ratio is undefined;
      woe and iv are then set to 0 instead of missing.
------------------------------------------------------------------------------*/
%macro IV_WoE_Categorical(data,var,target);

    /* Most frequent non-missing level first. */
    proc sql noprint;
        create table _iv_mode as
        select &var, count(*) as _iv_n_obs
        from &data
        where &var is not missing
        group by &var
        order by _iv_n_obs desc, &var;
    quit;

    /* Working copy with missing levels imputed by the mode. Reading the mode
       from a data set (not a macro variable) keeps this type-agnostic. The
       NOBS= guard avoids ending the step when no mode exists. */
    data _iv_base;
        if _n_ = 1 and _iv_n_mode > 0 then
            set _iv_mode(obs=1 keep=&var rename=(&var=_iv_mode_value))
                nobs=_iv_n_mode;
        set &data(keep=&var &target where=(not missing(&target)));
        if missing(&var) then &var = _iv_mode_value;
        drop _iv_mode_value;
    run;

    /* Per-level counts. */
    proc sql noprint;
        create table recap as
        select &var,
               count(*)     as total,
               sum(&target) as bad
        from _iv_base
        group by &var;
    quit;

    data l;
        /* First pass over recap: grand totals needed by the distributions. */
        if _n_ = 1 then do until (_iv_eof);
            set recap(keep=total bad rename=(total=_iv_t bad=_iv_b))
                end=_iv_eof;
            _iv_all_total + _iv_t;
            _iv_all_bad   + _iv_b;
        end;

        /* Second pass: one output row per level. */
        set recap;
        _iv_all_good = _iv_all_total - _iv_all_bad;

        default_rate = bad / total;
        good         = total - bad;
        share        = total / _iv_all_total;
        bad_rate     = default_rate;

        if _iv_all_good > 0 then distribution_good = good / _iv_all_good;
        if _iv_all_bad  > 0 then distribution_bad  = bad  / _iv_all_bad;

        /* log() is only defined when both distributions are strictly positive. */
        if distribution_good > 0 and distribution_bad > 0 then do;
            woe = log(distribution_good / distribution_bad);
            iv  = woe * (distribution_good - distribution_bad);
        end;
        else do;
            woe = 0;
            iv  = 0;
        end;

        drop _iv_t _iv_b _iv_all_total _iv_all_bad _iv_all_good;
    run;

    proc datasets library=work nolist;
        delete _iv_mode _iv_base;
    quit;

    %mend;
```

```sas
/*------------------------------------------------------------------------------
  Macro : IV_WoE_Continue
  Purpose : Weight of Evidence / Information Value table for a (binned)
            variable against a binary label.

  Parameters :
    df    - input data set (read only, never modified)
    var   - variable whose levels are analysed (character or numeric)
    label - outcome variable; 0 counts as "good", any other non-missing
            value counts as "bad"
    tar   - not used by the computation; kept so existing calls still work
    pr    - true : print the table and write the total IV to the log
            (case-insensitive), anything else : silent. Default false.

  Outputs :
    WORK.freq - one row per level of &var, sorted by &var, with
                Total, Bad, Good, Share, Bad_Rate, Distrib_Good,
                Distrib_Bad, WoE, IV
    &IV       - macro variable holding the summed IV (only when pr=true)

  Notes :
    - Missing values of a CHARACTER &var are grouped under the label "NULL";
      missing values of a numeric &var stay missing and form their own level.
    - Rows whose label is missing are ignored.
    - WoE and IV are set to 0 for a level without good or without bad rows,
      where the logarithm is undefined.
------------------------------------------------------------------------------*/
%macro IV_WoE_Continue(df, var, label, tar, pr=false);

    %local _ivc_dsid _ivc_vnum _ivc_type _ivc_len _ivc_rc;

    /* Look up type and length of &var so the "NULL" relabel is only applied
       to character variables, with enough room for the 4-character label. */
    %let _ivc_dsid = %sysfunc(open(&df, i));
    %if &_ivc_dsid = 0 %then %do;
        %put ERROR: IV_WoE_Continue could not open data set &df..;
        %return;
    %end;
    %let _ivc_vnum = %sysfunc(varnum(&_ivc_dsid, &var));
    %if &_ivc_vnum > 0 %then %do;
        %let _ivc_type = %sysfunc(vartype(&_ivc_dsid, &_ivc_vnum));
        %let _ivc_len  = %sysfunc(varlen(&_ivc_dsid, &_ivc_vnum));
    %end;
    %let _ivc_rc = %sysfunc(close(&_ivc_dsid));
    %if &_ivc_vnum = 0 %then %do;
        %put ERROR: IV_WoE_Continue did not find variable &var in &df..;
        %return;
    %end;

    /* Working copy: the caller's data set is left untouched. */
    data _ivc_base;
        %if &_ivc_type = C %then %do;
            length &var $ %sysfunc(max(4, &_ivc_len));
        %end;
        set &df(keep=&var &label where=(not missing(&label)));
        %if &_ivc_type = C %then %do;
            if missing(&var) then &var = "NULL";
        %end;
    run;

    /* Good / bad counts per level, on a single row per level. */
    proc sql noprint;
        create table freq as
        select &var,
               count(*)         as Total,
               sum(&label ne 0) as Bad,
               sum(&label = 0)  as Good
        from _ivc_base
        group by &var
        order by &var;
    quit;

    data freq;
        /* First pass: grand totals. */
        if _n_ = 1 then do until (_ivc_eof);
            set freq(keep=Total Bad rename=(Total=_ivc_t Bad=_ivc_b))
                end=_ivc_eof;
            _ivc_all_total + _ivc_t;
            _ivc_all_bad   + _ivc_b;
        end;

        /* Second pass: per-level statistics. */
        set freq;
        _ivc_all_good = _ivc_all_total - _ivc_all_bad;

        Share    = Total / _ivc_all_total;
        Bad_Rate = Bad / Total;
        if _ivc_all_good > 0 then Distrib_Good = Good / _ivc_all_good;
        if _ivc_all_bad  > 0 then Distrib_Bad  = Bad  / _ivc_all_bad;

        if Distrib_Good > 0 and Distrib_Bad > 0 then do;
            WoE = log(Distrib_Good / Distrib_Bad);
            IV  = WoE * (Distrib_Good - Distrib_Bad);
        end;
        else do;
            WoE = 0;
            IV  = 0;
        end;

        drop _ivc_t _ivc_b _ivc_all_total _ivc_all_bad _ivc_all_good;
    run;

    proc datasets library=work nolist;
        delete _ivc_base;
    quit;

    %if %upcase(&pr) = TRUE %then %do;
        proc print data=freq;
        run;

        /* Total IV = sum of the per-level contributions. */
        proc sql noprint;
            select put(coalesce(sum(IV), 0), 8.4)
              into :IV trimmed
            from freq;
        quit;
        %put IV = &IV;
    %end;
%mend;

```

```sas
/*******************************************************************************
  Macro : Analyse_decoupage
  Author : marshall
  Date : 01/2023
  Description : Analyses the binning of a variable with a bar chart of the
                default rate per category and a pie chart of the share of
                each category.

  Inputs :
    - base   : name of the SAS data set (read only)
    - var    : name of the variable to analyse
    - target : name of the numeric target variable; its mean per category
               is reported as the default rate

  Outputs :
    - WORK.decoupage : one row per category of &var with
                       total (row count), bad_rate (mean of target),
                       share (percentage of all rows, 0-100)
    - Bar chart of the default rate per category
    - Pie chart of the percentage share of each category

  Notes :
    - PROC SGPLOT has no pie statement, so the pie chart is rendered with a
      Graph Template Language template (Analyse_decoupage_pie) through
      PROC SGRENDER. The template is left in the current ODS template store.
*******************************************************************************/
%macro Analyse_decoupage(base,var,target);

  /* Binning table: exactly one row per category. The target itself is not
     selected, otherwise PROC SQL remerges and returns one row per input row. */
  proc sql;
    create table decoupage as
    select &var,
           count(*)                                      as total,
           mean(&target)                                 as bad_rate,
           count(*) * 100 / (select count(*) from &base) as share format=8.2
    from &base
    group by &var;
  quit;

  /* Bar chart of the default rate per category */
  proc sgplot data=decoupage;
    vbar &var / response=bad_rate stat=mean;
    xaxis label="&var";
    yaxis label="Taux de Défaut";
    title "Evolution du Taux de Défaut";
  run;
  title;

  /* Pie chart of the share of each category */
  proc template;
    define statgraph Analyse_decoupage_pie;
      begingraph;
        entrytitle "Pourcentage de Part";
        layout region;
          piechart category=&var response=share /
                   datalabelcontent=(category response)
                   datalabellocation=outside;
        endlayout;
      endgraph;
    end;
  run;

  proc sgrender data=decoupage template=Analyse_decoupage_pie;
  run;

%mend Analyse_decoupage;


```

```sas
/* Character-by-character walk over the macro variable &string.
   &len receives the length of &string and &i starts at that length.
   &string is not assigned anywhere in this fragment, so it must already
   exist when this code runs. */
%let len = %sysfunc(length(&string));
%let i = &len;

/* Loops while &i is greater than 0; &i is decreased by 1 at the end of each
   iteration, so positions len, len-1, ..., 1 are visited in that order.
   NOTE(review): SAS does not accept %DO loops in open code - confirm this
   fragment is meant to sit inside a %macro definition. */
%do %while(%eval(&i) > 0);
  /* Takes the single character at position &i and places it in front of the
     current value of &output.
     NOTE(review): &output is not initialised in this fragment - confirm it is
     set (e.g. to empty) before the loop.
     NOTE(review): the parentheses on this line look unbalanced: inside %str
     the sequence percent-sign + right-parenthesis is an escaped parenthesis,
     which would leave the %qscan call without its closing parenthesis - confirm.
     NOTE(review): the two vertical bars are plain text in the macro language,
     not a concatenation operator - confirm they are intended in the result.
     TODO: the intended final value of &output cannot be determined from this
     fragment alone. */
  %let output = %qscan(%qsubstr(&string, &i, 1), %str(%)) || &output;
  %let i = %eval(&i - 1);
%end;
```

```sas
/*------------------------------------------------------------------------------
  Macro : Optimal
  Purpose : Search for the number of quantile groups (2 to 20) of a numeric
            variable that gives the best discrimination of a binary target,
            using a logistic regression on the groups.

  Parameters :
    data    - input data set (read only)
    var     - numeric variable to cut into quantile groups
    target  - binary target variable
    seuil   - minimum size every quantile group must reach for a cut to be
              evaluated. ASSUMPTION (to be confirmed with callers): a value
              below 1 is read as a share of all rows, a value of 1 or more
              as a row count. Empty is treated as 0.
    model   - not used; kept so existing calls still work
    top_sig - minimum absolute value every non-intercept coefficient must
              reach for a cut to be eligible as best cut. Empty = 0.

  Outputs (WORK library) :
    tes    - copy of &data plus join ("a" + row number), _GROUPS_ (quantile
             group under the best cut, missing when no cut qualified) and
             _LABELS_ (same value as _GROUPS_)
    lmp    - copy of tes taken before any recoding or grouping
    resume - one row per evaluated cut : seuil (number of groups) and
             score (c statistic / AUC floored to 2 decimals)
    &_best_seuil_ - number of groups of the best cut, 0 when none qualified

  Notes :
    - When at least 6% of the rows have &var = 0, zeros are recoded to
      missing so that they form their own class level in the regression.
    - Only block comments are used in this macro: statement-style comments
      containing an apostrophe open an unbalanced quote during macro
      compilation.
------------------------------------------------------------------------------*/
%macro Optimal(data,var,target,seuil,model,top_sig);

    %local i best_ks l1 l2
           _opt_total _opt_zero_share _opt_n_groups _opt_min_n _opt_min_share
           _opt_ok _opt_auc _opt_min_coef _opt_n_tested;

    /* Initialisation */
    %if %length(&seuil) = 0   %then %let seuil = 0;
    %if %length(&top_sig) = 0 %then %let top_sig = 0;
    %let l1 = ;
    %let l2 = ;
    %let best_ks = -1;
    %let _best_seuil_ = 0;
    %let _opt_n_tested = 0;
    %let _opt_total = 0;
    %let _opt_zero_share = 0;

    data tes;
        set &data;
        length join $ 16;
        join = cats("a", _n_);
    run;

    data lmp;
        set tes;
    run;

    proc sql noprint;
        select count(*) format=best32.,
               coalesce(mean(&var = 0), 0) format=best32.
          into :_opt_total trimmed, :_opt_zero_share trimmed
        from tes;
    quit;

    /* A large mass at zero is isolated as its own (missing) level. */
    %if %sysevalf(&_opt_zero_share >= 0.06) %then %do;
        data tes;
            set tes;
            if &var = 0 then &var = .;
        run;
    %end;

    /* Optimal cut search based on logistic regression */
    %if &_opt_total > 0 %then %do;
        %do i = 2 %to 20;

            /* Quantile groups 0 .. i-1; ties can yield fewer groups. */
            proc rank data=tes groups=&i out=_opt_rank;
                var &var;
                ranks _GROUPS_;
            run;

            %let _opt_n_groups = 0;
            %let _opt_min_n = 0;
            proc sql noprint;
                select count(*) format=best32.,
                       coalesce(min(_opt_n_obs), 0) format=best32.
                  into :_opt_n_groups trimmed, :_opt_min_n trimmed
                from (select _GROUPS_, count(*) as _opt_n_obs
                      from _opt_rank
                      where _GROUPS_ is not missing
                      group by _GROUPS_);
            quit;
            %let _opt_min_share = %sysevalf(&_opt_min_n / &_opt_total);

            %if %sysevalf(&seuil < 1) %then
                %let _opt_ok = %sysevalf(&_opt_min_share >= &seuil);
            %else
                %let _opt_ok = %sysevalf(&_opt_min_n >= &seuil);
            %if &_opt_n_groups < 2 %then %let _opt_ok = 0;

            %if &_opt_ok %then %do;

                /* Remove results of a previous iteration so a failed fit
                   cannot be mistaken for a successful one. */
                proc datasets library=work nolist nowarn;
                    delete _opt_assoc _opt_pe;
                quit;

                ods exclude all;
                ods output Association=_opt_assoc ParameterEstimates=_opt_pe;
                proc logistic data=_opt_rank;
                    class _GROUPS_ / param=ref missing;
                    model &target = _GROUPS_;
                run;
                ods exclude none;

                %let _opt_auc = ;
                %let _opt_min_coef = ;
                %if %sysfunc(exist(_opt_assoc)) and %sysfunc(exist(_opt_pe)) %then %do;
                    proc sql noprint;
                        /* c statistic (area under the ROC curve) */
                        select floor(nValue2 * 100) / 100 format=best32.
                          into :_opt_auc trimmed
                        from _opt_assoc
                        where Label2 = 'c' and nValue2 is not missing;

                        /* smallest absolute coefficient, intercept excluded;
                           -1 when none is available so the test below fails */
                        select coalesce(min(abs(Estimate)), -1) format=best32.
                          into :_opt_min_coef trimmed
                        from _opt_pe
                        where upcase(Variable) ne 'INTERCEPT';
                    quit;
                %end;

                %if %length(&_opt_auc) > 0 %then %do;
                    %let l1 = &l1 &_opt_auc;
                    %let l2 = &l2 &i;
                    %let _opt_n_tested = %eval(&_opt_n_tested + 1);
                    %if %sysevalf(&_opt_auc > &best_ks)
                        and %sysevalf(&_opt_min_coef >= &top_sig) %then %do;
                        %let best_ks = &_opt_auc;
                        %let _best_seuil_ = &i;
                    %end;
                %end;
            %end;
        %end;
    %end;

    /* Verification table: every evaluated cut with its score */
    data resume;
        length seuil score 8;
        %do i = 1 %to &_opt_n_tested;
            seuil = %scan(&l2, &i, %str( ));
            score = %scan(&l1, &i, %str( ));
            output;
        %end;
        stop;
    run;

    /* Apply the best cut to tes */
    %if &_best_seuil_ >= 2 %then %do;
        proc rank data=tes groups=&_best_seuil_ out=tes;
            var &var;
            ranks _GROUPS_;
        run;

        data tes;
            set tes;
            _LABELS_ = _GROUPS_;
        run;
    %end;
    %else %do;
        data tes;
            set tes;
            _GROUPS_ = .;
            _LABELS_ = .;
        run;
    %end;

    proc datasets library=work nolist nowarn;
        delete _opt_rank _opt_assoc _opt_pe;
    quit;

    %put NOTE: Optimal - best number of groups = &_best_seuil_ (score = &best_ks).;

%mend Optimal;

```

```sas
/*------------------------------------------------------------------------------
  Macro : categorical_var_summary
  Purpose : Summarises the potential categorical variables of a data set.
            For every variable with fewer than 1e6 distinct values, one line
            is written to the SAS log in the form
                name|type|number of distinct values|list of values
            where type is C (character) or N (numeric) and the list holds the
            formatted distinct values, limited to 200 characters.

  Parameters :
    data - input data set (read only)

  Outputs :
    WORK.base - copy of &data
    SAS log   - header line followed by one line per retained variable

  Notes :
    - A missing value counts as one distinct value.
------------------------------------------------------------------------------*/
%macro categorical_var_summary(data);
  %local _cvs_dsid _cvs_nvars _cvs_i _cvs_rc;

  /* Identification of the potential categorical variables */
  %put **Identification des potentielles variables catégorielles:**;

  /* Copy of the original data */
  data base;
    set &data;
  run;

  /* Collect name and type of every column, then release the data set. */
  %let _cvs_dsid = %sysfunc(open(base, i));
  %if &_cvs_dsid = 0 %then %do;
    %put ERROR: categorical_var_summary could not open the working copy BASE.;
    %return;
  %end;
  %let _cvs_nvars = %sysfunc(attrn(&_cvs_dsid, nvars));
  %do _cvs_i = 1 %to &_cvs_nvars;
    %local _cvs_name&_cvs_i _cvs_type&_cvs_i;
    %let _cvs_name&_cvs_i = %sysfunc(varname(&_cvs_dsid, &_cvs_i));
    %let _cvs_type&_cvs_i = %sysfunc(vartype(&_cvs_dsid, &_cvs_i));
  %end;
  %let _cvs_rc = %sysfunc(close(&_cvs_dsid));

  /* Report */
  %put Features|Data Type|Unique Count|Unique Values;

  %do _cvs_i = 1 %to &_cvs_nvars;

    proc sql noprint;
      create table _cvs_values as
      select distinct %sysfunc(nliteral(&&_cvs_name&_cvs_i)) as _cvs_value
      from base;
    quit;

    data _null_;
      length _cvs_list $ 200 _cvs_item $ 198 _cvs_line $ 500;
      retain _cvs_list;

      /* _cvs_n is known at compile time, before the first read. */
      if _cvs_n = 0 then do;
        _cvs_line = catx("|", "&&_cvs_name&_cvs_i", "&&_cvs_type&_cvs_i", 0);
        put _cvs_line;
        stop;
      end;
      if _cvs_n >= 1e6 then stop;

      set _cvs_values nobs=_cvs_n end=_cvs_last;

      /* Append the value only while it still fits, so CATX never overflows. */
      _cvs_item = strip(vvalue(_cvs_value));
      if lengthn(_cvs_list) + lengthn(_cvs_item) + 2 <= 200 then
        _cvs_list = catx(", ", _cvs_list, _cvs_item);

      if _cvs_last then do;
        _cvs_line = catx("|", "&&_cvs_name&_cvs_i", "&&_cvs_type&_cvs_i",
                         _cvs_n, _cvs_list);
        put _cvs_line;
      end;
    run;
  %end;

  proc datasets library=work nolist nowarn;
    delete _cvs_values;
  quit;
%mend categorical_var_summary;

```

```sas
/* Cross-tabulation and count plot of &feature against &target on WORK.base.
   Expects the macro variables &feature and &target to be defined by the
   caller. */

/* Orders base by the feature (kept from the original script; base is
   rewritten in sorted order). */
proc sort data=base;
   by &feature;
run;

/* Plain row counts per feature x target cell. No WEIGHT statement: weighting
   by the target would exclude every row whose target is 0. */
proc freq data=base;
   tables &feature * &target / nopercent norow nocol;
run;

/* Horizontal count plot: one bar per feature level and target value, counts
   on the X axis and feature levels on the Y axis, matching the axis labels.
   Axis label fonts are set with LABELATTRS=. */
proc sgplot data=base;
   hbar &feature / group=&target groupdisplay=cluster;
   xaxis label="Count" labelattrs=(size=14 weight=bold);
   yaxis label="&feature" labelattrs=(size=14 weight=bold);
run;

```

```sas
/* Horizontal box plots of &feature_set, one box per value of the character
   variable target, restricted to the classes 'Sain' and 'Défaut'; outliers
   are hidden. HBOX takes a single analysis variable, so &feature_set is
   expected to resolve to one numeric variable name - TODO confirm with
   callers. */
proc sgplot data=base;
   where target in ('Sain','Défaut');
   hbox &feature_set / category=target nooutliers;
run;
```