In [None]:
#r "nuget:Microsoft.Spark"
#r "nuget:Microsoft.ML"
#r "nuget:Microsoft.ML.FastTree"
#r "nuget: Plotly.NET,2.0.0-preview.18"
#r "nuget: Plotly.NET.Interactive,2.0.0-preview.1"

In [None]:
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.FastTree;
using Microsoft.Spark;
using Microsoft.Spark.Sql;
using static Microsoft.Spark.Sql.Functions;
using Plotly.NET;

// Before running this cell, start the .NET for Apache Spark backend in debug mode:
//   spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --master local D:\3bStudio\Sandbox\spark-program\FirstSparkProgram\bin\Debug\net6.0\microsoft-spark-3-0_2.12-2.1.0.jar debug
// The Spark portal is then reachable at http://localhost:4040
SparkSession spark = SparkSession.Builder().AppName("spark-create-ml-model").GetOrCreate();

// Set the Spark log level to WARN for this session's context.
spark.SparkContext.SetLogLevel("WARN");

In [None]:
// Load the heart dataset from a local CSV file.
// "header" tells the reader that the first line holds column names;
// "inferSchema" asks Spark to infer the column types instead of reading everything as strings.
var df = spark.Read()
    .Option("inferSchema", true)
    .Option("header", true)
    .Csv(@"D:\3bStudio\Sandbox\3bs-spark-training\resources\heart.csv");

// Display a preview of the loaded rows in the notebook output.
df.Show();

In [None]:
// Print the schema (column names and types) of the loaded DataFrame,
// which is useful to check the column positions used when converting rows later on.
df.PrintSchema();

In [None]:
/// <summary>
/// In-memory representation of one row of the heart dataset, reduced to the
/// columns used by the ML.NET pipeline. The member names are referenced as
/// column names ("Age", "Cholesterol", "RestingBP", "FastingBS", "HeartDisease")
/// when the pipeline is built, so they must not be renamed independently.
/// </summary>
public class HeartProfile
{
    /// <summary>Numeric feature, concatenated into the "Features" vector.</summary>
    public float Age;
    /// <summary>Numeric feature, concatenated into the "Features" vector.</summary>
    public float Cholesterol;
    /// <summary>Numeric feature, concatenated into the "Features" vector.</summary>
    public float RestingBP;
    /// <summary>Numeric feature, concatenated into the "Features" vector.</summary>
    public float FastingBS;
    /// <summary>Label column of the binary classifier; set to true when the source value equals 1.</summary>
    public bool HeartDisease;
}

In [None]:
// Bring every row of the Spark DataFrame to the driver and materialize it once,
// so the rows can be counted and iterated locally.
var rows = df.Collect().ToList();

// Convert each Spark row into a HeartProfile usable by ML.NET.
// Columns are read by position: 0 -> Age, 4 -> Cholesterol, 3 -> RestingBP,
// 5 -> FastingBS, 11 -> HeartDisease.
// NOTE(review): these positions assume the CSV column order shown by df.PrintSchema() - confirm if the file changes.
var dataset = new List<HeartProfile>(rows.Count);
foreach (var row in rows)
{
    object[] rowValues = row.Values;

    dataset.Add(new HeartProfile
    {
        Age = Convert.ToSingle(rowValues[0]),
        Cholesterol = Convert.ToSingle(rowValues[4]),
        RestingBP = Convert.ToSingle(rowValues[3]),
        FastingBS = Convert.ToSingle(rowValues[5]),
        // Convert.ToInt32 accepts any boxed numeric type (int, long, ...), whereas a direct
        // (int) unboxing cast throws unless the boxed value is exactly an Int32.
        // Like Convert.ToSingle above, a null value converts to 0 (here: label = false).
        HeartDisease = Convert.ToInt32(rowValues[11]) == 1
    });
}

In [None]:
// Use Plotly to visualize the dataset and understand what it contains:
// the average cholesterol value per age, rendered as a column chart.
//
// The query result is materialized once with ToList() because it is read twice below
// (once for the X values, once for the Y values); without it the deferred
// group-by/average would be re-executed on each enumeration.
var avergaeCholestrerolByAge =
    (from allData in dataset
     group allData by allData.Age into groupedPeople
     select new
     {
         Age = groupedPeople.Key,
         AverageScore = groupedPeople.Average(x => x.Cholesterol),
     }).ToList();

// X axis: age, Y axis: average cholesterol for that age.
var chart = Chart2D.Chart.Column<float, float, string>(
    avergaeCholestrerolByAge.Select(x => x.Age).ToList(),
    avergaeCholestrerolByAge.Select(x => x.AverageScore).ToList());
chart.Show();

In [None]:
// Entry point for all ML.NET operations in this notebook.
MLContext mlContext = new MLContext();

// Expose the in-memory HeartProfile list as an IDataView, then hold out 20% of it for testing.
IDataView dataView = mlContext.Data.LoadFromEnumerable(dataset);
var dataSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

// Keep both partitions under their own names for the training and evaluation cells.
IDataView testData = dataSplit.TestSet;
IDataView trainData = dataSplit.TrainSet;

In [None]:
// Training pipeline:
//   1. gather the four numeric HeartProfile columns into a single "Features" vector column;
//   2. train a FastTree binary classifier that predicts "HeartDisease" from "Features".
var pipeline = mlContext.Transforms
    .Concatenate("Features", "Age", "Cholesterol", "RestingBP", "FastingBS")
    .Append(
        mlContext.BinaryClassification.Trainers.FastTree(
            labelColumnName: "HeartDisease",
            featureColumnName: "Features"));

In [None]:
// Fit the whole pipeline on the training partition; the result is the trained model.
ITransformer trainedModel = pipeline.Fit(input: trainData);

In [None]:
// Score the held-out test partition with the trained model and compute the
// binary classification metrics, comparing the "Score" column against the "HeartDisease" label.
var predictions = trainedModel.Transform(testData);
var metrics = mlContext.BinaryClassification.Evaluate(data: predictions, labelColumnName: "HeartDisease", scoreColumnName: "Score");

// Print a report of the metrics.
// Ratios in [0, 1] are printed as percentages (P2). LogLoss and LogLossReduction use "0.##":
// unlike "#.##", it keeps the leading zero and prints "0" instead of an empty string for a zero value.
Console.WriteLine();
Console.WriteLine();
Console.WriteLine("************************************************************");
Console.WriteLine("*       Metrics for Heart disease detection binary classification model      ");
Console.WriteLine("*-----------------------------------------------------------");
Console.WriteLine($"*       Accuracy: {metrics.Accuracy:P2}");
Console.WriteLine($"*       Area Under Roc Curve:      {metrics.AreaUnderRocCurve:P2}");
Console.WriteLine($"*       Area Under PrecisionRecall Curve:  {metrics.AreaUnderPrecisionRecallCurve:P2}");
Console.WriteLine($"*       F1Score:  {metrics.F1Score:P2}");
Console.WriteLine($"*       LogLoss:  {metrics.LogLoss:0.##}");
Console.WriteLine($"*       LogLossReduction:  {metrics.LogLossReduction:0.##}");
Console.WriteLine($"*       PositivePrecision:  {metrics.PositivePrecision:P2}");
Console.WriteLine($"*       PositiveRecall:  {metrics.PositiveRecall:P2}");
Console.WriteLine($"*       NegativePrecision:  {metrics.NegativePrecision:P2}");
Console.WriteLine($"*       NegativeRecall:  {metrics.NegativeRecall:P2}");
Console.WriteLine("************************************************************");
Console.WriteLine();
Console.WriteLine();

In [None]:
// Persist the trained model, together with the schema of its training input, as a zip file.
// Reference sample: https://github.com/dotnet/machinelearning-samples/tree/main/samples/csharp/getting-started/BinaryClassification_HeartDiseaseDetection
mlContext.Model.Save(
    model: trainedModel,
    inputSchema: trainData.Schema,
    filePath: "D:/2.Sandbox/3bs-spark-training/resources/HeartClassification.zip");