# Train Linear Model

### Read cleaned dataset for training

In [ ]:
// Base directory for this notebook's data files: "<home>/data".
// System.getenv returns null when HOME is not set, which would silently build
// the bogus path "null/data"; fall back to the JVM's user.home property instead.
val dataDir = {
  val home = sys.env.getOrElse("HOME", sys.props("user.home"))
  s"$home/data"
}
// Location of the cleaned history dataset inside dataDir.
val dataLocation = s"$dataDir/cleaned-history.parquet"
// Short alias for the session object used by the cells below.
// NOTE(review): sparkSession is not defined in this notebook — presumably it is
// injected by the notebook runtime; confirm.
val spark = sparkSession

In [ ]:
// Load the cleaned history dataset as a DataFrame.
// The Parquet format is requested explicitly: the generic load() picks its
// format from the session setting `spark.sql.sources.default`, so it only
// reads Parquet while that setting is left at its default value.
val data = spark.read.parquet(dataLocation)

### Assembler to gather features from selected columns

In [ ]:
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.{Pipeline, PipelineModel}

In [ ]:
// Combines the BTCUSD and XRPUSD columns into a single vector column
// named "features".
val assembler = {
  val featureColumns = Array("BTCUSD", "XRPUSD")
  new VectorAssembler()
    .setInputCols(featureColumns)
    .setOutputCol("features")
}

### Linear Model definition with Label column

In [ ]:
// Linear regression estimator that uses the ETHUSD column as its label,
// with at most 10 iterations and a regularization parameter of 0.001.
// No features column is set here, so the estimator's default applies
// (presumably "features", matching the assembler's output — confirm).
val lr = new LinearRegression()
  .setRegParam(0.001)
  .setMaxIter(10)
  .setLabelCol("ETHUSD")

### Pipeline with 2 stages: Feature assembler and Linear Regression

In [ ]:
// Two-stage pipeline: the feature assembler runs first, then the regression.
val pipeline = {
  val stages = Array[org.apache.spark.ml.PipelineStage](assembler, lr)
  new Pipeline().setStages(stages)
}

### Train the model and plot Prediction vs Actual

In [ ]:
// Fit every pipeline stage on the loaded dataset; the result is the trained
// pipeline that is used for prediction and persisted further down.
val model: PipelineModel = pipeline.fit(data)

In [ ]:
// Plot the actual ETHUSD value against the model's prediction.
// The predictions are computed on the same dataset the model was fitted on,
// and all selected rows are collected to the driver for charting.
{
  val predictions = model.transform(data)
  val actualVsPredicted = predictions.select("ETHUSD", "prediction")
  ScatterChart(actualVsPredicted.collect)
}

### Save model on disk

In [ ]:
// Persist the fitted pipeline under dataDir, replacing any model that was
// previously saved at the same path.
{
  val modelPath = s"$dataDir/spark-linear-model"
  model.write.overwrite().save(modelPath)
}