In [1]:
%useLatestDescriptors
%use kotlin-dl (0.5.1)
%use dataframe (0.9.1)


In [2]:
var df = DataFrame.readCSV(fileOrUrl = "../src/main/resources/titanic.csv", delimiter = ';', parserOptions = ParserOptions(locale = java.util.Locale.FRENCH))

df.head()

In [14]:
df.schema()

pclass: Int
survived: Int
name: String
sex: String?
age: Double?
sibsp: Int?
parch: Int?
ticket: String
fare: Double?
cabin: String?
embarked: String?
boat: String?
body: Int?
homedest: String?

In [3]:
fun <T> DataFrame<T>.toTrainTest(
    trainRatio: Double,
    yColumn: ColumnSelector<T, Number>,
): Pair<OnHeapDataset, OnHeapDataset> =
    toOnHeapDataset(yColumn)
        .split(trainRatio)

fun <T> DataFrame<T>.toOnHeapDataset(yColumn: ColumnSelector<T, Number>): OnHeapDataset =
    OnHeapDataset.create(
        dataframe = this,
        yColumn = yColumn,
    )

fun <T> OnHeapDataset.Companion.create(
    dataframe: DataFrame<T>,
    yColumn: ColumnSelector<T, Number>,
): OnHeapDataset {
    val x by column<FloatArray>("X")

    fun extractX(): Array<FloatArray> =
        dataframe.remove(yColumn)
            .convert { allDfs() }.toFloat()
            .merge { dfsOf<Float>() }.by { it.toFloatArray() }.into(x)
            .getColumn(x).toTypedArray()

    fun extractY(): FloatArray = dataframe[yColumn].toFloatArray()

    return create(
        ::extractX,
        ::extractY,
    )
}


In [4]:
 val (train, test) = df
        // imputing
        .fillNulls { sibsp and parch and age and fare }.perCol { it.mean() }
        .fillNulls { sex }.withValue("female")
        // one hot encoding
        .pivotMatches { pclass and sex }
        // feature extraction
        .select { survived and pclass and sibsp and parch and age and fare and sex } // TODO: need to check
        .shuffle()
        .toTrainTest(0.7) { survived }

In [10]:
val SEED = 12L
val TEST_BATCH_SIZE = 100
val EPOCHS = 50
val TRAINING_BATCH_SIZE = 50

val model = Sequential.of(
    Input(9),
    Dense(50, Activations.Relu, kernelInitializer = HeNormal(), biasInitializer = Zeros()),
    Dense(50, Activations.Relu, kernelInitializer = HeNormal(), biasInitializer = Zeros()),
    Dense(2, Activations.Linear, kernelInitializer = HeNormal(), biasInitializer = Zeros())
)

In [11]:
import org.jetbrains.kotlinx.dl.api.summary.printSummary

model.compile(
            optimizer = Adam(),
            loss = Losses.SOFT_MAX_CROSS_ENTROPY_WITH_LOGITS,
            metric = Metrics.ACCURACY
        )

model.printSummary()

Model type: Sequential
______________________________________________________________________________
Layer (type)                           Output Shape              Param #      
input_1(Input)                         [None, 9]                 0            
______________________________________________________________________________
dense_2(Dense)                         [None, 50]                500          
______________________________________________________________________________
dense_3(Dense)                         [None, 50]                2550         
______________________________________________________________________________
dense_4(Dense)                         [None, 2]                 102          
______________________________________________________________________________
Total trainable params: 3152
Total frozen params: 0
Total params: 3152
______________________________________________________________________________


In [12]:
val trainHistory = model.fit(dataset = train, epochs = EPOCHS, batchSize = TRAINING_BATCH_SIZE)
val trainHistoryDF = trainHistory.batchHistory.toDataFrame()
trainHistoryDF.head()

In [13]:
val accuracy = model.evaluate(dataset = test, batchSize = TEST_BATCH_SIZE).metrics[Metrics.ACCURACY]
println("Accuracy: $accuracy")

Accuracy: 0.7301075458526611


**Jack Dawson**

Jack was a 20-year-old artist who managed to get into titanic by winning a game of poker. He was the love interest of Rose and boarded the ship at Southampton on a third-class ticket.

survived: 0 = no, 1 = yes.
pclass: ticket category from first to third class.
fare: passenger fare.
ticket: passenger ticket number.
demographics: sex , age.
sibsp, parch: number of siblings or spouses aboard, number of parents or children aboard.
cabin: cabin number, embarked: port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton), home.dest: port of destination.

In [21]:
data class MovieCharacterPassenger(val survived: Int, val name: String, val pclass: Int, val sibsp: Int?, val parch: Int?, val age: Double?, val fare: Double?, val sex: String?)
val persons = listOf(MovieCharacterPassenger(0, "Jack Dawson", 3, 0, 0, 20.0, 7.875, "male"), MovieCharacterPassenger(0, "Rose DeWitt Bukater", 1, 0, 1, 17.0, 151.0, "female"), MovieCharacterPassenger(0 ,"Caledon Nathan Hockley", 1, 0, 0, 30.0, 151.0, "male"))

val movieCharactersDF = persons.toDataFrame()

movieCharactersDF.head()

In [24]:
import java.util.*

val x by column<FloatArray>("X")

val result = movieCharactersDF
        // one hot encoding
        .pivotMatches { pclass and sex }
        .select { survived and pclass and sibsp and parch and age and fare and sex }
        .convert { allDfs() }.toFloat()
        .merge { dfsOf<Float>() }.by { it.toFloatArray() }.into(x)
        .getColumn(x).toTypedArray()

print(Arrays.deepToString(result))


[[0.0, 1.0, 0.0, 0.0, 0.0, 20.0, 7.875, 1.0, 0.0], [0.0, 0.0, 1.0, 0.0, 1.0, 17.0, 151.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0, 0.0, 30.0, 151.0, 1.0, 0.0]]

In [25]:
model.predict(result[0])



1

**Rose DeWitt Bukater**
Rose was an American socialite. She was 17 years old, boarded the ship at Southampton on a first-class ticket.

In [26]:
model.predict(result[1])

1

In [27]:
model.predict(result[2])

1

**Caledon Nathan Hockley**

Also known as “Cal”. He was an American Industrialist and the heir to a Pittsburgh steel fortune. He was Rose's fiancee, 30 years of age, and boarded the ship at Southampton on a first-class ticket.

According to the model cal dies in the Titanic disaster, but in the movie he manages to survive.

Cal's money didn't save him, although he eventually escaped when he found a lost child and claimed that child to be his own.

In [None]:
model.close()