In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Attribute Information from
# https://archive.ics.uci.edu/ml/datasets/Flags
#    1. name:       Name of the country concerned

#    2. landmass:
#                   1=N.America,
#                   2=S.America,
#                   3=Europe,
#                   4=Africa,
#                   5=Asia,
#                   6=Oceania

#    3. zone: Geographic quadrant, based on Greenwich and the Equator
#                   1=NE,
#                   2=SE,
#                   3=SW,
#                   4=NW

#    4. area:       in thousands of square km

#    5. population: in round millions

#    6. language:
#                   1=English,
#                   2=Spanish,
#                   3=French,
#                   4=German,
#                   5=Slavic,
#                   6=other Indo-European,
#                   7=Chinese,
#                   8=Arabic, 
#                   9=Japanese/Turkish/Finnish/Magyar,
#                  10=others

#    7. religion:
#                   0=Catholic,
#                   1=Other Christian,
#                   2=Muslim,
#                   3=Buddhist,
#                   4=Hindu,
#                   5=ethnic,
#                   6=Marxist,
#                   7=others

#    8. bars:       number of vertical bars in the flag

#    9. stripes:    number of horizontal stripes in the flag

#   10. colours:    number of different colours in the flag

#   11. red:        0 if red absent, 1 if red present in the flag

#   12. green:      same for green

#   13. blue:       same for blue

#   14. gold:       same for gold (also yellow)

#   15. white:      same for white

#   16. black:      same for black

#   17. orange:     same for orange (also brown)

#   18. mainhue:    predominant colour in the flag (tie-breaks decided by taking the topmost hue,
#                   if that fails then the most central hue, and if that fails the leftmost hue)

#   19. circles:    number of circles in the flag

#   20. crosses:    number of (upright) crosses

#   21. saltires:   number of diagonal crosses

#   22. quarters:   number of quartered sections

#   23. sunstars:   number of sun or star symbols

#   24. crescent:   1 if a crescent moon symbol present, else 0

#   25. triangle:   1 if any triangles present, else 0

#   26. icon:       1 if an inanimate image present (e.g., a boat), else 0

#   27. animate:    1 if an animate image (e.g., an eagle, a tree, a human hand) present, else 0

#   28. text:       1 if any letters or writing on the flag (e.g., a motto or slogan), else 0

#   29. topleft:    color in the top-left corner (moving right to decide tie-breaks)

#   30. botright:   color in the bottom-right corner (moving left to decide tie-breaks)

In [3]:
# Lift pandas' column display limit so wide frames are rendered without
# eliding the middle columns.
pd.options.display.max_columns = None

In [4]:
# Column labels for flags.csv, in file order. They are passed explicitly via
# `names`, so pandas treats the file as having no header row.
FLAG_COLUMNS = [
    'name', 'landmass', 'zone', 'area', 'population', 'language', 'religion',
    'bars', 'stripes', 'colors',
    'red', 'green', 'blue', 'gold', 'white', 'black', 'orange',
    'mainhue', 'circles', 'crosses', 'saltires', 'quarters', 'sunstars',
    'crescent', 'triangle', 'icon', 'animate', 'text',
    'topleft', 'botright',
]

flag_data = pd.read_csv("flags.csv", names=FLAG_COLUMNS)
flag_data

Unnamed: 0,name,landmass,zone,area,population,language,religion,bars,stripes,colors,red,green,blue,gold,white,black,orange,mainhue,circles,crosses,saltires,quarters,sunstars,crescent,triangle,icon,animate,text,topleft,botright
0,Afghanistan,5,1,648,16,10,2,0,3,5,1,1,0,1,1,1,0,green,0,0,0,0,1,0,0,1,0,0,black,green
1,Albania,3,1,29,3,6,6,0,0,3,1,0,0,1,0,1,0,red,0,0,0,0,1,0,0,0,1,0,red,red
2,Algeria,4,1,2388,20,8,2,2,0,3,1,1,0,0,1,0,0,green,0,0,0,0,1,1,0,0,0,0,green,white
3,American-Samoa,6,3,0,0,1,1,0,0,5,1,0,1,1,1,0,1,blue,0,0,0,0,0,0,1,1,1,0,blue,red
4,Andorra,3,1,0,0,6,0,3,0,3,1,0,1,1,0,0,0,gold,0,0,0,0,0,0,0,0,0,0,blue,red
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,Western-Samoa,6,3,3,0,1,1,0,0,3,1,0,1,0,1,0,0,red,0,0,0,1,5,0,0,0,0,0,blue,red
190,Yugoslavia,3,1,256,22,6,6,0,3,4,1,0,1,1,1,0,0,red,0,0,0,0,1,0,0,0,0,0,blue,red
191,Zaire,4,2,905,28,10,5,0,0,4,1,1,0,1,0,0,1,green,1,0,0,0,0,0,0,1,1,0,green,green
192,Zambia,4,2,753,6,10,5,3,0,4,1,1,0,0,0,1,1,green,0,0,0,0,0,0,0,0,1,0,green,brown


## Select your features

In [None]:
# Prediction target: the `religion` column of the flag table.
target = flag_data.loc[:, 'religion']

In [None]:
# Keep every column as a feature except `name` and `religion`
# (`religion` is the column being predicted).
selected_features = flag_data.drop(['name', 'religion'], axis='columns')
selected_features.head(20)

In [None]:
# Expand the string-valued columns into indicator columns; get_dummies leaves
# numeric columns as they are.
selected_features = pd.get_dummies(data=selected_features)
selected_features.head()

In [None]:
# Sanity check: print the feature-matrix shape next to the target shape so a
# row-count mismatch is visible before splitting.
print(selected_features.shape, target.shape)

## Create a Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Split features and target into train/test partitions; the fixed
# random_state makes the partition repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    selected_features,
    target,
    random_state=35,
)

## Pre-Processing

In [None]:
# Standardize the features.
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler and fit it to the training data only, so the
# scaling statistics are computed without looking at the test split.
X_scaler = StandardScaler()
X_scaler.fit(X_train)

## Train the Model

In [None]:
# Apply the already-fitted X_scaler to both the training and the testing
# features (the labels are not scaled).
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encode the religion labels.
#
# The number of classes is pinned to the 8 religion codes (0-7). Without
# `num_classes`, to_categorical infers the width from the largest label it
# sees, so a split that happens not to contain code 7 would produce a
# narrower matrix than the other split (and than the 8-unit output layer).
NUM_RELIGION_CLASSES = 8

y_train_categorical = to_categorical(y_train, num_classes=NUM_RELIGION_CLASSES)
y_test_categorical = to_categorical(y_test, num_classes=NUM_RELIGION_CLASSES)
y_train_categorical

In [None]:
# Shape check, one line per split: within a split the feature matrix and the
# one-hot label matrix must have the same number of rows. Features and labels
# are paired from the same split so a mismatch is actually detectable.
print(X_train_scaled.shape, y_train_categorical.shape)
print(X_test_scaled.shape, y_test_categorical.shape)

In [None]:
# Baseline ("normal") neural network: one hidden layer of 6 ReLU units
# followed by an 8-unit softmax output layer.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
# The input width is read from the scaled training matrix rather than
# hard-coded, so the model still builds if the one-hot encoding step yields a
# different number of feature columns.
model.add(Dense(units=6, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=8, activation='softmax'))

In [None]:
# Print the layer-by-layer structure and parameter counts of the baseline model.
model.summary()

In [None]:
# Configure training for the baseline model: Adam optimizer, categorical
# cross-entropy loss (the labels are one-hot encoded), accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:

# Train the baseline model on the scaled training features and one-hot labels
# for 100 epochs, with shuffling enabled and per-batch progress output.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=1,
)

In [None]:
# Deeper variant of the baseline: two hidden layers of 6 ReLU units each,
# followed by the same 8-unit softmax output layer.
deep_model = Sequential()
# The input width is read from the scaled training matrix rather than
# hard-coded, so the model still builds if the one-hot encoding step yields a
# different number of feature columns.
deep_model.add(Dense(units=6, activation='relu', input_dim=X_train_scaled.shape[1]))
deep_model.add(Dense(units=6, activation='relu'))
deep_model.add(Dense(units=8, activation='softmax'))

In [None]:
# Print the layer-by-layer structure and parameter counts of the deeper model.
deep_model.summary()

In [None]:
# Same training configuration as the baseline model: Adam optimizer,
# categorical cross-entropy loss, accuracy metric.
deep_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 100 epochs; verbose=2 logs one line per epoch instead of a
# progress bar.
deep_model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2,
)

In [None]:
# Score the baseline model on the held-out test split; with the compile
# settings above, evaluate returns the loss followed by the accuracy.
model_loss, model_accuracy = model.evaluate(
    X_test_scaled,
    y_test_categorical,
    verbose=2,
)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Score the deeper model on the same held-out test split. Note that this
# reuses (and overwrites) the model_loss / model_accuracy names.
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled,
    y_test_categorical,
    verbose=2,
)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")