In [1]:
pip install micromlgen

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting micromlgen
  Downloading micromlgen-1.1.28.tar.gz (12 kB)
Building wheels for collected packages: micromlgen
  Building wheel for micromlgen (setup.py) ... [?25l[?25hdone
  Created wheel for micromlgen: filename=micromlgen-1.1.28-py3-none-any.whl size=32173 sha256=b8fd30b4d02b2d3c3ee4e924bc4b91107554a8ceaf8fede1ec0857a42be63568
  Stored in directory: /root/.cache/pip/wheels/b5/c1/36/9848f822936171a90ec43d0534cb86a58e1dff20c645e22074
Successfully built micromlgen
Installing collected packages: micromlgen
Successfully installed micromlgen-1.1.28


In [16]:
from micromlgen import port
from pandas import read_csv
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

In [17]:
# Read the gender / height / weight CSV straight from GitHub into a frame,
# then show five random rows as a quick sanity check.
DATA_URL = 'https://raw.githubusercontent.com/wooihaw/datasets/main/genders_heights_weights.csv'
df = read_csv(DATA_URL)
df.sample(5)

Unnamed: 0,Gender,Height,Weight
1555,Male,167.6,74.9
3202,Male,169.7,78.3
6574,Female,163.2,64.6
9389,Female,159.5,61.6
9743,Female,164.8,71.8


In [18]:
# Divide into features (X) and targets (y).
# Column 0 holds the class label and the remaining columns hold the numeric
# measurements. The features are cast to float so X is a numeric array rather
# than the object array that `df.values` yields for a mixed-dtype frame.
X = df.iloc[:, 1:].to_numpy(dtype=float)
y = df.iloc[:, 0].to_numpy()

# Encode the string labels as integers for training.
le = LabelEncoder()
y = le.fit_transform(y)

# Map each encoded integer back to its original label; this is what the C
# exporter is given later so the generated code can report readable names.
classmap = dict(enumerate(le.classes_))
print(f"{classmap=}")

# Split into training and testing sets
X_train, X_test, y_train, y_test = split(X, y, random_state=42)

# Both estimators are seeded: without a fixed random_state the fitted models,
# the accuracies printed below, and any code exported from them would change
# on every run even though the train/test split itself is fixed.

# Train and evaluate Decision Tree classifier
dtc = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
print(f"dtc accuracy: {dtc.score(X_test, y_test):.2%}")

# Train and evaluate Random Forest classifier
rfc = RandomForestClassifier(
    n_estimators=10, max_depth=3, random_state=42
).fit(X_train, y_train)
print(f"rfc accuracy: {rfc.score(X_test, y_test):.2%}")

classmap={0: 'Female', 1: 'Male'}
dtc accuracy: 91.60%
rfc accuracy: 90.96%


In [23]:
# Ask for one height/weight pair and classify it with the random forest.
h = float(input("Enter height in cm: "))
w = float(input("Enter weight in kg: "))

# predict() returns one encoded class per input row and inverse_transform()
# turns it back into the original label. There is exactly one row here, so
# take that single element and print the label itself rather than the
# array's repr (e.g. "Male" instead of "['Male']").
label = le.inverse_transform(rfc.predict([[h, w]]))[0]
print(f"Predict as {label}")

Enter height in cm: 175
Enter weight in kg: 80
Predict as ['Male']


In [24]:
# Port the trained random forest to C source, passing classmap so the
# generated code knows the label for each class index, then save the result
# as the header file rfc.h.
c_code = port(rfc, classmap=classmap)

with open('rfc.h', mode='w') as header_file:
    header_file.write(c_code)

In [25]:
# Search over forest size and tree depth (1..10 each, 100 combinations)
# using 5-fold cross-validation on the training set only.
params = dict(n_estimators=range(1, 11), max_depth=range(1, 11))

# Seeding the estimator makes every candidate forest deterministic, so the
# selected best_params_ are reproducible instead of varying from run to run.
grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    params,
    cv=5,
    verbose=1,
    n_jobs=-1,  # use all available CPU cores for the 500 fits
)
grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
{'max_depth': 5, 'n_estimators': 8}


In [26]:
# GridSearchCV refits the best parameter combination on the whole training
# set by default (refit=True), so reuse that fitted forest. Training a second,
# independently randomised forest from best_params_ would cost an extra fit
# and could yield a model that differs from the one the search selected.
rfc2 = grid.best_estimator_
print(f"rfc2 accuracy: {rfc2.score(X_test, y_test):.2%}")

rfc2 accuracy: 91.84%


In [27]:
# Port the tuned forest (rfc2) to C source with the same class-index-to-label
# mapping, then save the result as the header file rfc2.h.
c_code = port(rfc2, classmap=classmap)

with open('rfc2.h', mode='w') as header_file:
    header_file.write(c_code)