In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix

#import dataset
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv')

#drop rows containing missing values
df.dropna(axis = 0, how = 'any', subset = None, inplace = True)

# Convert Non-numeric data using one-hot encoding
df = pd.get_dummies(df,columns=['island','sex'])
df.head()

# Assing X and Y variables
X = df.drop('species', axis=1)
Y = df['species']

# Split data into test/train set (70/30 split) and shuffle
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, shuffle = True)

#Assign algorithm
model = LogisticRegression(solver = 'lbfgs', max_iter = 30)

# Link algorithm to X and Y variables
model.fit(X_train, Y_train)

# Run algorithm on test data to make predictions
model_test = model.predict (X_test)

#Evaluate predictions
print(confusion_matrix(Y_test,model_test))
print(classification_report(Y_test,model_test))

[[36  0  4]
 [18  4  2]
 [ 4  0 32]]
              precision    recall  f1-score   support

      Adelie       0.62      0.90      0.73        40
   Chinstrap       1.00      0.17      0.29        24
      Gentoo       0.84      0.89      0.86        36

    accuracy                           0.72       100
   macro avg       0.82      0.65      0.63       100
weighted avg       0.79      0.72      0.67       100



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=30).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [32]:
# Data point to predict
penguin = [
	56, #bill_length_mm
	12.5, #bill_depth_mm
	120, #flipper_length_mm 
	3150, #body_mass_g
	0, #island_Biscoe    
	1, #island_Dream
	0, #island_Torgersen    
	0, #sex_Male
	1, #sex_Female
]

# Make prediction

feature_names_trained = model.feature_names_in_
penguin_df = pd.DataFrame([penguin], columns=feature_names_trained)

new_penguin = model.predict(penguin_df)
new_penguin

probabilidades = model.predict_proba(X_test)
print(probabilidades)

[[1.03435376e-06 9.99998948e-01 1.79055171e-08]
 [3.79475907e-05 1.21766013e-07 9.99961931e-01]
 [1.78747545e-06 1.19149580e-06 9.99997021e-01]
 [9.99868459e-01 4.76347312e-06 1.26777732e-04]
 [9.99764462e-01 3.26438486e-06 2.32274020e-04]
 [7.95722121e-05 9.99906232e-01 1.41960741e-05]
 [3.52019260e-05 2.85939309e-05 9.99936204e-01]
 [1.10204792e-07 9.99999862e-01 2.82777942e-08]
 [1.25192303e-06 9.99956433e-01 4.23146955e-05]
 [9.99096826e-01 6.04838683e-04 2.98335241e-04]
 [3.99233868e-02 9.59584007e-01 4.92606315e-04]
 [9.81152882e-01 1.87236261e-02 1.23491959e-04]
 [8.30787402e-03 4.53634880e-06 9.91687590e-01]
 [9.99999654e-01 2.52708079e-07 9.34915766e-08]
 [8.91797302e-04 4.41678606e-02 9.54940342e-01]
 [9.55603860e-01 4.33654359e-02 1.03070381e-03]
 [9.74225642e-05 2.79870573e-03 9.97103872e-01]
 [7.90160607e-04 3.13143574e-05 9.99178525e-01]
 [2.40978044e-06 9.99997207e-01 3.83012339e-07]
 [9.99999866e-01 1.71328574e-09 1.32145285e-07]
 [5.59058311e-04 9.99166621e-01 2.743202