In [None]:
!unzip penguin_dataset.zip

Archive:  penguin_dataset.zip
  inflating: penguins_lter.csv       
  inflating: penguins_size.csv       


In [None]:
import pandas as pd

In [None]:
# Read the two CSV files extracted from the archive into DataFrames.
lter_csv, size_csv = 'penguins_lter.csv', 'penguins_size.csv'
p_lter = pd.read_csv(lter_csv)
p_size = pd.read_csv(size_csv)

In [None]:
# Preview the first five rows of the LTER table.
p_lter.head(n=5)

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
0,PAL0708,1,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,11/11/07,39.1,18.7,181.0,3750.0,MALE,,,Not enough blood for isotopes.
1,PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186.0,3800.0,FEMALE,8.94956,-24.69454,
2,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,
3,PAL0708,4,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A2,Yes,11/16/07,,,,,,,,Adult not sampled.
4,PAL0708,5,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193.0,3450.0,FEMALE,8.76651,-25.32426,


In [None]:
# Preview the first five rows of the size table.
p_size.head(n=5)

Unnamed: 0,species,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Keep only fully populated rows: a row with at least one missing value is
# dropped. The explicit copy makes data_clean independent of p_size, so new
# columns can be added to it without touching the loaded frame.
data_clean = p_size.dropna(axis=0, how='any').copy()

In [None]:
# Fit one LabelEncoder per categorical column. A single shared encoder is
# overwritten by every fit_transform call and ends up holding only the classes
# of the last column, which makes inverse_transform return the wrong labels.
species_encoder = LabelEncoder()
island_encoder = LabelEncoder()
sex_encoder = LabelEncoder()

data_clean['species_encoded'] = species_encoder.fit_transform(data_clean['species'])
data_clean['island_encoded'] = island_encoder.fit_transform(data_clean['island'])
data_clean['sex_encoded'] = sex_encoder.fit_transform(data_clean['sex'])

# `le` is the name the rest of the notebook uses to decode predictions and to
# save the encoder, so it must refer to the target (species) encoder.
le = species_encoder

In [None]:
# Model inputs: the encoded island and sex columns plus the four body
# measurements. The encoded species column is the prediction target.
target = 'species_encoded'
features = [
    'island_encoded',
    'culmen_length_mm',
    'culmen_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
    'sex_encoded',
]
X = data_clean.loc[:, features]
y = data_clean.loc[:, target]

In [None]:
# Standardise every feature column: fit() learns the per-column mean and
# standard deviation from X, transform() applies them.
# Note that X here is the full cleaned dataset, i.e. these statistics are
# computed over all rows, before any train/test split.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Split the unscaled features first, then learn the scaling statistics from the
# training rows only. Fitting the scaler on the full dataset before splitting
# lets information about the held-out rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=6498546)

# Rebind `scaler` so the object persisted later is the train-only fit.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Train a random forest of 100 trees on the training split; the fixed
# random_state makes the fit repeatable.
rf_params = {'n_estimators': 100, 'random_state': 898549}
model = RandomForestClassifier(**rf_params)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out split: accuracy is the share of test
# rows whose predicted class equals the true class.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nТочность модели: {accuracy:.2f}")


Точность модели: 1.00


In [None]:
# Pair every feature name with the importance the fitted forest assigned to
# it, then order the table from most to least important.
importance_rows = list(zip(features, model.feature_importances_))
feature_importance = pd.DataFrame(
    importance_rows, columns=['Feature', 'Importance']
).sort_values(by='Importance', ascending=False)

In [None]:
# Print a heading, then render the sorted importance table with rich display.
importance_heading = "\nВажность признаков:"
print(importance_heading)
display(feature_importance)


Важность признаков:


Unnamed: 0,Feature,Importance
1,culmen_length_mm,0.371136
3,flipper_length_mm,0.25698
2,culmen_depth_mm,0.134134
0,island_encoded,0.128137
4,body_mass_g,0.102603
5,sex_encoded,0.00701


In [None]:
# Take one held-out row; the model expects a 2-D array, hence the reshape.
sample = X_test[2].reshape(1, -1)

# Decode the predicted class code through the species column itself, so the
# result is always a species name regardless of which column a shared
# LabelEncoder was fitted on last.
species_by_code = (
    data_clean[['species_encoded', 'species']]
    .drop_duplicates()
    .set_index('species_encoded')['species']
)
predicted_code = model.predict(sample)[0]
predicted_species = species_by_code.loc[predicted_code]
print(f"\nПример предсказания: {predicted_species}")


Пример предсказания: MALE


In [None]:
import joblib

In [None]:
# Persist the fitted model, the scaler and the label encoder `le` to disk.
# NOTE(review): `le` has to be the encoder fitted on the species column for
# the saved file to decode predictions correctly — verify before shipping.
artifacts = {'penguin_model.pkl': model, 'scaler.pkl': scaler}
for path, obj in artifacts.items():
    joblib.dump(obj, path)
# Kept as the last expression so the cell still shows the written file name.
joblib.dump(le, 'label_encoder.pkl')

['label_encoder.pkl']