In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: load the data
iris = load_iris()
X, y = iris.data, iris.target

# Keep only classes 0 and 1 so the demo is a binary classification problem.
# The mask is computed once from the unfiltered labels and applied to both
# arrays, so the result no longer depends on filtering X before y.
binary_mask = y != 2
X, y = X[binary_mask], y[binary_mask]

# Step 2: train/test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: build the pipeline.
# The first step is a polynomial feature expansion, not a scaler, so it is
# named 'poly'; step names are what `named_steps` and `<step>__<param>`
# lookups use, so a misleading name leaks into every later reference.
steps = [
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('pca', PCA(n_components=2)),
    ('clf', LogisticRegression())
]
model = Pipeline(steps=steps)

# Step 4: fit
model.fit(X_train, y_train)

# Step 5: predict and evaluate
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print("Predictions:", y_pred)
print("Accuracy:", acc)


Predictions: [1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]
Accuracy: 1.0


In [None]:


# Detach the per-layer configs from the shared settings.
# NOTE(review): `param_dict` is not defined in this cell, and pop() removes
# "layers" from it in place, so running this twice on the same dict raises
# KeyError.
a = param_dict.pop("layers")

def handle_1item(a, i):
    """Return a new dict whose keys are the keys of ``a`` prefixed with ``layer{i}__``.

    Args:
        a: mapping describing one layer.
        i: position of the layer, used to build the prefix.

    Returns:
        dict: same values as ``a``, in the same order, under the prefixed keys.
    """
    prefix = f"layer{i}__"
    return {f"{prefix}{key}": value for key, value in a.items()}


import itertools

# Prefix the keys of every layer dict with its position in the list.
a = [handle_1item(layer, position) for position, layer in enumerate(a)]

# Flatten the per-layer dicts into a single mapping.
b_keys = list(itertools.chain.from_iterable(layer.keys() for layer in a))
b_values = list(itertools.chain.from_iterable(layer.values() for layer in a))
b = dict(zip(b_keys, b_values))

# Shared settings first, then the flattened layer settings.
c_keys = [*param_dict.keys(), *b.keys()]
c_values = [*param_dict.values(), *b.values()]
# Every value must be a list of candidates, so bare values are wrapped.
c_values = [[value] if not isinstance(value, list) else value for value in c_values]

c = dict(zip(c_keys, c_values))
c

{'patience': [5, 10],
 'min_delta': [0.001],
 'epochs': [30, 50],
 'learning_rate': [0.001, 0.01],
 'layer0__name': ['DenseBatchNormalizationDropoutTuner'],
 'layer0__dropout_rate': [0.5],
 'layer0__start_units': [16],
 'layer0__num_layers': [1, 2, 3, 4, 5],
 'layer1__name': ['DenseBatchNormalizationTuner'],
 'layer1__start_units': [8],
 'layer1__num_layers': [1, 2, 3, 4, 5]}

In [3]:
import itertools

def add_layer_text_to_key(a, i):
    """Return a new dict whose keys are the keys of ``a`` prefixed with ``layer{i}__``.

    Args:
        a: mapping describing one layer.
        i: position of the layer, used to build the prefix.

    Returns:
        dict: same values as ``a``, in the same order, under the prefixed keys.
        An empty ``a`` gives an empty dict.
    """
    return {f"layer{i}__{key}": value for key, value in a.items()}


def get_list_param(param_dict):
    """Flatten a nested configuration into a flat ``{name: [candidates]}`` dict.

    Every key other than ``"layers"`` is copied as is. Each dict in
    ``param_dict["layers"]`` contributes its entries under
    ``layer{index}__{key}``. Values that are not lists are wrapped in a
    one-element list.

    ``param_dict`` is not modified, so the function can be called repeatedly
    on the same configuration.

    Args:
        param_dict: mapping with a ``"layers"`` entry holding a list of dicts.

    Returns:
        dict: shared settings first, then the layer settings in layer order.

    Raises:
        KeyError: if ``param_dict`` has no ``"layers"`` entry.
    """
    # Look the layers up instead of popping them: pop() would strip the key
    # from the caller's dict and make a second call fail.
    list_layers = param_dict["layers"]

    flat_params = {key: value for key, value in param_dict.items() if key != "layers"}
    for i, layer in enumerate(list_layers):
        flat_params.update(add_layer_text_to_key(layer, i))

    return {
        key: value if isinstance(value, list) else [value]
        for key, value in flat_params.items()
    }

# Example configuration: four shared settings plus two layer descriptions.
param_dict = {
    "patience": [5, 10],
    "min_delta": [0.001],
    "epochs": [30, 50],
    "learning_rate": [0.001, 0.01],
    "layers": [
        {
            "name": "DenseBatchNormalizationDropoutTuner",
            "dropout_rate": 0.5,
            "start_units": 16,
            "num_layers": [1, 2, 3, 4, 5],
        },
        {
            "name": "DenseBatchNormalizationTuner",
            "start_units": 8,
            "num_layers": [1, 2, 3, 4, 5],
        },
    ],
}

# Last expression of the cell, so the flattened result is displayed.
get_list_param(param_dict)


{'patience': [5, 10],
 'min_delta': [0.001],
 'epochs': [30, 50],
 'learning_rate': [0.001, 0.01],
 'layer0__name': ['DenseBatchNormalizationDropoutTuner'],
 'layer0__dropout_rate': [0.5],
 'layer0__start_units': [16],
 'layer0__num_layers': [1, 2, 3, 4, 5],
 'layer1__name': ['DenseBatchNormalizationTuner'],
 'layer1__start_units': [8],
 'layer1__num_layers': [1, 2, 3, 4, 5]}

In [31]:
from sklearn.model_selection import ParameterSampler

# Draw up to 10 random combinations from the flattened grid; the fixed seed
# keeps the draw reproducible.
# NOTE(review): `c` is built in a different cell, so this cell only runs after
# that one has been executed.
param_list = list(ParameterSampler(c, n_iter=10, random_state=42))

# Show the last sampled combination. Indexing from the end instead of with a
# hard-coded 9 keeps this valid when the grid holds fewer combinations than
# n_iter, in which case the sampler yields fewer items.
param_list[-1]

{'patience': 10,
 'min_delta': 0.001,
 'learning_rate': 0.001,
 'layer2__start_units_A': 8,
 'layer2__num_layers_A': 1,
 'layer2__name': 'A',
 'layer2__drop_A': 100,
 'layer1__start_units': 8,
 'layer1__num_layers': 2,
 'layer1__name': 'DenseBatchNormalizationTuner',
 'layer0__start_units': 16,
 'layer0__num_layers': 2,
 'layer0__name': 'DenseBatchNormalizationDropoutTuner',
 'layer0__dropout_rate': 0.5,
 'epochs': 30}

In [None]:
# Configuration with an intentionally empty first layer.
param_dict = {
    "patience": [5, 10],
    "min_delta": [0.001],
    "epochs": [30, 50],
    "learning_rate": [0.001, 0.01],
    "layers": [
        # Layer 0 is left empty on purpose. To enable it, fill in e.g.:
        #   "name": 'DenseBatchNormalizationDropoutTuner',
        #   "dropout_rate": 0.5,
        #   "start_units": 128,
        #   "list_num_layers": [1,2,3,4,5],
        {},
        {
            "name": "DenseBatchNormalizationTuner",
            "start_units": 16,
            "list_num_layers": [1, 2, 3, 4, 5],
        },
    ],
}

class ListParamCreator:
    """Flatten a nested tuning configuration into a flat parameter grid.

    ``param_dict`` is expected to look like this::

        param_dict = {
            "patience": [5, 10],
            "min_delta": [0.001],
            "epochs": [30, 50],
            "learning_rate": [0.001, 0.01],
            "layers": [
                {
                    "name": 'DenseBatchNormalizationDropoutTuner',
                    "dropout_rate": 0.5,
                    "start_units": 16,
                    "list_num_layers": [1, 2, 3, 4, 5],
                },
                {
                    "name": 'DenseBatchNormalizationTuner',
                    "start_units": 8,
                    "list_num_layers": [1, 2, 3, 4, 5],
                },
            ],
        }

    :meth:`next` rewrites the ``"layers"`` entry so that the result is flat::

        {
            'patience': [5, 10],
            'min_delta': [0.001],
            'epochs': [30, 50],
            'learning_rate': [0.001, 0.01],
            'layer0__name': ['DenseBatchNormalizationDropoutTuner'],
            'layer0__dropout_rate': [0.5],
            'layer0__start_units': [16],
            'layer0__list_num_layers': [1, 2, 3, 4, 5],
            'layer1__name': ['DenseBatchNormalizationTuner'],
            'layer1__start_units': [8],
            'layer1__list_num_layers': [1, 2, 3, 4, 5],
        }

    An empty layer dict (``{}``) at position ``i`` becomes the single entry
    ``'layer{i}__none': [None]``.
    """

    def __init__(self, param_dict, num_models):
        """Store the configuration.

        Args:
            param_dict (dict): nested configuration, see the class docstring.
            num_models (int): number of parameter sets wanted. It is only
                stored on the instance; :meth:`next` does not use it.
        """
        self.param_dict = param_dict
        self.num_models = num_models

    def next(self):
        """Return the flattened ``{name: [candidates]}`` dict.

        ``self.param_dict`` is left untouched, so the method can be called
        any number of times.

        Returns:
            dict: shared settings first, then the layer settings in layer
            order; every value is a list.

        Raises:
            KeyError: if the configuration has no ``"layers"`` entry.
        """
        # Look the layers up instead of popping them: pop() would strip the
        # key from the caller's dict and make a second call fail.
        list_layers = self.param_dict["layers"]

        flat_params = {
            key: value for key, value in self.param_dict.items() if key != "layers"
        }
        for i, layer in enumerate(list_layers):
            flat_params.update(self.add_layer_text_to_key(layer, i))

        # Every value must be a list of candidates; wrap bare values.
        return {
            key: value if isinstance(value, list) else [value]
            for key, value in flat_params.items()
        }

    def add_layer_text_to_key(self, a, i):
        """Return a new dict with the keys of ``a`` prefixed by ``layer{i}__``.

        Args:
            a (dict): description of one layer; may be empty.
            i (int): position of the layer.

        Returns:
            dict: ``{'layer{i}__none': None}`` when ``a`` is empty, otherwise
            the entries of ``a`` under the prefixed keys. ``a`` itself is
            never modified.
        """
        if not a:
            # Build a fresh dict: writing the marker into ``a`` would alter
            # the caller's configuration.
            return {f"layer{i}__none": None}

        return {f"layer{i}__{key}": value for key, value in a.items()}


class LayerCreator:
    """Create a layer object from one sampled parameter set.

    Example::

        param = {
            'patience': 10,
            'min_delta': 0.001,
            'learning_rate': 0.01,
            'layer1__start_units': 8,
            'layer1__num_layers': 4,
            'layer1__name': 'DenseBatchNormalizationTuner',
            'layer0__start_units': 16,
            'layer0__num_layers': 5,
            'layer0__name': 'DenseBatchNormalizationDropoutTuner',
            'layer0__dropout_rate': 0.5,
            'epochs': 30
        }
        layer_text = 'layer0'

    The layer is then built from the entries whose key starts with
    ``'layer0__'``: start_units, num_layers, name, dropout_rate.

    Args:
        param (dict): one sampled parameter set.
        layer_text (str): text identifying the layer to create, e.g. ``'layer0'``.
    """

    def __init__(self, param, layer_text):
        self.param = param
        self.layer_text = layer_text

    def next(self):
        """Instantiate the layer described by the ``{layer_text}__*`` entries.

        Returns:
            ``tf_myclasses.PassThroughLayer()`` when the layer is marked with
            ``{layer_text}__none``; otherwise an instance of the class named
            by ``{layer_text}__name``, built with the remaining entries as
            keyword arguments.

        Raises:
            KeyError: if there is no ``{layer_text}__name`` entry, or the
                named class is not a global of this module.
        """
        # Check for the pass-through marker first.
        if self.is_PassThroughLayer():
            # NOTE(review): tf_myclasses is not imported in the visible part
            # of the notebook; it has to be in scope when this line runs.
            return tf_myclasses.PassThroughLayer()

        # Collect the entries of this layer under their unprefixed names.
        layer_param = {
            self.get_param_name(key): value
            for key, value in self.param.items()
            if self._belongs_to_layer(key)
        }

        # The class is looked up by name among this module's globals.
        class_name = layer_param.pop("name")
        ClassName = globals()[class_name]

        return ClassName(**layer_param)

    def get_param_name(self, key):
        """Return the part of ``key`` after the first ``"__"``."""
        parts = key.split("__", 1)
        return parts[1]

    def is_PassThroughLayer(self):
        """Return True when ``param`` contains the ``{layer_text}__none`` marker."""
        return f"{self.layer_text}__none" in self.param

    def _belongs_to_layer(self, key):
        """Return True when ``key`` is ``{layer_text}__<something>``.

        The ``"__"`` separator is part of the match so that ``'layer1'`` does
        not also select ``'layer10__...'`` entries.
        """
        return isinstance(key, str) and key.startswith(f"{self.layer_text}__")


# Flatten the configuration defined at the top of this cell and display it.
a = ListParamCreator(param_dict=param_dict, num_models=10).next()
a

{'patience': [5, 10],
 'min_delta': [0.001],
 'epochs': [30, 50],
 'learning_rate': [0.001, 0.01],
 'layer0_none': [None],
 'layer1__name': ['DenseBatchNormalizationTuner'],
 'layer1__start_units': [16],
 'layer1__list_num_layers': [1, 2, 3, 4, 5]}

In [6]:
# Quick check: an empty dict counts as "empty" in a condition.
hello = dict()

if len(hello) == 0:
    print("Rỗng rồi")

def add_layer_text_to_key(a, i):
    """Return a new dict with the keys of ``a`` prefixed by ``layer{i}__``.

    Args:
        a: mapping describing one layer; may be empty.
        i: position of the layer, used to build the prefix.

    Returns:
        dict: ``{'layer{i}': None}`` when ``a`` is empty, otherwise the entries
        of ``a`` under the prefixed keys. ``a`` itself is never modified.
    """
    if not a:
        # Return a fresh dict: writing the placeholder into ``a`` would change
        # the caller's object, and a second call on it would then produce
        # 'layer{i}__layer{i}'.
        return {f"layer{i}": None}

    return {f"layer{i}__{key}": value for key, value in a.items()}

# Try the helper on the empty dict and display what comes back.
b = add_layer_text_to_key(a=hello, i=1)
b

Rỗng rồi


{'layer1': None}

In [2]:
def calculate_sum(a, b):
    """Return ``a + b``."""
    total = a + b
    return total

def calculate_subtraction(a, b):
    """Return ``a - b``."""
    difference = a - b
    return difference

def demo_funcs(a, b, c, funcs):
    """Call ``funcs(a, b)`` and return its result plus ``c``.

    Args:
        a: first argument passed to ``funcs``.
        b: second argument passed to ``funcs``.
        c: value added to what ``funcs`` returns.
        funcs: callable taking two positional arguments.
    """
    return funcs(a, b) + c

# Pass a function as an argument: (1 - 1) + 1
demo_funcs(1, 1, 1, funcs=calculate_subtraction)

1