In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

In [1]:
# Memory-optimization helpers
def _smallest_fitting_dtype(lo, hi, current, candidates, info):
    """Return the first candidate narrower than ``current`` whose range holds [lo, hi], else None.

    ``candidates`` must be ordered from narrowest to widest; ``info`` is
    ``np.iinfo`` or ``np.finfo`` and supplies each candidate's ``min``/``max``.
    """
    for candidate in candidates:
        if np.dtype(candidate).itemsize >= current.itemsize:
            # Candidates only get wider from here, so no further saving is possible.
            break
        limits = info(candidate)
        # Inclusive bounds: a value equal to the type's min/max is representable.
        if limits.min <= lo and hi <= limits.max:
            return candidate
    return None


def reduce_mem_usage(df, use_float16=True):
    """Downcast numeric columns of ``df`` to narrower dtypes and print the saving.

    Signed-integer columns are narrowed to the smallest of int8/int16/int32
    that can hold the column's minimum and maximum. Float columns are narrowed
    to float16 (when ``use_float16`` is true) or float32 if their finite values
    fit in that type's range. A column is never widened, and a column whose
    values fit in none of the narrower types is left unchanged (so large
    float64 values are not overflowed to inf).

    Columns whose dtype is not a plain NumPy signed-int or float dtype
    (object, bool, datetime, unsigned ints, categorical and other extension
    dtypes) are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimize. It is modified IN PLACE; the same object is returned.
    use_float16 : bool, default True
        Allow float16 as a target. float16 carries roughly three significant
        decimal digits, so pass False when that precision loss is unacceptable.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with downcast columns.
    """
    # Memory before conversion, in MB (shallow: object payloads are not counted).
    start_mem = df.memory_usage().sum() / 1024**2

    int_candidates = (np.int8, np.int16, np.int32)
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype
        # Extension dtypes (category, nullable ints, strings, ...) are not NumPy
        # dtypes; skipping them also avoids calling min()/max() where undefined.
        if not isinstance(col_type, np.dtype):
            continue

        if col_type.kind == 'i':
            candidates, info = int_candidates, np.iinfo
            values = df[col].to_numpy()
        elif col_type.kind == 'f':
            candidates, info = float_candidates, np.finfo
            values = df[col].to_numpy()
            # NaN and +/-inf are representable in every float type, so only the
            # finite values constrain which target type is safe.
            values = values[np.isfinite(values)]
        else:
            continue

        if values.size:
            lo, hi = values.min(), values.max()
        else:
            # Nothing constrains the range (empty or all-NaN/inf column).
            lo = hi = 0

        target = _smallest_fitting_dtype(lo, hi, col_type, candidates, info)
        if target is not None:
            df[col] = df[col].astype(target)

    # Memory after conversion, in MB.
    end_mem = df.memory_usage().sum() / 1024**2
    # Guard the percentage against a zero-sized starting footprint.
    reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Mem. usage decreased from {:5.2f} MB to {:5.2f} MB ({:.1f}% reduction)'.format(
          start_mem, end_mem, reduction))
    return df