In [2]:
# Input to create_column_filter:
# df - a dataframe (where the column names "CLASS" and "ID" have special meaning)
#
# Output from create_filter:
# df            - a new dataframe, where columns, except "CLASS" and "ID", containing only missing values 
#                 or only one unique value (apart from the missing values) have been dropped
# column_filter - a list of the names of the remaining columns, including "CLASS" and "ID"

In [39]:
import pandas as pd
import numpy as np

In [40]:
df = pd.DataFrame({"CLASS":[1,0,1,0,1],"A":[1,2,np.nan,4,5],"B":[1,1,1,1,np.nan],"C":["h","h",np.nan,"i","h"],"D":[np.nan,np.nan,np.nan,np.nan,np.nan]})
df

Unnamed: 0,CLASS,A,B,C,D
0,1,1.0,1.0,h,
1,0,2.0,1.0,h,
2,1,,1.0,,
3,0,4.0,1.0,i,
4,1,5.0,,h,


In [9]:
test = df.loc[:,'A']
test

0    1.0
1    2.0
2    NaN
3    4.0
4    5.0
Name: A, dtype: float64

In [12]:
for item in df.columns:
    print(item)

CLASS
A
B
C
D


In [143]:
df = pd.DataFrame({"CLASS":[1,0,1,0,1],"A":[1,2,np.nan,4,5],"B":[1,1,1,1,np.nan],"C":["h","h",np.nan,"i","h"],"D":[np.nan,np.nan,np.nan,np.nan,np.nan],"E":[1,np.nan,np.nan,np.nan,np.nan]})
df

Unnamed: 0,CLASS,A,B,C,D,E
0,1,1.0,1.0,h,,1.0
1,0,2.0,1.0,h,,
2,1,,1.0,,,
3,0,4.0,1.0,i,,
4,1,5.0,,h,,


In [144]:
df.columns

Index(['CLASS', 'A', 'B', 'C', 'D', 'E'], dtype='object')

In [175]:
def create_column_filter(df):
    new_df = df.copy()
    all_columns = new_df.columns
    saved_columns = []
    
    for column_name in new_df.columns:
        if column_name == 'CLASS':
            continue
        column_value = new_df.loc[:, column_name]
        values = []
        for item in column_value:
            if pd.isna(item):
                continue
            else:
                values.append(item)
        if len(set(values)) > 1 or len(values) == 1:
            saved_columns.append(column_name)
            
    deleted_colums = list(set(all_columns) ^ set(saved_columns))
    deleted_colums.remove('CLASS')
    
    for item in deleted_colums:
        new_df.pop(item)
    return new_df, saved_columns

In [176]:
new_df, column_filter = create_column_filter(df)
print(new_df)
print(column_filter)

   CLASS    A    C    E
0      1  1.0    h  1.0
1      0  2.0    h  NaN
2      1  NaN  NaN  NaN
3      0  4.0    i  NaN
4      1  5.0    h  NaN
['A', 'C', 'E']


In [177]:
def apply_column_filter(df, column_filter):
    new_df = df.copy()
    for item in column_filter:
        new_df.pop(item)
    return new_df

In [181]:
new_df = pd.DataFrame({"CLASS":[1,0,0],"A":[4,5,6],"B":[1,2,1],"C":[np.nan,np.nan,np.nan],"D":[np.nan,4,5], "E":[1,np.nan,np.nan]})
print(new_df)
filtered_new_df = apply_column_filter(new_df,column_filter)
print(filtered_new_df)

   CLASS  A  B   C    D    E
0      1  4  1 NaN  NaN  1.0
1      0  5  2 NaN  4.0  NaN
2      0  6  1 NaN  5.0  NaN
   CLASS  B    D
0      1  1  NaN
1      0  2  4.0
2      0  1  5.0
