In [2]:
import pandas
df = pandas.read_csv('Data/house-prices.csv')

In [2]:
df.head()

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms,Offers,Brick,Neighborhood
0,1,114300,1790,2,2,2,No,East
1,2,114200,2030,4,2,3,No,East
2,3,114800,1740,3,2,1,No,East
3,4,94700,1980,3,2,3,No,East
4,5,119800,2130,3,3,3,No,East


In [3]:
house = pandas.concat([df, pandas.get_dummies(df['Brick']), pandas.get_dummies(df['Neighborhood'])], axis = 1)
del house['No']
del house['West']
del house['Brick']
del house['Neighborhood']
del house['Home']
house.head()

Unnamed: 0,Price,SqFt,Bedrooms,Bathrooms,Offers,Yes,East,North
0,114300,1790,2,2,2,0,1,0
1,114200,2030,4,2,3,0,1,0
2,114800,1740,3,2,1,0,1,0
3,94700,1980,3,2,3,0,1,0
4,119800,2130,3,3,3,0,1,0


In [4]:
X = house[['SqFt', 'Bedrooms', 'Bathrooms', 'Offers', 'Yes', 'East', 'North']]
Y = house['Price'].values

In [6]:
import statsmodels.api as sm
X2 = sm.add_constant(X)
est = sm.OLS(Y, X2)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.869
Model:                            OLS   Adj. R-squared:                  0.861
Method:                 Least Squares   F-statistic:                     113.3
Date:                Sun, 07 May 2017   Prob (F-statistic):           8.25e-50
Time:                        15:43:21   Log-Likelihood:                -1356.7
No. Observations:                 128   AIC:                             2729.
Df Residuals:                     120   BIC:                             2752.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const       2.284e+04   1.02e+04      2.231      0.0

In [6]:
predictorcols = ['SqFt', 'Bedrooms', 'Bathrooms', 'Offers', 'Yes', 'East', 'North']
import itertools
for variables in itertools.combinations(predictorcols, 1):
    print(variables)

('SqFt',)
('Bedrooms',)
('Bathrooms',)
('Offers',)
('Yes',)
('East',)
('North',)


In [8]:
import statsmodels.api as sm
import itertools
AICs = {}
for k in range(1,len(predictorcols)+1):
    for variables in itertools.combinations(predictorcols, k):
        predictors  = X[list(variables)]
        predictors2 = sm.add_constant(predictors)
        est = sm.OLS(Y, predictors2)
        res = est.fit()
        AICs[variables] = res.aic    

In [9]:
AICs

{('SqFt',): 2930.399221278264,
 ('Bedrooms',): 2935.671458496874,
 ('Bathrooms',): 2936.1658574541634,
 ('Offers',): 2963.862468153879,
 ('Yes',): 2947.744826785076,
 ('East',): 2974.473573747804,
 ('North',): 2931.3623066402315,
 ('SqFt', 'Bedrooms'): 2915.2245344840526,
 ('SqFt', 'Bathrooms'): 2917.6684903192145,
 ('SqFt', 'Offers'): 2865.6942475349356,
 ('SqFt', 'Yes'): 2896.9093592727936,
 ('SqFt', 'East'): 2927.0624268771817,
 ('SqFt', 'North'): 2897.745654169053,
 ('Bedrooms', 'Bathrooms'): 2916.035689947397,
 ('Bedrooms', 'Offers'): 2909.8050956787056,
 ('Bedrooms', 'Yes'): 2900.1164490230194,
 ('Bedrooms', 'East'): 2936.0608986117104,
 ('Bedrooms', 'North'): 2908.6992372764653,
 ('Bathrooms', 'Offers'): 2907.5652435435345,
 ('Bathrooms', 'Yes'): 2911.69774341551,
 ('Bathrooms', 'East'): 2934.545689960338,
 ('Bathrooms', 'North'): 2902.5097472661846,
 ('Offers', 'Yes'): 2939.20844689237,
 ('Offers', 'East'): 2962.720064186388,
 ('Offers', 'North'): 2929.775806458391,
 ('Yes', 'E

In [11]:
from collections import Counter
c = Counter(AICs)
#c.most_common()    #最大前10
c.most_common()[::-10]

[(('SqFt', 'Bedrooms', 'Bathrooms', 'Offers', 'Yes', 'East', 'North'),
  2729.3189814012494),
 (('SqFt', 'Bedrooms', 'Bathrooms', 'Offers', 'Yes'), 2789.5148143560264),
 (('SqFt', 'Offers', 'East', 'North'), 2805.929045591597),
 (('SqFt', 'Bedrooms', 'Bathrooms', 'East', 'North'), 2827.1498026886024),
 (('Bedrooms', 'Bathrooms', 'Offers', 'Yes', 'East'), 2837.9283737790706),
 (('Bedrooms', 'Bathrooms', 'Offers', 'Yes'), 2845.973295559599),
 (('SqFt', 'Offers'), 2865.6942475349356),
 (('Bedrooms', 'Bathrooms', 'Offers', 'East'), 2874.0450207228523),
 (('Bedrooms', 'Bathrooms', 'Yes'), 2883.9535408052025),
 (('SqFt', 'Yes'), 2896.9093592727936),
 (('Bedrooms', 'North'), 2908.6992372764653),
 (('Bedrooms', 'Bathrooms'), 2916.035689947397),
 (('Bathrooms',), 2936.1658574541634)]