In [1]:
# Import data
import pandas as pd
# Read the CSV into a frame and expose its 'Unnamed: 0' column under the name 'id'.
data = pd.read_csv('C:/Users/wkula/Uczelnia/Databases/houses_to_rent.csv').rename(
    columns={'Unnamed: 0': 'id'})

print(data)

        id  city  area  rooms  bathroom  parking spaces floor     animal  \
0        0     1   240      3         3               4     -      acept   
1        1     0    64      2         1               1    10      acept   
2        2     1   443      5         5               4     3      acept   
3        3     1    73      2         2               1    12      acept   
4        4     1    19      1         1               0     -  not acept   
...    ...   ...   ...    ...       ...             ...   ...        ...   
6075  6075     1    50      2         1               1     2      acept   
6076  6076     1    84      2         2               1    16  not acept   
6077  6077     0    48      1         1               0    13      acept   
6078  6078     1   160      3         2               2     -  not acept   
6079  6079     1    60      2         1               1     4      acept   

          furniture      hoa rent amount property tax fire insurance     total  
0     

In [2]:
# Read headers: show the column labels of the loaded frame

header = data.columns
print(header)

Index(['id', 'city', 'area', 'rooms', 'bathroom', 'parking spaces', 'floor',
       'animal', 'furniture', 'hoa', 'rent amount', 'property tax',
       'fire insurance', 'total'],
      dtype='object')


In [3]:
# Describe data: summary statistics of the 'city' column

city_summary = data['city'].describe()
print(city_summary)

count    6080.000000
mean        0.863322
std         0.343535
min         0.000000
25%         1.000000
50%         1.000000
75%         1.000000
max         1.000000
Name: city, dtype: float64


In [4]:
# Data information

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6080 entries, 0 to 6079
Data columns (total 14 columns):
id                6080 non-null int64
city              6080 non-null int64
area              6080 non-null int64
rooms             6080 non-null int64
bathroom          6080 non-null int64
parking spaces    6080 non-null int64
floor             6080 non-null object
animal            6080 non-null object
furniture         6080 non-null object
hoa               6080 non-null object
rent amount       6080 non-null object
property tax      6080 non-null object
fire insurance    6080 non-null object
total             6080 non-null object
dtypes: int64(6), object(8)
memory usage: 665.1+ KB
None


In [5]:
# Unique values of the 'area' column

# NOTE: the variable name `all_city` is kept so later references keep working,
# but it holds the distinct values of data['area'], not cities.
all_city = data['area'].unique()
print("Area array: {0}".format(all_city))
# The count is the number of distinct area values (the label used to say "arrays").
print("Num of unique areas: {0}".format(len(all_city)))


Area array: [  240    64   443    73    19    13    55    82    32    60    20   375
    92    56   188   100   192    93   155    80    65   320   180    31
   105   300   150   130    70   120   250    50    94   176   191    25
   800   340    90   650   230   190   205    96   396    74    30   400
   220   750   140    22   345    46    35    72    83    38    62    85
   134   200   170    58    87    42   145   500   265   126    29    59
   290   172    40   280   115   540    76   236   270    48   198   185
   440   119    57    51   169    33   225   260   243    45    47    68
   187    17    67   118    43   195   141    16   112   128   110   107
    97   349    84   286   196   184   117    11   360   122   138    52
   160   209   450   900   420   148    75    44    98   322   600   520
    86   550   298    41    89   177    18    66   515   350    78   106
   153   700   640   103   113   164   136   163    79   104   203    53
    27   175   326    23   210    91   

In [6]:
# Prepare the environment
from sqlalchemy import Column, Integer, String, Boolean, ForeignKey, UniqueConstraint, CheckConstraint
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base

# Connection URL for the target database.
# SQLAlchemy registers the PostgreSQL dialect under the name "postgresql"; the
# legacy "postgres" alias was deprecated and is rejected by SQLAlchemy 1.4+.
# NOTE(review): the user name and password are hard-coded here — consider
# loading the URL from the environment before sharing this notebook.
db_string = "postgresql://postgres:postgres@localhost:5432/Lab4"

# Engine used below both for creating the schema and for DataFrame.to_sql().
engine = create_engine(db_string)

# Declarative base class that the ORM table classes inherit from.
Base = declarative_base()

In [7]:
# Creating schema

class House(Base):
    """ORM mapping for the 'houses' table.

    Each row carries its own primary key plus three foreign keys pointing at
    the 'costs', 'places' and 'benefits' tables.
    """
    __tablename__ = 'houses'

    id = Column(Integer, primary_key=True)
    id_costs = Column(Integer, ForeignKey("costs.id"))
    id_places = Column(Integer, ForeignKey("places.id"))
    id_benefits = Column(Integer, ForeignKey("benefits.id"))

    def __repr__(self):
        """Return a one-line text form listing the primary key and the three foreign keys."""
        template = "<houses(id='{id}', costs={cost}, place={place}, benefits={benefits})>"
        values = dict(
            id=self.id,
            cost=self.id_costs,
            place=self.id_places,
            benefits=self.id_benefits,
        )
        return template.format(**values)
    
class Cost(Base):
    """ORM mapping for the 'costs' table.

    All five amounts are stored as strings of at most 20 characters; the check
    constraints reject empty strings in each of them.
    """
    __tablename__ = 'costs'
    __table_args__ = (
        CheckConstraint('length(hoa) > 0'),
        CheckConstraint('length(rent_amount) > 0'),
        CheckConstraint('length(property_tax) > 0'),
        CheckConstraint('length(fire_insurance) > 0'),
        CheckConstraint('length(total) > 0'),
    )

    id = Column(Integer, primary_key=True)
    hoa = Column(String(20))
    rent_amount = Column(String(20))
    property_tax = Column(String(20))
    fire_insurance = Column(String(20))
    total = Column(String(20))

    def __repr__(self):
        """Return a one-line text form listing the primary key and every amount column."""
        template = "<costs(id='{id}', hoa={hoa}, rent_amount={rent}, property_tax={tax}, fire_insurance={fire}, total={total})>"
        values = dict(
            id=self.id,
            hoa=self.hoa,
            rent=self.rent_amount,
            tax=self.property_tax,
            fire=self.fire_insurance,
            total=self.total,
        )
        return template.format(**values)

class Place(Base):
    """ORM mapping for the 'places' table.

    `city` is a mandatory boolean flag; the remaining columns are integers.
    The check constraints require a positive area and non-negative counts of
    rooms, bathrooms and parking spaces.
    """
    __tablename__ = 'places'
    __table_args__ = (
        CheckConstraint('area > 0'),
        CheckConstraint('rooms >= 0'),
        CheckConstraint('bathroom >= 0'),
        CheckConstraint('parking_spaces >= 0'),
    )

    id = Column(Integer, primary_key=True)
    city = Column(Boolean, nullable=False)
    area = Column(Integer)
    rooms = Column(Integer)
    bathroom = Column(Integer)
    parking_spaces = Column(Integer)

    def __repr__(self):
        """Return a one-line text form listing the primary key and every place column."""
        template = "<places(id='{id}', city={city}, area={area}, rooms={rooms}, bathroom={bathroom}, parking_spaces={parking})>"
        values = dict(
            id=self.id,
            city=self.city,
            area=self.area,
            rooms=self.rooms,
            bathroom=self.bathroom,
            parking=self.parking_spaces,
        )
        return template.format(**values)

class Benefits(Base):
    """ORM mapping for the 'benefits' table.

    `floor` is a nullable integer; `animal` and `furniture` are strings of at
    most 20 characters, and the check constraints reject empty strings in them.
    """
    __tablename__ = 'benefits'
    __table_args__ = (
        CheckConstraint('length(animal) > 0'),
        CheckConstraint('length(furniture) > 0')
    )
    id = Column(Integer, primary_key=True)
    floor = Column(Integer, nullable=True)
    animal = Column(String(20))
    furniture = Column(String(20))

    def __repr__(self):
        """Return a one-line text form listing the primary key and every benefit column."""
        # Labelled with the table name, like the repr of House, Cost and Place
        # (this one used to say "parameters").
        return "<benefits(id='{id}', floor={floor}, animal={animal}, furniture={furniture})>".format(
            id=self.id, floor=self.floor, animal=self.animal, furniture=self.furniture)

# Create the tables declared on Base (houses, costs, places, benefits) through `engine`.
Base.metadata.create_all(engine)

In [8]:
# Get Costs: one row per distinct combination of the five cost columns

Costs_list = (
    data[['hoa', 'rent amount', 'property tax', 'fire insurance', 'total']]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)   # renumber 0..n-1 without keeping the old labels
    .rename_axis('id')        # the new index acts as the cost id
)

Costs_list


Unnamed: 0_level_0,hoa,rent amount,property tax,fire insurance,total
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,R$0,"R$8,000","R$1,000",R$121,"R$9,121"
1,R$540,R$820,R$122,R$11,"R$1,493"
2,"R$4,172","R$7,000","R$1,417",R$89,"R$12,680"
3,R$700,"R$1,250",R$150,R$16,"R$2,116"
4,R$0,"R$1,200",R$41,R$16,"R$1,257"
...,...,...,...,...,...
5664,R$0,"R$1,390",R$0,R$18,"R$1,408"
5665,R$420,"R$1,150",R$0,R$15,"R$1,585"
5666,R$768,"R$2,900",R$63,R$37,"R$3,768"
5667,R$250,R$950,R$42,R$13,"R$1,255"


In [9]:
# Get Places: one row per distinct combination of the five place columns

Places_list = (
    data[['city', 'area', 'rooms', 'bathroom', 'parking spaces']]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)   # renumber 0..n-1 without keeping the old labels
    .rename_axis('id')        # the new index acts as the place id
)

Places_list

Unnamed: 0_level_0,city,area,rooms,bathroom,parking spaces
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1,240,3,3,4
1,0,64,2,1,1
2,1,443,5,5,4
3,1,73,2,2,1
4,1,19,1,1,0
...,...,...,...,...,...
3026,1,130,3,3,0
3027,1,114,3,2,3
3028,0,600,5,5,4
3029,1,88,2,2,1


In [10]:
# Get benefits: one row per distinct combination of floor, animal and furniture

Benefits_list = (
    data[['floor', 'animal', 'furniture']]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)   # renumber 0..n-1 without keeping the old labels
    .rename_axis('id')        # the new index acts as the benefits id
)

Benefits_list

Unnamed: 0_level_0,floor,animal,furniture
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,-,acept,furnished
1,10,acept,not furnished
2,3,acept,furnished
3,12,acept,not furnished
4,-,not acept,not furnished
...,...,...,...
120,26,not acept,furnished
121,27,not acept,furnished
122,32,not acept,furnished
123,51,acept,not furnished


In [11]:
# Prepare id columns

def _lookup_ids(frame, lookup):
    """Return, for every row of `frame`, the index label of the matching row in `lookup`.

    Rows are matched on all columns of `lookup` with a single left merge, which
    replaces one full-frame boolean scan per lookup row. Rows of `frame` that
    have no match fall back to their own 'id' value, which is what the earlier
    row-by-row assignment left in place for them.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding an 'id' column plus every column of `lookup`.
    lookup : pandas.DataFrame
        De-duplicated key combinations; its index supplies the ids.

    Returns
    -------
    pandas.Series
        Ids aligned with `frame.index`, in the dtype of `frame['id']`.

    Raises
    ------
    pandas.errors.MergeError
        If a key combination occurs more than once in `lookup`.
    """
    key_columns = list(lookup.columns)
    # Carry the lookup's index along as an ordinary column so it survives the merge.
    keyed = lookup.assign(_lookup_id=lookup.index)
    matched = frame[key_columns].merge(
        keyed, on=key_columns, how='left', validate='many_to_one')['_lookup_id']
    # A left merge keeps the left row order but renumbers the rows.
    matched.index = frame.index
    return matched.fillna(frame['id']).astype(frame['id'].dtype)


data['costs_id'] = _lookup_ids(data, Costs_list)
data['places_id'] = _lookup_ids(data, Places_list)
data['benefits_id'] = _lookup_ids(data, Benefits_list)
    

In [12]:
# Get houses: one row per distinct (costs_id, places_id, benefits_id) triple

Houses_list = (
    data[['costs_id', 'places_id', 'benefits_id']]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)   # renumber 0..n-1 without keeping the old labels
    .rename_axis('id')        # the new index acts as the house id
)

Houses_list

Unnamed: 0_level_0,costs_id,places_id,benefits_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,0
1,1,1,1
2,2,2,2
3,3,3,3
4,4,4,4
...,...,...,...
5877,5665,57,5
5878,5666,2739,102
5879,5667,116,13
5880,1356,504,4


In [14]:
#Sending data to database
# to_sql() inserts by column name, so every frame must use the column names
# declared on its ORM table.
Costs_list = Costs_list.rename(columns = {'rent amount' : 'rent_amount', 'property tax' : 'property_tax', 'fire insurance' : 'fire_insurance'})
Places_list = Places_list.rename(columns = {'parking spaces' : 'parking_spaces'})
# The houses table names its foreign keys id_costs / id_places / id_benefits;
# without this rename the insert into 'houses' fails on unknown columns.
Houses_list = Houses_list.rename(columns = {'costs_id' : 'id_costs', 'places_id' : 'id_places', 'benefits_id' : 'id_benefits'})

Places_list['city'] = Places_list['city'].astype('bool')

# benefits.floor is a nullable Integer column, but the frame holds strings in
# which '-' stands for "no floor". Turn '-' into a missing value and the rest
# into numbers so the insert is not rejected; any other non-numeric text raises.
Benefits_list = Benefits_list.assign(
    floor = pd.to_numeric(Benefits_list['floor'].replace('-', float('nan'))))

Places_list

Unnamed: 0_level_0,city,area,rooms,bathroom,parking_spaces
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,True,240,3,3,4
1,False,64,2,1,1
2,True,443,5,5,4
3,True,73,2,2,1
4,True,19,1,1,0
...,...,...,...,...,...
3026,True,130,3,3,0
3027,True,114,3,2,3
3028,False,600,5,5,4
3029,True,88,2,2,1


In [16]:
# Append each frame to its table. The three tables that 'houses' references
# are written first, 'houses' last.
Costs_list.to_sql(name='costs', con=engine, if_exists='append')
Places_list.to_sql(name='places', con=engine, if_exists='append')
Benefits_list.to_sql(name='benefits', con=engine, if_exists='append')
Houses_list.to_sql(name='houses', con=engine, if_exists='append')
