In [1]:
import os

from Core.DTO import *
from Core.Relations import *
import pandas as pd
import numpy as np


## Input de dados

In [2]:
# 🔹 Database configuration (can be reused for any ModelDTO)
# Both connection strings can be overridden through environment variables so that
# credentials do not have to be edited into the notebook. When the variables are
# unset, the original local-development values are used, so behaviour is unchanged.
# NOTE(review): the fallback SQL URL still embeds a password; drop the fallback once
# SQL_URL is set in every environment where this notebook runs.
mongo_url = os.environ.get("MONGO_URL", "mongodb://localhost:27017/")
sql_url = os.environ.get("SQL_URL", 'mysql+pymysql://root:000000000@localhost/mydb')

# Single manager that hands out both the SQL session and the Mongo database handle.
db_manager = DatabaseManager(sql_url, mongo_url = mongo_url)
session = db_manager.get_session()

# Helpers built on the shared SQL session.
dataset_repo = DatasetRepository(session)
conversor = ConverterDTO(session=session)

mongo_db = db_manager.get_mongo_db()


### Seoul

In [3]:
import re
from datetime import datetime

def remove_parentheses_content(text):
    """Return ``text`` with every ``(...)`` group deleted.

    A group runs from an opening parenthesis to the first closing parenthesis
    that follows it; the parentheses themselves are removed as well. Text
    outside the groups (including surrounding whitespace) is left untouched,
    and an opening parenthesis that is never closed is kept as-is.
    """
    parenthesised_group = re.compile(r'\([^)]*\)')
    return parenthesised_group.sub('', text)

project_name = 'SeoulBike'

# Look the dataset up by name first; the CSV is only ingested when it is not registered yet.
dataset_dto = dataset_repo.filter_by({'name' : project_name}).first()

if not dataset_dto:
    df = pd.read_csv('data//SeoulBikeData.csv', encoding='latin1')

    # Normalise the headers: lower-case, strip "(...)" groups, trim, spaces -> underscores.
    df.columns = [
        remove_parentheses_content(column.lower()).strip().replace(' ','_')
        for column in df.columns
    ]

    # Replace the day/month/year `date` strings with a parsed `timestamp` column.
    # NOTE(review): the timestamp is built from the date alone, and the frame displayed
    # further down shows `hour` == 23 on every row — confirm that keeping a single row
    # per day is intended.
    df['timestamp'] = df['date'].map(lambda raw_date : datetime.strptime(raw_date,  "%d/%m/%Y"))
    df = df.drop(columns = 'date')

    # Long format: one row per (timestamp, feature name, value).
    feature_columns = df.drop(columns = 'timestamp').columns
    df_melt = df.melt(id_vars = 'timestamp', value_vars = feature_columns, var_name = 'name')

    # Record the Python type name of every value next to it, plus two constant columns.
    df_melt['type'] = df_melt['value'].map(lambda cell : type(cell).__name__)
    df_melt['project'] = project_name
    df_melt['prediction'] = 0

    # Store the rows in Mongo, register the dataset in the SQL repository,
    # then re-read the DTO from the repository.
    dataset_dto = DatasetDTO(name = project_name, targetFeature = 'rented_bike_count' )
    dataset_dto.save_data_mongo(mongo_db ,df = df_melt)
    dataset_dto.instructions = {'project':project_name,'prediction':0}
    dataset_repo.save(dataset_dto)
    dataset_dto = dataset_repo.filter_by({'name' : project_name}).first()

# In both cases the data is loaded back from Mongo before the dataset object is used.
dataset_dto.load_data_from_mongo(mongo_db)
dataset = dataset_dto.dataset

In [4]:
# Display `dataset.df` as the cell output (rendered as the table below).
dataset.df

Unnamed: 0,timestamp,prediction,dew_point_temperature,functioning_day,holiday,hour,humidity,rainfall,rented_bike_count,seasons,snowfall,solar_radiation,temperature,visibility,wind_speed
0,2017-12-01,0,-3.4,Yes,No Holiday,23,84,0.0,930,Winter,0.0,1.16,3.0,2000,4.2
1,2017-12-02,0,-3.3,Yes,No Holiday,23,87,0.0,618,Winter,0.0,1.12,7.6,1955,2.6
2,2017-12-03,0,3.8,Yes,No Holiday,23,92,2.5,621,Winter,0.0,0.69,8.0,1296,2.5
3,2017-12-04,0,2.4,Yes,No Holiday,23,89,0.1,937,Winter,0.0,1.17,4.4,2000,5.8
4,2017-12-05,0,-9.4,Yes,No Holiday,23,59,0.0,812,Winter,0.0,0.64,-0.5,2000,3.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2018-11-26,0,1.8,Yes,No Holiday,23,93,0.0,1708,Autumn,0.3,1.74,13.2,714,1.6
361,2018-11-27,0,7.5,Yes,No Holiday,23,88,0.0,1577,Autumn,0.0,0.87,12.5,763,2.7
362,2018-11-28,0,-8.7,Yes,No Holiday,23,38,0.0,1540,Autumn,0.0,1.86,10.5,1880,3.0
363,2018-11-29,0,-4.2,Yes,No Holiday,23,59,0.0,1635,Autumn,0.0,0.64,6.5,1999,2.6


In [5]:
# Training run: execute the SeoulBike training task with a fresh model,
# passing end_date '2017-12-31' as the task parameter, then persist the run.
model = OHEDecisionTreeRegressor()
task = SeoulBikeTrainingTask(dataset=dataset)

training_parameters = {'end_date':'2017-12-31'}
run = Run()
run.execute(task=task, model=model, taskParameters=training_parameters)

# Convert the executed run to its DTO and save it through the run repository.
run_dto = conversor.converter_object_to_dto(run)
run_repo = RunRepository(session=session)
run_repo.save(run_dto)

In [6]:
# Prediction run: execute the SeoulBike prediction task with the `model` object
# defined in an earlier cell, passing end_date '2017-12-31', then persist the run.
task = SeoulBikePredictionTask(dataset=dataset)

prediction_parameters = {'end_date':'2017-12-31'}
run = Run()
run.execute(task=task, model=model, taskParameters=prediction_parameters)

# Convert the executed run to its DTO and save it through the run repository.
run_dto = conversor.converter_object_to_dto(run)
run_repo = RunRepository(session=session)
run_repo.save(run_dto)

In [9]:
# Display the `measures` attribute of the current `run` object as the cell output.
run.measures

[<Core.Relations.Measure.Measure at 0x11e48132770>]

In [5]:
# Monthly prediction runs: for each month start from 2018-01-01 to 2018-11-01, execute
# the prediction task over [month start, month end] and persist the resulting run.
# This cell relies on `dataset` and `model` already being defined by earlier cells
# (presumably the model used in the training run — confirm before running on a fresh kernel).
task = SeoulBikePredictionTask(dataset=dataset)

# The repository does not depend on the loop variable, so it is created once.
run_repo = RunRepository(session=session)

datas = pd.date_range(start="2018-01-01", end="2018-11-30", freq="MS")  # MS = Month Start
for data_inicio in datas:
    # MonthEnd(0) rolls a month-start timestamp forward to the last day of that month.
    # It gives the same date as date_range(..., freq="ME")[0] but does not need the
    # "ME" alias, which older pandas releases do not recognise.
    data_fim = data_inicio + pd.offsets.MonthEnd(0)

    run = Run()
    run.execute(task=task, model=model, taskParameters={'start_date':data_inicio,'end_date':data_fim})

    run_dto = conversor.converter_object_to_dto(run)
    run_repo.save(run_dto)