In [1]:
import pandas as pd

In [2]:
data_path="data/YNDX_160101_161231.csv"
df=pd.read_csv(data_path)

In [10]:
import enum
import gym

class Actions(enum.Enum):
    Skip=0
    Buy=1
    Close=2
    
class StocksEnv(gym.Env):
    metadata={"render.modes":["human"]}
    
    @classmethod
    def from_dir(cls,data_dir,**kwargs):
        prices={file:data.load_relative(file) for file in data.price_files(data_dir)}
        return StocksEnv(prices, **kwargs)
    
    def __init__(self, prices,bars_count=DEFAULT_BARS_COUNT,
                 commision=DEFAULT_COMMISION_PERC,reset_on_close=True,state_1d=False,
                 random_ofs_on_reset=True,reward_on_close=False,volumes=False):
        assert isinstance(prices,dict)
        self._prices=prices
        if state_1d:
            self._state=State1D(bars_count, commision,reset_on_close,
                                reward_on_close=reward_on_close, volumes=volumes)
        else:
            self._state=State(bars_count,commision,reset_on_close,
                              reward_on_close=reward_on_close,volumes=volumes)
        self.action_space=gym.spaces.Discrete(n=len(Actions))
        self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=self._state.shape,dtype=np.float32)
        self.random_ofs_on_reset=random_ofs_on_reset
        self._seed()
        
    def reset(self):
        # make selection of the instrument and its offset. Then reset the state
        self._instrument=self.np_random.choice(list(self.prices.keys()))
        prices=self._prices[self._instrument]
        bars=self._state.bars_count
        if self.random_ofs_on_reset:
            offset=self.np_random.choice(prices.high.shape[0]-bars*10)+bars
        else:
            offset=bars
        self._state.reset(prices,offset)
        return self._state.encode()
    
    def step(self,action_idx):
        action=Actions(action_idx)
        reward,done=self._state.step(action)
        obs=self._state.encode()
        info={"instrument": self._instrument,
              "offset": self._state._offset}
        return obs, reward, done, info
    
    def render(self, mode="human", close=False):
        pass
    def close(self):
        pass
    
    def seed(self, seed=None):
        self.np_random, seed1=seeding.np_random(seed)
        seed2=seeding.hash_seed(seed1+1)%2**31
        return [seed1, seed2]
    

    
class State:
    def __init__(self, bars_count, commision_perc, reset_on_close, reward_on_close=True, volumes=True):
        assert isinstance(bars_count, int)
        assert bars_count>0
        assert isinstance(commision_perc,float)
        assert commision_perc>=0.
        assert isinstance(reset_on_close,bool)
        assert isinstance(reward_on_close,bool)
        self.bars_count=bars_count
        self.commission_perc=commision_perc
        self.reset_on_close=reset_on_close
        self.reward_on_close=reward_on_close
        self.volumes=volumes
        
    def reset(self,prices,offset):
        assert isinstance(prices,data.Prices)
        assert offset >= self.bars_count-1
        self.have_position=False
        self.open_price=0.
        self._prices=prices
        self._offset=offset
    
    @property
    def shape(self):
        # [h,l,c]*bars+position_flag+rel_profit (since open)
        if self.volumes:
            return (4*self.bars_count+1+1,)
        else:
            return (3*self.bars_count+1+1,)
    
    def encode(self):
        '''
        Convert current state into numpy array
        '''
        res=np.ndarray(shape=self.shape,dtype=np.float32)
        shift=0
        for bar_idx in range(-self.bars_count+1, 1):
            res[shift]=self._prices.high[self._offset+bar_idx]
            shift+=1
            res[shift]=self._prices.low[self._offset+bar_idx]
            shift+=1
            res[shift]=self._prices.close[self._offset+bar_idx]
            shift+=1
            if self.volumes:
                res[shift]=self._prices.volume[self._offset+bar_idx]
                shift+=1
            res[shift]=float(self.have_position)
            shift+=1
            if not self.have_position:
                res[shift]=0.
            else:
                res[shift]=(self._cur_close()-self.open_price)/self.open_price
            return res
    
    def _cur_close(self):
        open=self._prices.open[self._offset]
        rel_close=self._prices.close[self._offset]
        return open*(1.0+rel_close)
    
    def step(self,action):
        assert isinstance(action,Actions)
        reward=0.
        done=False
        close=self._cur_close()
        
        if action==Actions.Buy and not self.have_position:
            self.have_position=True
            self.open_price=close
            reward-=self.commision_perc
        elif action==Actions.Close and self.have_position:
            reward-=self.commission_perc
            done|=self.reset_on_close
            if self.reward_on_close:
                reward+=100.*(close-self.open_prices)/self.open_prices
                self.have_position=False
                self.open_price=0.
        
        self._offset+=1
        prev_close=close
        close=self._cur_close()
        done|=self._offset>=self._prices.close.shape[0]-1
        if self.have_position and not self.reward_on_close:
            reward+=100.0*(close-prev_close)/prev_close
        
        return reward,done
    
class State1D(State):
    @property
    def shape(self):
        if self.volumes:
            return (6,self.bars_count)
        else:
            return (5,self.bars_count)
        
    def encode(self):
        res=np.zeros(shape=self.shape, dtype=np.float32)
        ofs=self.bars_count-1
        res[0]=self._prices.high[self._offset-ofs:self_offset+1]
        res[1]=self._prices.low[self._offset-ofs:self._offset+1]
        res[2]=self._prices.close[self._offset-ofs:self._offset+1]
        if self.volumes:
            res[3]=self._prices.volume[self._offset-ofs:self._offset+1]
            dst=4
        else:
            dst=3
        if self.have_position:
            res[dst]=1.0
            res[dst+1]=(self._cur_close()-self.open_price)/self.open_price    
        return res
        

NameError: name 'DEFAULT_BARS_COUNT' is not defined

In [13]:
# models
from torch import nn

class SimpleFFDQN(nn.Module):
    def __init__(self, obs_len, actions_n):
        super(SimpleFFDQN,self).__init__()
        self.fc_val=nn.Sequential(
            nn.Linear(obs_len,512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,1)
        )
        self.fc_adv=nn.Sequential(
            nn.Linear(obs_len,512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,actions_n)
        )
        
    def forward(self,x):
        val=self.fc_val(x)
        adv=self.fc_adv(x)
        return val+adv-adv.mean()
    
class DQNConv1D(nn.Module):
    def __init__(self, shape,actions_n):
        super(DQNConv1D,self).__init__()
        
        self.conv=nn.Sequential(
            nn.Conv1d(shape[0],128,5),
            nn.ReLU(),
            nn.Conv1d(128,128,5),
            nn.ReLU()
        )
        
        out_size=self._get_conv_out(shape)
        self.fc_val=nn.Sequential(
            nn.Linear(out_size,512),
            nn.ReLU(),
            nn.Linear(512,1)
        )
        
        self.fc_adv=nn.Sequential(
            nn.Linear(out_size,512),
            nn.ReLU(),
            nn.Linear(512,actions_n)
        )
        
    def _get_conv_out(self, shape):
        o=self.conv(torch.zeros(1,*shape))
        return int(np.prod(o.size()))
    
    def forward(self,x):
        conv_out=self.conv(x).view(x.size()[0],-1)
        val=self.fc_val(conv_out)
        adv=self.fc_adv(conv_out)
        return val+adv-adv.mean()
    
    