In [None]:
# Working through the TensorTrade tutorial
from tensortrade.oms.instruments import Instrument

In [None]:
# Currency of the cash wallet and base instrument of the portfolio
# (second argument is presumably the decimal precision — confirm against Instrument).
USD = Instrument("USD", 2, "U.S. Dollar")
# Asset instrument that is traded against USD in this notebook.
TTC = Instrument("TTC", 8, "Tensor Trade Coin")

In [None]:
from gymnasium.spaces import Discrete
from tensortrade.env.default.actions import TensorTradeActionScheme
from tensortrade.env.default.rewards import TensorTradeRewardScheme
from tensortrade.env.generic import ActionScheme, TradingEnv, Renderer
from tensortrade.core import Clock
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.instruments import ExchangePair
from tensortrade.oms.wallets import Portfolio
from tensortrade.oms.orders import Order, proportion_order, TradeSide, TradeType
from ray.rllib.algorithms.ppo import PPOConfig
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



In [None]:
class BSH(TensorTradeActionScheme):
    """Two-action scheme that moves the full balance between a cash and an asset wallet.

    The scheme remembers the last action it received (initially 0). While the
    remembered action is 0 the cash wallet is the source of the next order;
    otherwise the asset wallet is. An order is only created when the incoming
    action differs from the remembered one.
    """

    registered_name = "bsh"

    def __init__(self, cash: "Wallet", asset: "Wallet"):
        """Store both wallets and start with action 0 and no listeners."""
        super().__init__()
        self.cash = cash
        self.asset = asset

        self.listeners = []
        self.action = 0

    @property
    def action_space(self):
        """Discrete space with two possible actions."""
        return Discrete(2)

    def attach(self, listener):
        """Register a listener and return this scheme so calls can be chained."""
        self.listeners.append(listener)
        return self

    def get_orders(self, action: int, portfolio: "Portfolio"):
        """Translate an action into a one-element list holding an order or None.

        Every attached listener is notified of the action via ``on_action``,
        whether or not an order was created.
        """
        order = None

        action_changed = abs(action - self.action) > 0
        if action_changed:
            # Direction is decided by the previously stored action.
            if self.action == 0:
                source, target = self.cash, self.asset
            else:
                source, target = self.asset, self.cash
            order = proportion_order(portfolio, source, target, 1.0)
            self.action = action

        for subscriber in self.listeners:
            subscriber.on_action(action)

        return [order]

    def reset(self):
        """Reset the parent scheme and return to action 0."""
        super().reset()
        self.action = 0

In [None]:
class PBR(TensorTradeRewardScheme):
    """Reward scheme that multiplies the step-to-step price difference by the position.

    ``position`` is -1 after action 0 and +1 after any other action; it is
    updated through ``on_action``.
    """

    registered_name = "pbr"

    def __init__(self, price: "Stream"):
        """Build the internal reward feed from the given price stream."""
        super().__init__()
        self.position = -1

        # Difference between consecutive price values.
        price_delta = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        # Reads this scheme's current position each time the feed advances.
        held_position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward = (price_delta * held_position).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int):
        """Update the position from the latest action."""
        if action == 0:
            self.position = -1
        else:
            self.position = 1

    def get_reward(self, portfolio: "Portfolio"):
        """Advance the reward feed by one step and return its "reward" value."""
        step_values = self.feed.next()
        return step_values["reward"]

    def reset(self):
        """Restore the initial position and rewind the reward feed."""
        self.position = -1
        self.feed.reset()

In [None]:
class PositionChangeChart(Renderer):
    """Renderer that plots the price with position changes next to portfolio performance.

    The left panel shows the price series with markers where the recorded
    action switches; the right panel plots the portfolio performance history.
    """

    def __init__(self, color: str = "orange"):
        """Create the renderer.

        Parameters
        ----------
        color : str
            Matplotlib color used for the price line. Defaults to "orange".
        """
        # Honor the caller-supplied color instead of always using "orange".
        self.color = color

    def render(self, env, **kwargs):
        """Draw the trading chart and net-worth chart for the given environment.

        Reads the ``action`` and ``price`` columns from
        ``env.observer.renderer_history`` and the performance record from
        ``env.action_scheme.portfolio.performance``.
        """
        history = pd.DataFrame(env.observer.renderer_history)

        actions = list(history.action)
        prices = list(history.price)

        buy = {}
        sell = {}

        # Compare each action with its successor; a 0 -> 1 switch is a buy,
        # any other change is a sell. The marker is placed at the earlier step.
        for i, (current, following) in enumerate(zip(actions, actions[1:])):
            if current == following:
                continue
            if current == 0 and following == 1:
                buy[i] = prices[i]
            else:
                sell[i] = prices[i]

        # Explicit dtype keeps empty series numeric rather than object-typed.
        buy = pd.Series(buy, dtype="float64")
        sell = pd.Series(sell, dtype="float64")

        fig, axs = plt.subplots(1, 2, figsize=(15, 5))

        fig.suptitle("Performance")
        axs[0].plot(np.arange(len(prices)), prices, label="price", color=self.color)
        axs[0].scatter(buy.index, buy.values, marker="^", color="green")
        axs[0].scatter(sell.index, sell.values, marker="^", color="red")
        axs[0].set_title("Trading Chart")

        # from_dict is a classmethod; call it on the class, not a throwaway instance.
        performance_df = pd.DataFrame.from_dict(
            env.action_scheme.portfolio.performance, orient="index")
        performance_df.plot(ax=axs[1])
        axs[1].set_title("Net Worth")
        


In [None]:
import copy

import ray
import numpy as np
import pandas as pd

from ray import tune
from ray.tune.registry import register_env, _global_registry

import tensortrade.env.default as default
from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio

In [None]:
def create_env(config):
    """Build the sine-wave trading environment used for training and evaluation.

    Parameters
    ----------
    config : mapping
        Must provide a "window_size" entry, which is forwarded to
        ``default.create``.

    Returns
    -------
    The environment object returned by ``default.create``.
    """
    # Synthetic price: three sine periods over [0, 2*pi), oscillating around 100
    # with an amplitude of 50.
    step = 2 * np.pi / 1001
    x = np.arange(0, 2 * np.pi, step)
    y = 50 * np.sin(3 * x) + 100

    price_stream = Stream.source(y, dtype="float").rename("USD-TTC")

    bitfinex = Exchange("bitfinex", service=execute_order)(price_stream)

    # Start with cash only; the asset wallet is empty.
    cash = Wallet(bitfinex, 100000 * USD)
    asset = Wallet(bitfinex, 0 * TTC)
    portfolio = Portfolio(USD, [cash, asset])

    # Observation features: raw price, three rolling means and the log difference.
    fast = price_stream.rolling(window=10).mean().rename("fast")
    medium = price_stream.rolling(window=50).mean().rename("medium")
    slow = price_stream.rolling(window=100).mean().rename("slow")
    log_return = price_stream.log().diff().fillna(0).rename("lr")
    feed = DataFeed([price_stream, fast, medium, slow, log_return])

    # The reward scheme is attached to the action scheme as a listener.
    reward_scheme = PBR(price=price_stream)
    action_scheme = BSH(cash=cash, asset=asset).attach(reward_scheme)

    # Separate feed recording price and current action for the renderer.
    renderer_price = Stream.source(y, dtype="float").rename("price")
    renderer_action = Stream.sensor(
        action_scheme, lambda s: s.action, dtype="float"
    ).rename("action")
    renderer_feed = DataFeed([renderer_price, renderer_action])

    return default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        renderer_feed=renderer_feed,
        renderer=PositionChangeChart(),
        window_size=config["window_size"],
        max_allowed_loss=0.6,
    )

# Register the factory under the name "TradingEnv" so the RLlib config can
# refer to the environment by that name.
register_env("TradingEnv", create_env)

In [None]:
# PPO configuration for the registered "TradingEnv" environment.
config = (
    PPOConfig()
    .training(
        # gamma=0 removes any weight on future rewards in the return.
        gamma=0,
        # NOTE(review): an lr_schedule is also given below; presumably it takes
        # precedence over this constant value — confirm against RLlib docs.
        lr=8e-6,
        vf_loss_coeff=0.5,
        entropy_coeff=0.01,
        # [timestep, learning rate] pairs.
        lr_schedule=[
            [0, 1e-1],
            [100, 1e-2],
            [1_000, 1e-3],
            [10_000, 1e-4],
            [100_000, 1e-5],
            [1_000_000, 1e-6],
            [10_000_000, 1e-7],
        ],
        lambda_=0.72,
    )
    .environment(
        env="TradingEnv",
        env_config={"window_size": 25},
        clip_rewards=True,
    )
    .framework(framework="torch")
    .debugging(log_level="DEBUG")
    .env_runners(
        observation_filter="MeanStdFilter",
        # ignore_worker_failures=True,  (left disabled)
        num_env_runners=1,
    )
    .resources(num_gpus=0)
)

In [None]:
# Train PPO through Ray Tune until the mean episode reward reaches the stop
# threshold, and write a checkpoint when the run ends.
analysis = tune.run(
    "PPO",
    config=config.to_dict(),
    stop={"env_runners/episode_reward_mean": 500},
    checkpoint_at_end=True,
)

In [None]:
# Take the checkpoint of the trial with the highest mean episode reward.
checkpoints = analysis.get_best_trial(
    mode="max",
    metric="env_runners/episode_reward_mean",
).checkpoint

In [None]:
# Display the selected checkpoint object for inspection.
checkpoints

In [None]:
# Build an algorithm instance from the same config, then restore its state
# from the path of the checkpoint selected above.
algo = config.build()
algo.restore(checkpoints.path)

In [None]:
# Fresh environment with the same window size as the training config.
env = create_env({"window_size": 25})

# Bookkeeping for the evaluation loop in the next cell.
done = False
episode_reward = 0

# reset() returns a sequence; only its first element (the observation) is kept.
obs = env.reset()[0]

In [None]:
# Roll out one episode with the restored policy, accumulating the total reward.
while not done:
    action = algo.compute_single_action(obs)
    obs, reward, done, truncated, info = env.step(action)
    # A Gymnasium episode ends on either termination or truncation; honor both
    # so the loop cannot spin forever on a truncated episode.
    done = done or truncated
    episode_reward += reward
    print(action, reward, info)

In [None]:
# Render the finished episode using the environment's configured renderer.
env.render()

In [None]:
# Show the total reward accumulated over the evaluation episode.
episode_reward

In [None]:
import pprint

In [None]:
# Build a new algorithm from the config (replacing the restored one) and run
# ten training iterations, printing each result dictionary.
algo = config.build()
for i in range(10):
    result = algo.train()
    # Drop the echoed config before printing; supply a default so a result
    # without a "config" entry does not raise KeyError.
    result.pop("config", None)
    pprint.pprint(result)

In [None]:
# New environment instance for evaluating the algorithm trained in the loop above.
env = create_env({"window_size": 25})

# Start the episode bookkeeping from scratch.
done = False
episode_reward = 0

# Keep only the first element returned by reset(), i.e. the observation.
obs = env.reset()[0]

In [None]:
# Roll out one episode with the current `algo`, accumulating the total reward.
while not done:
    action = algo.compute_single_action(obs)
    obs, reward, done, truncated, info = env.step(action)
    # Stop on truncation as well as termination, since Gymnasium reports the
    # end of an episode through either flag.
    done = done or truncated
    episode_reward += reward
    print(action, reward, info)

In [None]:
# Render the episode that was just rolled out with the current `algo`.
env.render()