From ca78a708c69cbf2920d4f4d2a5ecaeea9e9098d5 Mon Sep 17 00:00:00 2001
From: Xiao-Yang Liu <xl2427@columbia.edu>
Date: Thu, 3 Mar 2022 13:37:40 -0500
Subject: [PATCH] Revert "Shenlei151 flexibility enhance"

---
 RL_stock.py                                   | 133 ---
 finrl/drl_agents/stablebaselines3/models.py   |   2 -
 .../env_stock_trading/env_stocktrading.py     | 991 +++++++++---------
 3 files changed, 484 insertions(+), 642 deletions(-)
 delete mode 100644 RL_stock.py

diff --git a/RL_stock.py b/RL_stock.py
deleted file mode 100644
index c7311b622..000000000
--- a/RL_stock.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# import packages
-
-from finrl.apps import config
-import pandas as pd
-import numpy as np
-import matplotlib
-import matplotlib.pyplot as plt
-# matplotlib.use('Agg')
-import datetime
-
-from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
-from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
-from finrl.finrl_meta.env_stock_trading.env_stocktrading import StockTradingEnv
-from finrl.drl_agents.stablebaselines3.models import DRLAgent
-from finrl.finrl_meta.data_processor import DataProcessor
-
-from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
-from pprint import pprint
-
-import sys
-sys.path.append("../FinRL-Library")
-
-import itertools
-
-import os
-if not os.path.exists("./" + config.DATA_SAVE_DIR):
-    os.makedirs("./" + config.DATA_SAVE_DIR)
-if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
-    os.makedirs("./" + config.TRAINED_MODEL_DIR)
-if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
-    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
-
-
-# read data
-df=pd.read_csv("new.csv").iloc[:,1:]
-
-# data preprocess
-# print(df)
-# df.loc[0,'USD (PM)']=-1
-# df=df.fillna(-1)
-list_ticker=['Gold','Bitcoin']
-list_date=list(pd.date_range(df['date'].min(),df['date'].max()).astype(str))
-combination=list(itertools.product(list_date,list_ticker))
-
-df.columns=['date','Bitcoin','Gold']
-df=df.sort_values(['date'])
-processed=df.melt(id_vars=['date'],value_vars=['Gold','Bitcoin'],var_name='tic',value_name='close')
-# print(df.info())
-# print(processed.info())
-# print(processed)
-processed['Disable']=processed['close'].apply(pd.isna)
-processed[processed['tic']=='Gold']=processed[processed['tic']=='Gold'].fillna(method='pad')
-processed.loc[0,'close']=1324
-processed_full=processed.sort_values(['date','tic'],ignore_index=True)
-# processed_full=processed
-# print(processed_full)
-# print(processed_full.isna().any())
-# processed_full.close=processed.close.astype('object')
-# print(processed_full.info())
-time=datetime.date(2016,9,11)
-
-# initial = [cash, initial_stock1_share, initial_stock2_share]
-initial=[1000,0,0]
-
-# initial dataframe used to store the result of model
-all_action=pd.DataFrame(columns=['date','Bitcoin','Gold'])
-all_value=pd.DataFrame(columns=['date','account_value'])
-all_state=pd.DataFrame(columns=['cash','Bitcoin_price','Gold_price','Bitcoin_num','Gold_num','Bitcoin_Disable','Gold_Disable'])
-
-# trainning & trading process
-while(time+datetime.timedelta(days=30)<datetime.date(2021,9,10)):
-    train = data_split(processed_full, str(time),str(time+datetime.timedelta(days=31)))
-    trade = data_split(processed_full, str(time+datetime.timedelta(days=30)),str(time+datetime.timedelta(days=60)))
-    # print(len(processed_full))
-    # print(train)
-    # print(trade)
-    # print(train.iloc[-1][:])
-
-    # print(trade.loc[0]['close'])
-
-    stock_dimension=len(train.tic.unique())
-    # print("initial asset========", initial[0] + sum(initial[1:1 + stock_dimension] * trade.loc[0]['close']))
-    state_space=3*stock_dimension+1
-    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
-    env_kwargs = {
-        "hmax": 10,
-        "initial_list": initial, # Pass a initial state list to build trading env, instead of simply pass initial cash
-        "buy_cost_pct": [0.01,0.02], # Different stock may need to have different cost of trading (buy or sell) in some specific problems
-        "sell_cost_pct": [0.01,0.02],
-        "state_space": state_space,
-        "stock_dim": stock_dimension,
-        "tech_indicator_list": ['Disable'], # there may be some dates that a stock is unable to trade
-        "action_space": stock_dimension,
-        "reward_scaling": 1e-4,
-        "model_name":"stock exchange_SAC_coor",
-        "mode":"alpha_Bitcoin=0.01, alpha_Gold=0.02"
-
-    }
-
-    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
-
-    env_train, _ = e_train_gym.get_sb_env()
-    # print(type(env_train))
-    agent = DRLAgent(env = env_train)
-    model_PPO = agent.get_model("sac")
-    trained_PPO = agent.train_model(model=model_PPO,
-                                 tb_log_name='sac',
-                                 total_timesteps=600)
-
-    e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
-    # print("initial=======",initial)
-    df_account_value, df_actions,df_state = DRLAgent.DRL_prediction(
-        model=trained_PPO,
-        environment = e_trade_gym)
-    # print("first day asset:",df_account_value.iloc[0]['account_value'])
-    df_actions.to_csv('action.csv')
-    print("==============Get Backtest Results===========")
-    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
-    print(df_account_value)
-    perf_stats_all = backtest_stats(account_value=df_account_value)
-    # holding_num_share=backtest_stats()
-    perf_stats_all = pd.DataFrame(perf_stats_all)
-    perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')
-    # for Serial training, have nothing to do with FinRL lib
-    initial=[df_state.iloc[-1]['cash'],df_state.iloc[-1]['Bitcoin_num'],df_state.iloc[-1]['Gold_num']]
-    time=time + datetime.timedelta(days=30)
-    all_action=pd.concat([all_action,df_actions],axis=0)
-    all_value=pd.concat([all_value,df_account_value],axis=0)
-    all_state=pd.concat([all_state,df_state],axis=0)
-
-all_value.to_csv('all_value.csv')
-all_action.to_csv('all_action.csv')
-all_state.to_csv('all_state.csv')
\ No newline at end of file
diff --git a/finrl/drl_agents/stablebaselines3/models.py b/finrl/drl_agents/stablebaselines3/models.py
index bdbeb8437..ac079db7f 100644
--- a/finrl/drl_agents/stablebaselines3/models.py
+++ b/finrl/drl_agents/stablebaselines3/models.py
@@ -110,7 +110,6 @@ def DRL_prediction(model, environment, deterministic=True):
         """make a prediction"""
         account_memory = []
         actions_memory = []
-        state_memory=[] #add memory pool to store states
         test_env.reset()
         for i in range(len(environment.df.index.unique())):
             action, _states = model.predict(test_obs, deterministic=deterministic)
@@ -120,7 +119,6 @@ def DRL_prediction(model, environment, deterministic=True):
             if i == (len(environment.df.index.unique()) - 2):
                 account_memory = test_env.env_method(method_name="save_asset_memory")
                 actions_memory = test_env.env_method(method_name="save_action_memory")
-                state_memory=test_env.env_method(method_name="save_state_memory") # add current state to state memory
             if dones[0]:
                 print("hit end!")
                 break
diff --git a/finrl/finrl_meta/env_stock_trading/env_stocktrading.py b/finrl/finrl_meta/env_stock_trading/env_stocktrading.py
index 4218671ba..9fcc9c245 100644
--- a/finrl/finrl_meta/env_stock_trading/env_stocktrading.py
+++ b/finrl/finrl_meta/env_stock_trading/env_stocktrading.py
@@ -1,507 +1,484 @@
-import gym
-import matplotlib
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-from gym import spaces
-from gym.utils import seeding
-from stable_baselines3.common.vec_env import DummyVecEnv
-
-matplotlib.use("Agg")
-
-# from stable_baselines3.common.logger import Logger, KVWriter, CSVOutputFormat
-
-
-class StockTradingEnv(gym.Env):
-    """A stock trading environment for OpenAI gym"""
-
-    metadata = {"render.modes": ["human"]}
-
-    def __init__(
-        self,
-        df,
-        stock_dim,
-        hmax,
-        initial_list,
-        # initial_amount,
-        buy_cost_pct,
-        sell_cost_pct,
-        reward_scaling,
-        state_space,
-        action_space,
-        tech_indicator_list,
-        turbulence_threshold=None,
-        risk_indicator_col="turbulence",
-        make_plots=False,
-        print_verbosity=10,
-        day=0,
-        initial=True,
-        previous_state=[],
-        model_name="",
-        mode="",
-        iteration="",
-    ):
-        self.day = day
-        self.df = df
-        self.stock_dim = stock_dim
-        self.hmax = hmax
-        self.initial_list=initial_list
-        self.initial_amount = initial_list[0] # get the initial cash
-        self.buy_cost_pct = buy_cost_pct
-        self.sell_cost_pct = sell_cost_pct
-        self.reward_scaling = reward_scaling
-        self.state_space = state_space
-        self.action_space = action_space
-        self.tech_indicator_list = tech_indicator_list
-        self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_space,))
-        self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=(self.state_space,)
-        )
-        self.data = self.df.loc[self.day, :]
-        self.terminal = False
-        self.make_plots = make_plots
-        self.print_verbosity = print_verbosity
-        self.turbulence_threshold = turbulence_threshold
-        self.risk_indicator_col = risk_indicator_col
-        self.initial = initial
-        self.previous_state = previous_state
-        self.model_name = model_name
-        self.mode = mode
-        self.iteration = iteration
-        # initalize state
-        self.state = self._initiate_state()
-
-        # initialize reward
-        self.reward = 0
-        self.turbulence = 0
-        self.cost = 0
-        self.trades = 0
-        self.episode = 0
-        # memorize all the total balance change
-        self.asset_memory = [self.initial_amount+np.sum(np.array(self.initial_list[1:])*np.array(self.state[1:1+self.stock_dim]))] # the initial total asset is calculated by cash + sum (num_share_stock_i * price_stock_i)
-        self.rewards_memory = []
-        self.actions_memory = []
-        self.state_memory=[] # we need sometimes to preserve the state in the middle of trading process 
-        self.date_memory = [self._get_date()]
-        #         self.logger = Logger('results',[CSVOutputFormat])
-        # self.reset()
-        self._seed()
-
-    def _sell_stock(self, index, action):
-        def _do_sell_normal():
-            if self.state[index + 2*self.stock_dim + 1]!=True : # check if the stock is able to sell, for simlicity we just add it in techical index
-            # if self.state[index + 1] > 0: # if we use price<0 to denote a stock is unable to trade in that day, the total asset calculation may be wrong for the price is unreasonable
-                # Sell only if the price is > 0 (no missing data in this particular date)
-                # perform sell action based on the sign of the action
-                if self.state[index + self.stock_dim + 1] > 0:
-                    # Sell only if current asset is > 0
-                    sell_num_shares = min(
-                        abs(action), self.state[index + self.stock_dim + 1]
-                    )
-                    sell_amount = (
-                        self.state[index + 1]
-                        * sell_num_shares
-                        * (1 - self.sell_cost_pct[index])
-                    )
-                    # update balance
-                    self.state[0] += sell_amount
-
-                    self.state[index + self.stock_dim + 1] -= sell_num_shares
-                    self.cost += (
-                        self.state[index + 1] * sell_num_shares * self.sell_cost_pct[index]
-                    )
-                    self.trades += 1
-                else:
-                    sell_num_shares = 0
-            else:
-                sell_num_shares = 0
-
-            return sell_num_shares
-
-        # perform sell action based on the sign of the action
-        if self.turbulence_threshold is not None:
-            if self.turbulence >= self.turbulence_threshold:
-                if self.state[index + 1] > 0:
-                    # Sell only if the price is > 0 (no missing data in this particular date)
-                    # if turbulence goes over threshold, just clear out all positions
-                    if self.state[index + self.stock_dim + 1] > 0:
-                        # Sell only if current asset is > 0
-                        sell_num_shares = self.state[index + self.stock_dim + 1]
-                        sell_amount = (
-                            self.state[index + 1]
-                            * sell_num_shares
-                            * (1 - self.sell_cost_pct[index])
-                        )
-                        # update balance
-                        self.state[0] += sell_amount
-                        self.state[index + self.stock_dim + 1] = 0
-                        self.cost += (
-                            self.state[index + 1] * sell_num_shares * self.sell_cost_pct
-                        )
-                        self.trades += 1
-                    else:
-                        sell_num_shares = 0
-                else:
-                    sell_num_shares = 0
-            else:
-                sell_num_shares = _do_sell_normal()
-        else:
-            sell_num_shares = _do_sell_normal()
-
-        return sell_num_shares
-
-    def _buy_stock(self, index, action):
-        def _do_buy():
-            if self.state[index + 2*self.stock_dim+ 1] !=True: # check if the stock is able to buy
-            # if self.state[index + 1] >0:
-                # Buy only if the price is > 0 (no missing data in this particular date)
-                available_amount = self.state[0] / (self.state[index + 1]*(1 + self.buy_cost_pct[index])) # when buying stocks, we should consider the cost of trading when calculating available_amount, or we may be have cash<0
-                # print('available_amount:{}'.format(available_amount))
-
-                # update balance
-                buy_num_shares = min(available_amount, action)
-                buy_amount = (
-                    self.state[index + 1] * buy_num_shares * (1 + self.buy_cost_pct[index])
-                )
-                self.state[0] -= buy_amount
-
-                self.state[index + self.stock_dim + 1] += buy_num_shares
-
-                self.cost += self.state[index + 1] * buy_num_shares * self.buy_cost_pct[index]
-                self.trades += 1
-            else:
-                buy_num_shares = 0
-
-            return buy_num_shares
-
-        # perform buy action based on the sign of the action
-        if self.turbulence_threshold is None:
-            buy_num_shares = _do_buy()
-        else:
-            if self.turbulence < self.turbulence_threshold:
-                buy_num_shares = _do_buy()
-            else:
-                buy_num_shares = 0
-                pass
-
-        return buy_num_shares
-
-    def _make_plot(self):
-        plt.plot(self.asset_memory, "r")
-        plt.savefig("results/account_value_trade_{}.png".format(self.episode))
-        plt.close()
-
-    def step(self, actions):
-        self.terminal = self.day >= len(self.df.index.unique()) - 1
-        if self.terminal:
-            # print(f"Episode: {self.episode}")
-            if self.make_plots:
-                self._make_plot()
-            end_total_asset = self.state[0] + sum(
-                np.array(self.state[1 : (self.stock_dim + 1)])
-                * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
-            )
-            df_total_value = pd.DataFrame(self.asset_memory)
-            tot_reward = (
-                self.state[0]
-                + sum(
-                    np.array(self.state[1 : (self.stock_dim + 1)])
-                    * np.array(
-                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
-                    )
-                )
-                - self.asset_memory[0]
-            ) # initial_amount is only cash part of our initial asset
-            df_total_value.columns = ["account_value"]
-            df_total_value["date"] = self.date_memory
-            df_total_value["daily_return"] = df_total_value["account_value"].pct_change(
-                1
-            )
-            if df_total_value["daily_return"].std() != 0:
-                sharpe = (
-                    (252 ** 0.5)
-                    * df_total_value["daily_return"].mean()
-                    / df_total_value["daily_return"].std()
-                )
-            df_rewards = pd.DataFrame(self.rewards_memory)
-            df_rewards.columns = ["account_rewards"]
-            df_rewards["date"] = self.date_memory[:-1]
-            if self.episode % self.print_verbosity == 0:
-                print(f"day: {self.day}, episode: {self.episode}")
-                print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
-                print(f"end_total_asset: {end_total_asset:0.2f}")
-                print(f"total_reward: {tot_reward:0.2f}")
-                print(f"total_cost: {self.cost:0.2f}")
-                print(f"total_trades: {self.trades}")
-                if df_total_value["daily_return"].std() != 0:
-                    print(f"Sharpe: {sharpe:0.3f}")
-                print("=================================")
-
-            if (self.model_name != "") and (self.mode != ""):
-                df_actions = self.save_action_memory()
-                df_actions.to_csv(
-                    "results/actions_{}_{}_{}.csv".format(
-                        self.mode, self.model_name, self.iteration
-                    )
-                )
-                df_total_value.to_csv(
-                    "results/account_value_{}_{}_{}.csv".format(
-                        self.mode, self.model_name, self.iteration
-                    ),
-                    index=False,
-                )
-                df_rewards.to_csv(
-                    "results/account_rewards_{}_{}_{}.csv".format(
-                        self.mode, self.model_name, self.iteration
-                    ),
-                    index=False,
-                )
-                plt.plot(self.asset_memory, "r")
-                plt.savefig(
-                    "results/account_value_{}_{}_{}.png".format(
-                        self.mode, self.model_name, self.iteration
-                    ),
-                    index=False,
-                )
-                plt.close()
-
-            # Add outputs to logger interface
-            # logger.record("environment/portfolio_value", end_total_asset)
-            # logger.record("environment/total_reward", tot_reward)
-            # logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100)
-            # logger.record("environment/total_cost", self.cost)
-            # logger.record("environment/total_trades", self.trades)
-
-            return self.state, self.reward, self.terminal, {}
-
-        else:
-            actions = actions * self.hmax  # actions initially is scaled between 0 to 1
-            actions = actions.astype(
-                int
-            )  # convert into integer because we can't by fraction of shares
-            if self.turbulence_threshold is not None:
-                if self.turbulence >= self.turbulence_threshold:
-                    actions = np.array([-self.hmax] * self.stock_dim)
-            begin_total_asset = self.state[0] + sum(
-                np.array(self.state[1 : (self.stock_dim + 1)])
-                * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
-            )
-            # print("begin_total_asset:{}".format(begin_total_asset))
-
-            argsort_actions = np.argsort(actions)
-            sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]]
-            buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]]
-
-            for index in sell_index:
-                # print(f"Num shares before: {self.state[index+self.stock_dim+1]}")
-                # print(f'take sell action before : {actions[index]}')
-                actions[index] = self._sell_stock(index, actions[index]) * (-1)
-                # print(f'take sell action after : {actions[index]}')
-                # print(f"Num shares after: {self.state[index+self.stock_dim+1]}")
-
-            for index in buy_index:
-                # print('take buy action: {}'.format(actions[index]))
-                actions[index] = self._buy_stock(index, actions[index])
-
-            self.actions_memory.append(actions)
-
-            # state: s -> s+1
-            self.day += 1
-            self.data = self.df.loc[self.day, :]
-            if self.turbulence_threshold is not None:
-                if len(self.df.tic.unique()) == 1:
-                    self.turbulence = self.data[self.risk_indicator_col]
-                elif len(self.df.tic.unique()) > 1:
-                    self.turbulence = self.data[self.risk_indicator_col].values[0]
-            self.state = self._update_state()
-
-            end_total_asset = self.state[0] + sum(
-                np.array(self.state[1 : (self.stock_dim + 1)])
-                * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
-            )
-            self.asset_memory.append(end_total_asset)
-            self.date_memory.append(self._get_date())
-            self.reward = end_total_asset - begin_total_asset
-            self.rewards_memory.append(self.reward)
-            self.reward = self.reward * self.reward_scaling
-            self.state_memory.append(self.state) # add current state in state_recorder for each step
-
-        return self.state, self.reward, self.terminal, {}
-
-    def reset(self):
-        # initiate state
-        self.state = self._initiate_state()
-
-        if self.initial:
-            self.asset_memory = [self.initial_amount+np.sum(np.array(self.initial_list[1:])*np.array(self.state[1:1+self.stock_dim]))]
-        else:
-            previous_total_asset = self.previous_state[0] + sum(
-                np.array(self.state[1 : (self.stock_dim + 1)])
-                * np.array(
-                    self.previous_state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
-                )
-            )
-            self.asset_memory = [previous_total_asset]
-
-        self.day = 0
-        self.data = self.df.loc[self.day, :]
-        self.turbulence = 0
-        self.cost = 0
-        self.trades = 0
-        self.terminal = False
-        # self.iteration=self.iteration
-        self.rewards_memory = []
-        self.actions_memory = []
-        self.date_memory = [self._get_date()]
-
-        self.episode += 1
-
-        return self.state
-
-    def render(self, mode="human", close=False):
-        return self.state
-
-    def _initiate_state(self):
-        if self.initial:
-            # For Initial State
-            if len(self.df.tic.unique()) > 1:
-                # for multiple stock
-                state = (
-                    [self.initial_amount]
-                    + self.data.close.values.tolist()
-                    + self.initial_list[1:]
-                    + sum(
-                        [
-                            self.data[tech].values.tolist()
-                            for tech in self.tech_indicator_list
-                        ],
-                        [],
-                    )
-                ) # append initial stocks_share to initial state, instead of all zero 
-            else:
-                # for single stock
-                state = (
-                    [self.initial_amount]
-                    + [self.data.close]
-                    + [0] * self.stock_dim
-                    + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
-                )
-        else:
-            # Using Previous State
-            if len(self.df.tic.unique()) > 1:
-                # for multiple stock
-                state = (
-                    [self.previous_state[0]]
-                    + self.data.close.values.tolist()
-                    + self.previous_state[
-                        (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
-                    ]
-                    + sum(
-                        [
-                            self.data[tech].values.tolist()
-                            for tech in self.tech_indicator_list
-                        ],
-                        [],
-                    )
-                )
-            else:
-                # for single stock
-                state = (
-                    [self.previous_state[0]]
-                    + [self.data.close]
-                    + self.previous_state[
-                        (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
-                    ]
-                    + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
-                )
-        return state
-
-    def _update_state(self):
-        if len(self.df.tic.unique()) > 1:
-            # for multiple stock
-            state = (
-                [self.state[0]]
-                + self.data.close.values.tolist()
-                + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
-                + sum(
-                    [
-                        self.data[tech].values.tolist()
-                        for tech in self.tech_indicator_list
-                    ],
-                    [],
-                )
-            )
-
-        else:
-            # for single stock
-            state = (
-                [self.state[0]]
-                + [self.data.close]
-                + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
-                + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
-            )
-
-        return state
-
-    def _get_date(self):
-        if len(self.df.tic.unique()) > 1:
-            date = self.data.date.unique()[0]
-        else:
-            date = self.data.date
-        return date
-
-    # add save_state_memory to preserve state in the trading process 
-    def save_state_memory(self):
-        if len(self.df.tic.unique()) > 1:
-            # date and close price length must match actions length
-            date_list = self.date_memory[:-1]
-            df_date = pd.DataFrame(date_list)
-            df_date.columns = ["date"]
-
-            state_list = self.state_memory
-            df_states = pd.DataFrame(state_list,columns=['cash','Bitcoin_price','Gold_price','Bitcoin_num','Gold_num','Bitcoin_Disable','Gold_Disable'])
-            df_states.index = df_date.date
-            # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
-        else:
-            date_list = self.date_memory[:-1]
-            state_list = self.state_memory
-            df_states = pd.DataFrame({"date": date_list, "states": state_list})
-        # print(df_states)
-        return df_states
-
-    def save_asset_memory(self):
-        date_list = self.date_memory
-        asset_list = self.asset_memory
-        # print(len(date_list))
-        # print(len(asset_list))
-        df_account_value = pd.DataFrame(
-            {"date": date_list, "account_value": asset_list}
-        )
-        return df_account_value
-
-    def save_action_memory(self):
-        if len(self.df.tic.unique()) > 1:
-            # date and close price length must match actions length
-            date_list = self.date_memory[:-1]
-            df_date = pd.DataFrame(date_list)
-            df_date.columns = ["date"]
-
-            action_list = self.actions_memory
-            df_actions = pd.DataFrame(action_list)
-            df_actions.columns = self.data.tic.values
-            df_actions.index = df_date.date
-            # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
-        else:
-            date_list = self.date_memory[:-1]
-            action_list = self.actions_memory
-            df_actions = pd.DataFrame({"date": date_list, "actions": action_list})
-        return df_actions
-
-    def _seed(self, seed=None):
-        self.np_random, seed = seeding.np_random(seed)
-        return [seed]
-
-    def get_sb_env(self):
-        e = DummyVecEnv([lambda: self])
-        obs = e.reset()
-        return e, obs
+import gym
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from gym import spaces
+from gym.utils import seeding
+from stable_baselines3.common.vec_env import DummyVecEnv
+
+matplotlib.use("Agg")
+
+# from stable_baselines3.common.logger import Logger, KVWriter, CSVOutputFormat
+
+
+class StockTradingEnv(gym.Env):
+    """A stock trading environment for OpenAI gym"""
+
+    metadata = {"render.modes": ["human"]}
+
+    def __init__(
+        self,
+        df,
+        stock_dim,
+        hmax,
+        initial_amount,
+        buy_cost_pct,
+        sell_cost_pct,
+        reward_scaling,
+        state_space,
+        action_space,
+        tech_indicator_list,
+        turbulence_threshold=None,
+        risk_indicator_col="turbulence",
+        make_plots=False,
+        print_verbosity=10,
+        day=0,
+        initial=True,
+        previous_state=[],
+        model_name="",
+        mode="",
+        iteration="",
+    ):
+        self.day = day
+        self.df = df
+        self.stock_dim = stock_dim
+        self.hmax = hmax
+        self.initial_amount = initial_amount
+        self.buy_cost_pct = buy_cost_pct
+        self.sell_cost_pct = sell_cost_pct
+        self.reward_scaling = reward_scaling
+        self.state_space = state_space
+        self.action_space = action_space
+        self.tech_indicator_list = tech_indicator_list
+        self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_space,))
+        self.observation_space = spaces.Box(
+            low=-np.inf, high=np.inf, shape=(self.state_space,)
+        )
+        self.data = self.df.loc[self.day, :]
+        self.terminal = False
+        self.make_plots = make_plots
+        self.print_verbosity = print_verbosity
+        self.turbulence_threshold = turbulence_threshold
+        self.risk_indicator_col = risk_indicator_col
+        self.initial = initial
+        self.previous_state = previous_state
+        self.model_name = model_name
+        self.mode = mode
+        self.iteration = iteration
+        # initialize state
+        self.state = self._initiate_state()
+
+        # initialize reward
+        self.reward = 0
+        self.turbulence = 0
+        self.cost = 0
+        self.trades = 0
+        self.episode = 0
+        # memorize all the total balance change
+        self.asset_memory = [self.initial_amount]
+        self.rewards_memory = []
+        self.actions_memory = []
+        self.date_memory = [self._get_date()]
+#         self.logger = Logger('results',[CSVOutputFormat])
+        # self.reset()
+        self._seed()
+
+    def _sell_stock(self, index, action):  # sell stock `index`; returns the number of shares actually sold
+        def _do_sell_normal():  # regular sell: abs(action) shares, capped at the current holding
+            if self.state[index + 1] > 0:
+                # Sell only if the price is > 0 (no missing data in this particular date)
+                # state[index + 1] is the price, state[index + stock_dim + 1] the shares held
+                if self.state[index + self.stock_dim + 1] > 0:
+                    # Sell only if current asset is > 0
+                    sell_num_shares = min(
+                        abs(action), self.state[index + self.stock_dim + 1]
+                    )
+                    sell_amount = (
+                        self.state[index + 1]
+                        * sell_num_shares
+                        * (1 - self.sell_cost_pct)
+                    )
+                    # update balance: state[0] receives the proceeds net of the sell fee
+                    self.state[0] += sell_amount
+
+                    self.state[index + self.stock_dim + 1] -= sell_num_shares
+                    self.cost += (  # running total of fees paid
+                        self.state[index + 1] * sell_num_shares * self.sell_cost_pct
+                    )
+                    self.trades += 1
+                else:
+                    sell_num_shares = 0
+            else:
+                sell_num_shares = 0
+
+            return sell_num_shares
+
+        # with a threshold set and turbulence at/above it, the whole holding is sold; otherwise sell normally
+        if self.turbulence_threshold is not None:
+            if self.turbulence >= self.turbulence_threshold:
+                if self.state[index + 1] > 0:
+                    # Sell only if the price is > 0 (no missing data in this particular date)
+                    # if turbulence goes over threshold, just clear out all positions
+                    if self.state[index + self.stock_dim + 1] > 0:
+                        # Sell only if current asset is > 0
+                        sell_num_shares = self.state[index + self.stock_dim + 1]  # `action` is ignored here
+                        sell_amount = (
+                            self.state[index + 1]
+                            * sell_num_shares
+                            * (1 - self.sell_cost_pct)
+                        )
+                        # update balance
+                        self.state[0] += sell_amount
+                        self.state[index + self.stock_dim + 1] = 0
+                        self.cost += (
+                            self.state[index + 1] * sell_num_shares * self.sell_cost_pct
+                        )
+                        self.trades += 1
+                    else:
+                        sell_num_shares = 0
+                else:
+                    sell_num_shares = 0
+            else:
+                sell_num_shares = _do_sell_normal()
+        else:
+            sell_num_shares = _do_sell_normal()
+
+        return sell_num_shares
+
+    def _buy_stock(self, index, action):  # buy stock `index`; returns the number of shares actually bought
+        def _do_buy():  # buy min(action, affordable shares) and book the purchase in self.state
+            if self.state[index + 1] > 0:
+                # Buy only if the price is > 0 (no missing data in this particular date)
+                available_amount = self.state[0] // self.state[index + 1]  # whole shares the cash covers at the bare price
+                # NOTE(review): this cap ignores buy_cost_pct, so buy_amount below can exceed state[0] — confirm
+
+                # buy at most `action` shares, then debit price * shares plus the proportional fee
+                buy_num_shares = min(available_amount, action)
+                buy_amount = (
+                    self.state[index + 1] * buy_num_shares * (1 + self.buy_cost_pct)
+                )
+                self.state[0] -= buy_amount
+
+                self.state[index + self.stock_dim + 1] += buy_num_shares
+
+                self.cost += self.state[index + 1] * buy_num_shares * self.buy_cost_pct
+                self.trades += 1  # incremented even when buy_num_shares is 0
+            else:
+                buy_num_shares = 0
+
+            return buy_num_shares
+
+        # buying is skipped (0 shares) while turbulence is at or above the configured threshold
+        if self.turbulence_threshold is None:
+            buy_num_shares = _do_buy()
+        else:
+            if self.turbulence < self.turbulence_threshold:
+                buy_num_shares = _do_buy()
+            else:
+                buy_num_shares = 0
+                pass
+
+        return buy_num_shares
+
+    def _make_plot(self):  # plot asset_memory and save it as a PNG named after the current episode
+        plt.plot(self.asset_memory, "r")
+        plt.savefig("results/account_value_trade_{}.png".format(self.episode))  # assumes results/ exists — TODO confirm
+        plt.close()  # close the figure so successive episodes do not draw on the same axes
+
+    def step(self, actions):  # advance one day; returns (state, reward, terminal, {})
+        self.terminal = self.day >= len(self.df.index.unique()) - 1  # True once day reaches the last index value
+        if self.terminal:
+            # episode end: summarise the run, optionally plot/save the results, then return without trading
+            if self.make_plots:
+                self._make_plot()
+            end_total_asset = self.state[0] + sum(  # cash + sum(price * shares held)
+                np.array(self.state[1 : (self.stock_dim + 1)])
+                * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
+            )
+            df_total_value = pd.DataFrame(self.asset_memory)
+            tot_reward = (  # final portfolio value minus the initial amount
+                self.state[0]
+                + sum(
+                    np.array(self.state[1 : (self.stock_dim + 1)])
+                    * np.array(
+                        self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
+                    )
+                )
+                - self.initial_amount
+            )
+            df_total_value.columns = ["account_value"]
+            df_total_value["date"] = self.date_memory
+            df_total_value["daily_return"] = df_total_value["account_value"].pct_change(
+                1
+            )
+            if df_total_value["daily_return"].std() != 0:  # `sharpe` is only bound when this holds
+                sharpe = (
+                    (252 ** 0.5)  # presumably annualises daily returns (252 periods/year) — confirm
+                    * df_total_value["daily_return"].mean()
+                    / df_total_value["daily_return"].std()
+                )
+            df_rewards = pd.DataFrame(self.rewards_memory)
+            df_rewards.columns = ["account_rewards"]
+            df_rewards["date"] = self.date_memory[:-1]  # date_memory holds one more entry than rewards_memory
+            if self.episode % self.print_verbosity == 0:  # print a summary every print_verbosity episodes
+                print(f"day: {self.day}, episode: {self.episode}")
+                print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
+                print(f"end_total_asset: {end_total_asset:0.2f}")
+                print(f"total_reward: {tot_reward:0.2f}")
+                print(f"total_cost: {self.cost:0.2f}")
+                print(f"total_trades: {self.trades}")
+                if df_total_value["daily_return"].std() != 0:  # same guard as where sharpe is computed
+                    print(f"Sharpe: {sharpe:0.3f}")
+                print("=================================")
+
+            if (self.model_name != "") and (self.mode != ""):  # files are written only when both are set
+                df_actions = self.save_action_memory()
+                df_actions.to_csv(
+                    "results/actions_{}_{}_{}.csv".format(
+                        self.mode, self.model_name, self.iteration
+                    )
+                )
+                df_total_value.to_csv(
+                    "results/account_value_{}_{}_{}.csv".format(
+                        self.mode, self.model_name, self.iteration
+                    ),
+                    index=False,
+                )
+                df_rewards.to_csv(
+                    "results/account_rewards_{}_{}_{}.csv".format(
+                        self.mode, self.model_name, self.iteration
+                    ),
+                    index=False,
+                )
+                plt.plot(self.asset_memory, "r")
+                plt.savefig(
+                    "results/account_value_{}_{}_{}.png".format(
+                        self.mode, self.model_name, self.iteration
+                    ),
+                    index=False,  # NOTE(review): `index` is a to_csv option, not a documented savefig one — likely a copy-paste slip; confirm
+                )
+                plt.close()
+
+            # Add outputs to logger interface
+            # self.logger.record("environment/portfolio_value", end_total_asset)
+#             self.logger.record("environment/total_reward", tot_reward)
+#             self.logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100)
+#             self.logger.record("environment/total_cost", self.cost)
+#             self.logger.record("environment/total_trades", self.trades)
+
+            return self.state, self.reward, self.terminal, {}  # NOTE(review): reward is not recomputed in this branch; it is the previous step's value
+
+        else:
+
+            actions = actions * self.hmax  # scale actions (action_space bounds are -1 to 1) by hmax shares
+            actions = actions.astype(
+                int
+            )  # convert to integers because we can't buy a fraction of a share
+            if self.turbulence_threshold is not None:
+                if self.turbulence >= self.turbulence_threshold:
+                    actions = np.array([-self.hmax] * self.stock_dim)  # override: request selling hmax of every stock
+            begin_total_asset = self.state[0] + sum(  # portfolio value before trading
+                np.array(self.state[1 : (self.stock_dim + 1)])
+                * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
+            )
+            # ordering below: most negative actions are sold first, largest positive actions are bought first
+
+            argsort_actions = np.argsort(actions)
+
+            sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]]
+            buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]]
+
+            for index in sell_index:
+                # print(f"Num shares before: {self.state[index+self.stock_dim+1]}")
+                # replace the requested quantity with the executed one (negated to mark a sale)
+                actions[index] = self._sell_stock(index, actions[index]) * (-1)
+                # print(f'take sell action after : {actions[index]}')
+                # print(f"Num shares after: {self.state[index+self.stock_dim+1]}")
+
+            for index in buy_index:
+                # replace the requested quantity with the number of shares actually bought
+                actions[index] = self._buy_stock(index, actions[index])
+
+            self.actions_memory.append(actions)
+
+            # state: s -> s+1
+            self.day += 1
+            self.data = self.df.loc[self.day, :]
+            if self.turbulence_threshold is not None:
+                if len(self.df.tic.unique()) == 1:
+                    self.turbulence = self.data[self.risk_indicator_col]  # single ticker: read the value directly
+                elif len(self.df.tic.unique()) > 1:
+                    self.turbulence = self.data[self.risk_indicator_col].values[0]  # several tickers: take the first value
+            self.state = self._update_state()
+
+            end_total_asset = self.state[0] + sum(  # portfolio value at the new day's prices
+                np.array(self.state[1 : (self.stock_dim + 1)])
+                * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
+            )
+            self.asset_memory.append(end_total_asset)
+            self.date_memory.append(self._get_date())
+            self.reward = end_total_asset - begin_total_asset  # reward is the change in portfolio value
+            self.rewards_memory.append(self.reward)  # stored unscaled
+            self.reward = self.reward * self.reward_scaling  # the returned reward is scaled
+
+        return self.state, self.reward, self.terminal, {}
+
+    def reset(self):  # start a new episode: rebuild state, clear per-episode records, return the state
+        # NOTE(review): the state is built from self.data, which is only rewound to day 0 further down — confirm ordering
+        self.state = self._initiate_state()
+
+        if self.initial:
+            self.asset_memory = [self.initial_amount]
+        else:
+            previous_total_asset = self.previous_state[0] + sum(  # previous cash + price * previous holdings
+                np.array(self.state[1 : (self.stock_dim + 1)])
+                * np.array(
+                    self.previous_state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]
+                )
+            )
+            self.asset_memory = [previous_total_asset]
+
+        self.day = 0
+        self.data = self.df.loc[self.day, :]
+        self.turbulence = 0
+        self.cost = 0
+        self.trades = 0
+        self.terminal = False
+        # self.iteration=self.iteration
+        self.rewards_memory = []
+        self.actions_memory = []
+        self.date_memory = [self._get_date()]
+
+        self.episode += 1  # counts how many times reset() has been called
+
+        return self.state
+
+    def render(self, mode="human", close=False):  # draws nothing; `mode` and `close` are unused
+        return self.state
+
+    def _initiate_state(self):  # build the state list: [cash] + close prices + holdings + technical indicators
+        if self.initial:
+            # For Initial State: cash is initial_amount and every holding is 0
+            if len(self.df.tic.unique()) > 1:
+                # for multiple stock
+                state = (
+                    [self.initial_amount]
+                    + self.data.close.values.tolist()
+                    + [0] * self.stock_dim
+                    + sum(  # flatten the per-indicator value lists into one list
+                        [
+                            self.data[tech].values.tolist()
+                            for tech in self.tech_indicator_list
+                        ],
+                        [],
+                    )
+                )
+            else:
+                # for single stock
+                state = (
+                    [self.initial_amount]
+                    + [self.data.close]
+                    + [0] * self.stock_dim
+                    + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
+                )
+        else:
+            # Using Previous State: cash and holdings come from previous_state, prices/indicators from self.data
+            if len(self.df.tic.unique()) > 1:
+                # for multiple stock
+                state = (
+                    [self.previous_state[0]]
+                    + self.data.close.values.tolist()
+                    + self.previous_state[  # NOTE(review): not wrapped in list() as in _update_state; assumes previous_state is a list — confirm
+                        (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
+                    ]
+                    + sum(
+                        [
+                            self.data[tech].values.tolist()
+                            for tech in self.tech_indicator_list
+                        ],
+                        [],
+                    )
+                )
+            else:
+                # for single stock
+                state = (
+                    [self.previous_state[0]]
+                    + [self.data.close]
+                    + self.previous_state[
+                        (self.stock_dim + 1) : (self.stock_dim * 2 + 1)
+                    ]
+                    + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
+                )
+        return state
+
+    def _update_state(self):  # keep cash and holdings from self.state; refresh prices and indicators from self.data
+        if len(self.df.tic.unique()) > 1:
+            # for multiple stock
+            state = (
+                [self.state[0]]
+                + self.data.close.values.tolist()
+                + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
+                + sum(  # flatten the per-indicator value lists into one list
+                    [
+                        self.data[tech].values.tolist()
+                        for tech in self.tech_indicator_list
+                    ],
+                    [],
+                )
+            )
+
+        else:
+            # for single stock
+            state = (
+                [self.state[0]]
+                + [self.data.close]
+                + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)])
+                + sum([[self.data[tech]] for tech in self.tech_indicator_list], [])
+            )
+
+        return state
+
+    def _get_date(self):  # return the date of the current self.data slice
+        if len(self.df.tic.unique()) > 1:
+            date = self.data.date.unique()[0]  # several tickers: take the first unique date value
+        else:
+            date = self.data.date  # single ticker: the attribute is used as-is
+        return date
+
+    def save_asset_memory(self):  # return a DataFrame with columns "date" and "account_value"
+        date_list = self.date_memory
+        asset_list = self.asset_memory
+        # print(len(date_list))
+        # print(len(asset_list))
+        df_account_value = pd.DataFrame(
+            {"date": date_list, "account_value": asset_list}
+        )
+        return df_account_value
+
+    def save_action_memory(self):  # return the recorded actions as a DataFrame
+        if len(self.df.tic.unique()) > 1:
+            # date and close price length must match actions length
+            date_list = self.date_memory[:-1]  # drop the last date so dates line up with actions
+            df_date = pd.DataFrame(date_list)
+            df_date.columns = ["date"]
+
+            action_list = self.actions_memory
+            df_actions = pd.DataFrame(action_list)
+            df_actions.columns = self.data.tic.values  # one column per ticker
+            df_actions.index = df_date.date  # rows are indexed by date
+            # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
+        else:
+            date_list = self.date_memory[:-1]
+            action_list = self.actions_memory
+            df_actions = pd.DataFrame({"date": date_list, "actions": action_list})  # single ticker: two plain columns
+        return df_actions
+
+    def _seed(self, seed=None):  # store the generator from seeding.np_random on self.np_random
+        self.np_random, seed = seeding.np_random(seed)  # the call also returns the seed it used
+        return [seed]  # returned wrapped in a one-element list
+
+    def get_sb_env(self):  # wrap this environment in a DummyVecEnv and return (env, first observation)
+        e = DummyVecEnv([lambda: self])  # the factory returns this same instance, not a copy
+        obs = e.reset()
+        return e, obs