From ca78a708c69cbf2920d4f4d2a5ecaeea9e9098d5 Mon Sep 17 00:00:00 2001 From: Xiao-Yang Liu Date: Thu, 3 Mar 2022 13:37:40 -0500 Subject: [PATCH] Revert "Shenlei151 flexibility enhance" --- RL_stock.py | 133 --- finrl/drl_agents/stablebaselines3/models.py | 2 - .../env_stock_trading/env_stocktrading.py | 991 +++++++++--------- 3 files changed, 484 insertions(+), 642 deletions(-) delete mode 100644 RL_stock.py diff --git a/RL_stock.py b/RL_stock.py deleted file mode 100644 index c7311b622..000000000 --- a/RL_stock.py +++ /dev/null @@ -1,133 +0,0 @@ -# import packages - -from finrl.apps import config -import pandas as pd -import numpy as np -import matplotlib -import matplotlib.pyplot as plt -# matplotlib.use('Agg') -import datetime - -from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader -from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split -from finrl.finrl_meta.env_stock_trading.env_stocktrading import StockTradingEnv -from finrl.drl_agents.stablebaselines3.models import DRLAgent -from finrl.finrl_meta.data_processor import DataProcessor - -from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline -from pprint import pprint - -import sys -sys.path.append("../FinRL-Library") - -import itertools - -import os -if not os.path.exists("./" + config.DATA_SAVE_DIR): - os.makedirs("./" + config.DATA_SAVE_DIR) -if not os.path.exists("./" + config.TRAINED_MODEL_DIR): - os.makedirs("./" + config.TRAINED_MODEL_DIR) -if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR): - os.makedirs("./" + config.TENSORBOARD_LOG_DIR) - - -# read data -df=pd.read_csv("new.csv").iloc[:,1:] - -# data preprocess -# print(df) -# df.loc[0,'USD (PM)']=-1 -# df=df.fillna(-1) -list_ticker=['Gold','Bitcoin'] -list_date=list(pd.date_range(df['date'].min(),df['date'].max()).astype(str)) -combination=list(itertools.product(list_date,list_ticker)) - -df.columns=['date','Bitcoin','Gold'] -df=df.sort_values(['date']) -processed=df.melt(id_vars=['date'],value_vars=['Gold','Bitcoin'],var_name='tic',value_name='close') -# print(df.info()) -# print(processed.info()) -# print(processed) -processed['Disable']=processed['close'].apply(pd.isna) -processed[processed['tic']=='Gold']=processed[processed['tic']=='Gold'].fillna(method='pad') -processed.loc[0,'close']=1324 -processed_full=processed.sort_values(['date','tic'],ignore_index=True) -# processed_full=processed -# print(processed_full) -# print(processed_full.isna().any()) -# processed_full.close=processed.close.astype('object') -# print(processed_full.info()) -time=datetime.date(2016,9,11) - -# initial = [cash, initial_stock1_share, initial_stock2_share] -initial=[1000,0,0] - -# initial dataframe used to store the result of model -all_action=pd.DataFrame(columns=['date','Bitcoin','Gold']) -all_value=pd.DataFrame(columns=['date','account_value']) -all_state=pd.DataFrame(columns=['cash','Bitcoin_price','Gold_price','Bitcoin_num','Gold_num','Bitcoin_Disable','Gold_Disable']) - -# trainning & trading process -while(time+datetime.timedelta(days=30) 0: # if we use price<0 to denote a stock is unable to trade in that day, the total asset calculation may be wrong for the price is unreasonable - # Sell only if the price is > 0 (no missing data in this particular date) - # perform sell action based on the sign of the action - if self.state[index + self.stock_dim + 1] > 0: - # Sell only if current asset is > 0 - sell_num_shares = min( - abs(action), self.state[index + self.stock_dim + 1] - ) - sell_amount = ( - self.state[index + 1] - * sell_num_shares - * (1 - self.sell_cost_pct[index]) - ) - # update balance - self.state[0] += sell_amount - - self.state[index + self.stock_dim + 1] -= sell_num_shares - self.cost += ( - self.state[index + 1] * sell_num_shares * self.sell_cost_pct[index] - ) - self.trades += 1 - else: - sell_num_shares = 0 - else: - sell_num_shares = 0 - - return sell_num_shares - - # perform sell action based on the sign of the action - if self.turbulence_threshold is not None: - if self.turbulence >= self.turbulence_threshold: - if self.state[index + 1] > 0: - # Sell only if the price is > 0 (no missing data in this particular date) - # if turbulence goes over threshold, just clear out all positions - if self.state[index + self.stock_dim + 1] > 0: - # Sell only if current asset is > 0 - sell_num_shares = self.state[index + self.stock_dim + 1] - sell_amount = ( - self.state[index + 1] - * sell_num_shares - * (1 - self.sell_cost_pct[index]) - ) - # update balance - self.state[0] += sell_amount - self.state[index + self.stock_dim + 1] = 0 - self.cost += ( - self.state[index + 1] * sell_num_shares * self.sell_cost_pct - ) - self.trades += 1 - else: - sell_num_shares = 0 - else: - sell_num_shares = 0 - else: - sell_num_shares = _do_sell_normal() - else: - sell_num_shares = _do_sell_normal() - - return sell_num_shares - - def _buy_stock(self, index, action): - def _do_buy(): - if self.state[index + 2*self.stock_dim+ 1] !=True: # check if the stock is able to buy - # if self.state[index + 1] >0: - # Buy only if the price is > 0 (no missing data in this particular date) - available_amount = self.state[0] / (self.state[index + 1]*(1 + self.buy_cost_pct[index])) # when buying stocks, we should consider the cost of trading when calculating available_amount, or we may be have cash<0 - # print('available_amount:{}'.format(available_amount)) - - # update balance - buy_num_shares = min(available_amount, action) - buy_amount = ( - self.state[index + 1] * buy_num_shares * (1 + self.buy_cost_pct[index]) - ) - self.state[0] -= buy_amount - - self.state[index + self.stock_dim + 1] += buy_num_shares - - self.cost += self.state[index + 1] * buy_num_shares * self.buy_cost_pct[index] - self.trades += 1 - else: - buy_num_shares = 0 - - return buy_num_shares - - # perform buy action based on the sign of the action - if self.turbulence_threshold is None: - buy_num_shares = _do_buy() - else: - if self.turbulence < self.turbulence_threshold: - buy_num_shares = _do_buy() - else: - buy_num_shares = 0 - pass - - return buy_num_shares - - def _make_plot(self): - plt.plot(self.asset_memory, "r") - plt.savefig("results/account_value_trade_{}.png".format(self.episode)) - plt.close() - - def step(self, actions): - self.terminal = self.day >= len(self.df.index.unique()) - 1 - if self.terminal: - # print(f"Episode: {self.episode}") - if self.make_plots: - self._make_plot() - end_total_asset = self.state[0] + sum( - np.array(self.state[1 : (self.stock_dim + 1)]) - * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) - ) - df_total_value = pd.DataFrame(self.asset_memory) - tot_reward = ( - self.state[0] - + sum( - np.array(self.state[1 : (self.stock_dim + 1)]) - * np.array( - self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)] - ) - ) - - self.asset_memory[0] - ) # initial_amount is only cash part of our initial asset - df_total_value.columns = ["account_value"] - df_total_value["date"] = self.date_memory - df_total_value["daily_return"] = df_total_value["account_value"].pct_change( - 1 - ) - if df_total_value["daily_return"].std() != 0: - sharpe = ( - (252 ** 0.5) - * df_total_value["daily_return"].mean() - / df_total_value["daily_return"].std() - ) - df_rewards = pd.DataFrame(self.rewards_memory) - df_rewards.columns = ["account_rewards"] - df_rewards["date"] = self.date_memory[:-1] - if self.episode % self.print_verbosity == 0: - print(f"day: {self.day}, episode: {self.episode}") - print(f"begin_total_asset: {self.asset_memory[0]:0.2f}") - print(f"end_total_asset: {end_total_asset:0.2f}") - print(f"total_reward: {tot_reward:0.2f}") - print(f"total_cost: {self.cost:0.2f}") - print(f"total_trades: {self.trades}") - if df_total_value["daily_return"].std() != 0: - print(f"Sharpe: {sharpe:0.3f}") - print("=================================") - - if (self.model_name != "") and (self.mode != ""): - df_actions = self.save_action_memory() - df_actions.to_csv( - "results/actions_{}_{}_{}.csv".format( - self.mode, self.model_name, self.iteration - ) - ) - df_total_value.to_csv( - "results/account_value_{}_{}_{}.csv".format( - self.mode, self.model_name, self.iteration - ), - index=False, - ) - df_rewards.to_csv( - "results/account_rewards_{}_{}_{}.csv".format( - self.mode, self.model_name, self.iteration - ), - index=False, - ) - plt.plot(self.asset_memory, "r") - plt.savefig( - "results/account_value_{}_{}_{}.png".format( - self.mode, self.model_name, self.iteration - ), - index=False, - ) - plt.close() - - # Add outputs to logger interface - # logger.record("environment/portfolio_value", end_total_asset) - # logger.record("environment/total_reward", tot_reward) - # logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100) - # logger.record("environment/total_cost", self.cost) - # logger.record("environment/total_trades", self.trades) - - return self.state, self.reward, self.terminal, {} - - else: - actions = actions * self.hmax # actions initially is scaled between 0 to 1 - actions = actions.astype( - int - ) # convert into integer because we can't by fraction of shares - if self.turbulence_threshold is not None: - if self.turbulence >= self.turbulence_threshold: - actions = np.array([-self.hmax] * self.stock_dim) - begin_total_asset = self.state[0] + sum( - np.array(self.state[1 : (self.stock_dim + 1)]) - * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) - ) - # print("begin_total_asset:{}".format(begin_total_asset)) - - argsort_actions = np.argsort(actions) - sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]] - buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]] - - for index in sell_index: - # print(f"Num shares before: {self.state[index+self.stock_dim+1]}") - # print(f'take sell action before : {actions[index]}') - actions[index] = self._sell_stock(index, actions[index]) * (-1) - # print(f'take sell action after : {actions[index]}') - # print(f"Num shares after: {self.state[index+self.stock_dim+1]}") - - for index in buy_index: - # print('take buy action: {}'.format(actions[index])) - actions[index] = self._buy_stock(index, actions[index]) - - self.actions_memory.append(actions) - - # state: s -> s+1 - self.day += 1 - self.data = self.df.loc[self.day, :] - if self.turbulence_threshold is not None: - if len(self.df.tic.unique()) == 1: - self.turbulence = self.data[self.risk_indicator_col] - elif len(self.df.tic.unique()) > 1: - self.turbulence = self.data[self.risk_indicator_col].values[0] - self.state = self._update_state() - - end_total_asset = self.state[0] + sum( - np.array(self.state[1 : (self.stock_dim + 1)]) - * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) - ) - self.asset_memory.append(end_total_asset) - self.date_memory.append(self._get_date()) - self.reward = end_total_asset - begin_total_asset - self.rewards_memory.append(self.reward) - self.reward = self.reward * self.reward_scaling - self.state_memory.append(self.state) # add current state in state_recorder for each step - - return self.state, self.reward, self.terminal, {} - - def reset(self): - # initiate state - self.state = self._initiate_state() - - if self.initial: - self.asset_memory = [self.initial_amount+np.sum(np.array(self.initial_list[1:])*np.array(self.state[1:1+self.stock_dim]))] - else: - previous_total_asset = self.previous_state[0] + sum( - np.array(self.state[1 : (self.stock_dim + 1)]) - * np.array( - self.previous_state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)] - ) - ) - self.asset_memory = [previous_total_asset] - - self.day = 0 - self.data = self.df.loc[self.day, :] - self.turbulence = 0 - self.cost = 0 - self.trades = 0 - self.terminal = False - # self.iteration=self.iteration - self.rewards_memory = [] - self.actions_memory = [] - self.date_memory = [self._get_date()] - - self.episode += 1 - - return self.state - - def render(self, mode="human", close=False): - return self.state - - def _initiate_state(self): - if self.initial: - # For Initial State - if len(self.df.tic.unique()) > 1: - # for multiple stock - state = ( - [self.initial_amount] - + self.data.close.values.tolist() - + self.initial_list[1:] - + sum( - [ - self.data[tech].values.tolist() - for tech in self.tech_indicator_list - ], - [], - ) - ) # append initial stocks_share to initial state, instead of all zero - else: - # for single stock - state = ( - [self.initial_amount] - + [self.data.close] - + [0] * self.stock_dim - + sum([[self.data[tech]] for tech in self.tech_indicator_list], []) - ) - else: - # Using Previous State - if len(self.df.tic.unique()) > 1: - # for multiple stock - state = ( - [self.previous_state[0]] - + self.data.close.values.tolist() - + self.previous_state[ - (self.stock_dim + 1) : (self.stock_dim * 2 + 1) - ] - + sum( - [ - self.data[tech].values.tolist() - for tech in self.tech_indicator_list - ], - [], - ) - ) - else: - # for single stock - state = ( - [self.previous_state[0]] - + [self.data.close] - + self.previous_state[ - (self.stock_dim + 1) : (self.stock_dim * 2 + 1) - ] - + sum([[self.data[tech]] for tech in self.tech_indicator_list], []) - ) - return state - - def _update_state(self): - if len(self.df.tic.unique()) > 1: - # for multiple stock - state = ( - [self.state[0]] - + self.data.close.values.tolist() - + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) - + sum( - [ - self.data[tech].values.tolist() - for tech in self.tech_indicator_list - ], - [], - ) - ) - - else: - # for single stock - state = ( - [self.state[0]] - + [self.data.close] - + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) - + sum([[self.data[tech]] for tech in self.tech_indicator_list], []) - ) - - return state - - def _get_date(self): - if len(self.df.tic.unique()) > 1: - date = self.data.date.unique()[0] - else: - date = self.data.date - return date - - # add save_state_memory to preserve state in the trading process - def save_state_memory(self): - if len(self.df.tic.unique()) > 1: - # date and close price length must match actions length - date_list = self.date_memory[:-1] - df_date = pd.DataFrame(date_list) - df_date.columns = ["date"] - - state_list = self.state_memory - df_states = pd.DataFrame(state_list,columns=['cash','Bitcoin_price','Gold_price','Bitcoin_num','Gold_num','Bitcoin_Disable','Gold_Disable']) - df_states.index = df_date.date - # df_actions = pd.DataFrame({'date':date_list,'actions':action_list}) - else: - date_list = self.date_memory[:-1] - state_list = self.state_memory - df_states = pd.DataFrame({"date": date_list, "states": state_list}) - # print(df_states) - return df_states - - def save_asset_memory(self): - date_list = self.date_memory - asset_list = self.asset_memory - # print(len(date_list)) - # print(len(asset_list)) - df_account_value = pd.DataFrame( - {"date": date_list, "account_value": asset_list} - ) - return df_account_value - - def save_action_memory(self): - if len(self.df.tic.unique()) > 1: - # date and close price length must match actions length - date_list = self.date_memory[:-1] - df_date = pd.DataFrame(date_list) - df_date.columns = ["date"] - - action_list = self.actions_memory - df_actions = pd.DataFrame(action_list) - df_actions.columns = self.data.tic.values - df_actions.index = df_date.date - # df_actions = pd.DataFrame({'date':date_list,'actions':action_list}) - else: - date_list = self.date_memory[:-1] - action_list = self.actions_memory - df_actions = pd.DataFrame({"date": date_list, "actions": action_list}) - return df_actions - - def _seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def get_sb_env(self): - e = DummyVecEnv([lambda: self]) - obs = e.reset() - return e, obs +import gym +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from gym import spaces +from gym.utils import seeding +from stable_baselines3.common.vec_env import DummyVecEnv + +matplotlib.use("Agg") + +# from stable_baselines3.common.logger import Logger, KVWriter, CSVOutputFormat + + +class StockTradingEnv(gym.Env): + """A stock trading environment for OpenAI gym""" + + metadata = {"render.modes": ["human"]} + + def __init__( + self, + df, + stock_dim, + hmax, + initial_amount, + buy_cost_pct, + sell_cost_pct, + reward_scaling, + state_space, + action_space, + tech_indicator_list, + turbulence_threshold=None, + risk_indicator_col="turbulence", + make_plots=False, + print_verbosity=10, + day=0, + initial=True, + previous_state=[], + model_name="", + mode="", + iteration="", + ): + self.day = day + self.df = df + self.stock_dim = stock_dim + self.hmax = hmax + self.initial_amount = initial_amount + self.buy_cost_pct = buy_cost_pct + self.sell_cost_pct = sell_cost_pct + self.reward_scaling = reward_scaling + self.state_space = state_space + self.action_space = action_space + self.tech_indicator_list = tech_indicator_list + self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_space,)) + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(self.state_space,) + ) + self.data = self.df.loc[self.day, :] + self.terminal = False + self.make_plots = make_plots + self.print_verbosity = print_verbosity + self.turbulence_threshold = turbulence_threshold + self.risk_indicator_col = risk_indicator_col + self.initial = initial + self.previous_state = previous_state + self.model_name = model_name + self.mode = mode + self.iteration = iteration + # initalize state + self.state = self._initiate_state() + + # initialize reward + self.reward = 0 + self.turbulence = 0 + self.cost = 0 + self.trades = 0 + self.episode = 0 + # memorize all the total balance change + self.asset_memory = [self.initial_amount] + self.rewards_memory = [] + self.actions_memory = [] + self.date_memory = [self._get_date()] +# self.logger = Logger('results',[CSVOutputFormat]) + # self.reset() + self._seed() + + def _sell_stock(self, index, action): + def _do_sell_normal(): + if self.state[index + 1] > 0: + # Sell only if the price is > 0 (no missing data in this particular date) + # perform sell action based on the sign of the action + if self.state[index + self.stock_dim + 1] > 0: + # Sell only if current asset is > 0 + sell_num_shares = min( + abs(action), self.state[index + self.stock_dim + 1] + ) + sell_amount = ( + self.state[index + 1] + * sell_num_shares + * (1 - self.sell_cost_pct) + ) + # update balance + self.state[0] += sell_amount + + self.state[index + self.stock_dim + 1] -= sell_num_shares + self.cost += ( + self.state[index + 1] * sell_num_shares * self.sell_cost_pct + ) + self.trades += 1 + else: + sell_num_shares = 0 + else: + sell_num_shares = 0 + + return sell_num_shares + + # perform sell action based on the sign of the action + if self.turbulence_threshold is not None: + if self.turbulence >= self.turbulence_threshold: + if self.state[index + 1] > 0: + # Sell only if the price is > 0 (no missing data in this particular date) + # if turbulence goes over threshold, just clear out all positions + if self.state[index + self.stock_dim + 1] > 0: + # Sell only if current asset is > 0 + sell_num_shares = self.state[index + self.stock_dim + 1] + sell_amount = ( + self.state[index + 1] + * sell_num_shares + * (1 - self.sell_cost_pct) + ) + # update balance + self.state[0] += sell_amount + self.state[index + self.stock_dim + 1] = 0 + self.cost += ( + self.state[index + 1] * sell_num_shares * self.sell_cost_pct + ) + self.trades += 1 + else: + sell_num_shares = 0 + else: + sell_num_shares = 0 + else: + sell_num_shares = _do_sell_normal() + else: + sell_num_shares = _do_sell_normal() + + return sell_num_shares + + def _buy_stock(self, index, action): + def _do_buy(): + if self.state[index + 1] > 0: + # Buy only if the price is > 0 (no missing data in this particular date) + available_amount = self.state[0] // self.state[index + 1] + # print('available_amount:{}'.format(available_amount)) + + # update balance + buy_num_shares = min(available_amount, action) + buy_amount = ( + self.state[index + 1] * buy_num_shares * (1 + self.buy_cost_pct) + ) + self.state[0] -= buy_amount + + self.state[index + self.stock_dim + 1] += buy_num_shares + + self.cost += self.state[index + 1] * buy_num_shares * self.buy_cost_pct + self.trades += 1 + else: + buy_num_shares = 0 + + return buy_num_shares + + # perform buy action based on the sign of the action + if self.turbulence_threshold is None: + buy_num_shares = _do_buy() + else: + if self.turbulence < self.turbulence_threshold: + buy_num_shares = _do_buy() + else: + buy_num_shares = 0 + pass + + return buy_num_shares + + def _make_plot(self): + plt.plot(self.asset_memory, "r") + plt.savefig("results/account_value_trade_{}.png".format(self.episode)) + plt.close() + + def step(self, actions): + self.terminal = self.day >= len(self.df.index.unique()) - 1 + if self.terminal: + # print(f"Episode: {self.episode}") + if self.make_plots: + self._make_plot() + end_total_asset = self.state[0] + sum( + np.array(self.state[1 : (self.stock_dim + 1)]) + * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) + ) + df_total_value = pd.DataFrame(self.asset_memory) + tot_reward = ( + self.state[0] + + sum( + np.array(self.state[1 : (self.stock_dim + 1)]) + * np.array( + self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)] + ) + ) + - self.initial_amount + ) + df_total_value.columns = ["account_value"] + df_total_value["date"] = self.date_memory + df_total_value["daily_return"] = df_total_value["account_value"].pct_change( + 1 + ) + if df_total_value["daily_return"].std() != 0: + sharpe = ( + (252 ** 0.5) + * df_total_value["daily_return"].mean() + / df_total_value["daily_return"].std() + ) + df_rewards = pd.DataFrame(self.rewards_memory) + df_rewards.columns = ["account_rewards"] + df_rewards["date"] = self.date_memory[:-1] + if self.episode % self.print_verbosity == 0: + print(f"day: {self.day}, episode: {self.episode}") + print(f"begin_total_asset: {self.asset_memory[0]:0.2f}") + print(f"end_total_asset: {end_total_asset:0.2f}") + print(f"total_reward: {tot_reward:0.2f}") + print(f"total_cost: {self.cost:0.2f}") + print(f"total_trades: {self.trades}") + if df_total_value["daily_return"].std() != 0: + print(f"Sharpe: {sharpe:0.3f}") + print("=================================") + + if (self.model_name != "") and (self.mode != ""): + df_actions = self.save_action_memory() + df_actions.to_csv( + "results/actions_{}_{}_{}.csv".format( + self.mode, self.model_name, self.iteration + ) + ) + df_total_value.to_csv( + "results/account_value_{}_{}_{}.csv".format( + self.mode, self.model_name, self.iteration + ), + index=False, + ) + df_rewards.to_csv( + "results/account_rewards_{}_{}_{}.csv".format( + self.mode, self.model_name, self.iteration + ), + index=False, + ) + plt.plot(self.asset_memory, "r") + plt.savefig( + "results/account_value_{}_{}_{}.png".format( + self.mode, self.model_name, self.iteration + ), + index=False, + ) + plt.close() + + # Add outputs to logger interface + # self.logger.record("environment/portfolio_value", end_total_asset) +# self.logger.record("environment/total_reward", tot_reward) +# self.logger.record("environment/total_reward_pct", (tot_reward / (end_total_asset - tot_reward)) * 100) +# self.logger.record("environment/total_cost", self.cost) +# self.logger.record("environment/total_trades", self.trades) + + return self.state, self.reward, self.terminal, {} + + else: + + actions = actions * self.hmax # actions initially is scaled between 0 to 1 + actions = actions.astype( + int + ) # convert into integer because we can't by fraction of shares + if self.turbulence_threshold is not None: + if self.turbulence >= self.turbulence_threshold: + actions = np.array([-self.hmax] * self.stock_dim) + begin_total_asset = self.state[0] + sum( + np.array(self.state[1 : (self.stock_dim + 1)]) + * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) + ) + # print("begin_total_asset:{}".format(begin_total_asset)) + + argsort_actions = np.argsort(actions) + + sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]] + buy_index = argsort_actions[::-1][: np.where(actions > 0)[0].shape[0]] + + for index in sell_index: + # print(f"Num shares before: {self.state[index+self.stock_dim+1]}") + # print(f'take sell action before : {actions[index]}') + actions[index] = self._sell_stock(index, actions[index]) * (-1) + # print(f'take sell action after : {actions[index]}') + # print(f"Num shares after: {self.state[index+self.stock_dim+1]}") + + for index in buy_index: + # print('take buy action: {}'.format(actions[index])) + actions[index] = self._buy_stock(index, actions[index]) + + self.actions_memory.append(actions) + + # state: s -> s+1 + self.day += 1 + self.data = self.df.loc[self.day, :] + if self.turbulence_threshold is not None: + if len(self.df.tic.unique()) == 1: + self.turbulence = self.data[self.risk_indicator_col] + elif len(self.df.tic.unique()) > 1: + self.turbulence = self.data[self.risk_indicator_col].values[0] + self.state = self._update_state() + + end_total_asset = self.state[0] + sum( + np.array(self.state[1 : (self.stock_dim + 1)]) + * np.array(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) + ) + self.asset_memory.append(end_total_asset) + self.date_memory.append(self._get_date()) + self.reward = end_total_asset - begin_total_asset + self.rewards_memory.append(self.reward) + self.reward = self.reward * self.reward_scaling + + return self.state, self.reward, self.terminal, {} + + def reset(self): + # initiate state + self.state = self._initiate_state() + + if self.initial: + self.asset_memory = [self.initial_amount] + else: + previous_total_asset = self.previous_state[0] + sum( + np.array(self.state[1 : (self.stock_dim + 1)]) + * np.array( + self.previous_state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)] + ) + ) + self.asset_memory = [previous_total_asset] + + self.day = 0 + self.data = self.df.loc[self.day, :] + self.turbulence = 0 + self.cost = 0 + self.trades = 0 + self.terminal = False + # self.iteration=self.iteration + self.rewards_memory = [] + self.actions_memory = [] + self.date_memory = [self._get_date()] + + self.episode += 1 + + return self.state + + def render(self, mode="human", close=False): + return self.state + + def _initiate_state(self): + if self.initial: + # For Initial State + if len(self.df.tic.unique()) > 1: + # for multiple stock + state = ( + [self.initial_amount] + + self.data.close.values.tolist() + + [0] * self.stock_dim + + sum( + [ + self.data[tech].values.tolist() + for tech in self.tech_indicator_list + ], + [], + ) + ) + else: + # for single stock + state = ( + [self.initial_amount] + + [self.data.close] + + [0] * self.stock_dim + + sum([[self.data[tech]] for tech in self.tech_indicator_list], []) + ) + else: + # Using Previous State + if len(self.df.tic.unique()) > 1: + # for multiple stock + state = ( + [self.previous_state[0]] + + self.data.close.values.tolist() + + self.previous_state[ + (self.stock_dim + 1) : (self.stock_dim * 2 + 1) + ] + + sum( + [ + self.data[tech].values.tolist() + for tech in self.tech_indicator_list + ], + [], + ) + ) + else: + # for single stock + state = ( + [self.previous_state[0]] + + [self.data.close] + + self.previous_state[ + (self.stock_dim + 1) : (self.stock_dim * 2 + 1) + ] + + sum([[self.data[tech]] for tech in self.tech_indicator_list], []) + ) + return state + + def _update_state(self): + if len(self.df.tic.unique()) > 1: + # for multiple stock + state = ( + [self.state[0]] + + self.data.close.values.tolist() + + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) + + sum( + [ + self.data[tech].values.tolist() + for tech in self.tech_indicator_list + ], + [], + ) + ) + + else: + # for single stock + state = ( + [self.state[0]] + + [self.data.close] + + list(self.state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)]) + + sum([[self.data[tech]] for tech in self.tech_indicator_list], []) + ) + + return state + + def _get_date(self): + if len(self.df.tic.unique()) > 1: + date = self.data.date.unique()[0] + else: + date = self.data.date + return date + + def save_asset_memory(self): + date_list = self.date_memory + asset_list = self.asset_memory + # print(len(date_list)) + # print(len(asset_list)) + df_account_value = pd.DataFrame( + {"date": date_list, "account_value": asset_list} + ) + return df_account_value + + def save_action_memory(self): + if len(self.df.tic.unique()) > 1: + # date and close price length must match actions length + date_list = self.date_memory[:-1] + df_date = pd.DataFrame(date_list) + df_date.columns = ["date"] + + action_list = self.actions_memory + df_actions = pd.DataFrame(action_list) + df_actions.columns = self.data.tic.values + df_actions.index = df_date.date + # df_actions = pd.DataFrame({'date':date_list,'actions':action_list}) + else: + date_list = self.date_memory[:-1] + action_list = self.actions_memory + df_actions = pd.DataFrame({"date": date_list, "actions": action_list}) + return df_actions + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def get_sb_env(self): + e = DummyVecEnv([lambda: self]) + obs = e.reset() + return e, obs