testing_model.py
import gym
import ArmEnv  # registers the custom 'PointToPoint-v0' environment with gym
import matplotlib.pyplot as plt
import numpy as np
from stable_baselines3 import PPO
env = gym.make('PointToPoint-v0', gui=True, mode='P', record=True)

# custom_objects replaces the pickled schedule callables so a checkpoint saved
# with a different stable-baselines3 version still loads; the values are
# irrelevant at test time since no training takes place.
model = PPO.load("logs/best_model_75.zip", env=env,
                 custom_objects={"learning_rate": 0.0,
                                 "lr_schedule": lambda _: 0.0,
                                 "clip_range": lambda _: 0.0})
# model = PPO.load("trial.zip", env=env)  # alternative checkpoint
rew = []
for i in range(1):  # number of evaluation episodes
    done = False
    obs = env.reset()
    while not done:
        action, _state = model.predict(obs)
        # action = np.array([0, 0, 0, 0, 0, 0, 0])  # zero-action baseline
        obs, reward, done, _ = env.step(action)
        print(reward)
        print(obs)
        print(action)
        if i == 0:  # record rewards from the first episode only
            rew.append(reward)

t = np.arange(len(rew))
print(sum(rew))  # total episode reward
fig, ax = plt.subplots()
ax.plot(t, rew)
ax.set_title("Testing")
ax.set_xlabel("Timesteps")
ax.set_ylabel("Rewards")
plt.show()
# Sample results: commanded goal vs. final end-effector position (loc6)
# goal: [0.4, 0, 1.1]  loc6: [0.44738302 0.04233105 1.11781087]
# goal: [0.6, 0, 0.9]  loc6: [0.5669553  0.07910315 1.04583075]
# goal: [0.3, 0, 1.2]  loc6: [0.27454447 0.03467133 1.20632827]
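
# --- Optional aggregate evaluation (a minimal sketch, not in the original) ---
# Instead of logging a single episode by hand, stable-baselines3 ships an
# evaluate_policy helper that averages episode rewards over several rollouts.
# This assumes the same model and env objects created above.
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward over 10 episodes: {mean_reward:.2f} +/- {std_reward:.2f}")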