-
Notifications
You must be signed in to change notification settings - Fork 12
/
rl_helper.py
86 lines (67 loc) · 2.5 KB
/
rl_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/python
import numpy as np
import math
from scipy.stats import norm
import vrep
import vrep_rotors, vrep_imu
class RL(object):
    """Reinforcement-learning helper wrapping V-REP remote-API calls for a quadcopter.

    Holds the remote-API client id, the quadcopter's object handle, and the
    location data needed to compute a hover reward.
    """

    def __init__(self, clientID):
        """Store the remote-API client id and zero out all state.

        :param clientID: connection id returned by the V-REP remote API.
        """
        self.clientID = clientID
        self.quadHandle = None            # set by init_sensors()
        self.pos = [0, 0, 0]              # last raw position read from V-REP
        self.rotor_data = [0.0, 0.0, 0.0, 0.0]  # rotor commands consumed by do_action()
        self.orig_location = [0, 0, 0]    # reward baseline, captured in init_sensors()
        self.curr_location = [0, 0, 0]    # refreshed by get_reward() via get_state()
        self.target_z = 0.0               # desired hover altitude

    def init_sensors(self):
        """Initialize IMU and rotors, then reset the quadcopter to ground level."""
        # Look up the quadcopter model by name (blocking: waits for the reply).
        err, self.quadHandle = vrep.simxGetObjectHandle(
            self.clientID, 'Quadricopter', vrep.simx_opmode_blocking)
        vrep_imu.init_imu(self.clientID, self.quadHandle)
        vrep_rotors.init_rotors(self.clientID)
        # NOTE(review): simx_opmode_buffer only yields data after a prior
        # streaming request for the same call -- this first read may return a
        # stale/empty position. Confirm vrep_imu.init_imu starts the stream.
        err, self.pos = vrep.simxGetObjectPosition(
            self.clientID, self.quadHandle, -1, vrep.simx_opmode_buffer)
        self.pos[2] = 0.0  # drop the quad to the ground before the episode starts
        vrep.simxSetObjectPosition(
            self.clientID, self.quadHandle, -1, self.pos, vrep.simx_opmode_oneshot)
        err, self.orig_location = vrep.simxGetObjectPosition(
            self.clientID, self.quadHandle, -1, vrep.simx_opmode_buffer)

    def start_sim(self):
        """Start the V-REP simulation (waits for server acknowledgement)."""
        vrep.simxStartSimulation(self.clientID, vrep.simx_opmode_oneshot_wait)

    def stop_sim(self):
        """Stop the V-REP simulation (waits for server acknowledgement)."""
        vrep.simxStopSimulation(self.clientID, vrep.simx_opmode_oneshot_wait)

    def get_reward(self):
        """Return a hover reward from the current and original (x, y, z) locations.

        x/y terms reward staying near the start position via a Gaussian pdf;
        the z term penalizes deviation from ``target_z``.
        """
        self.curr_location = self.get_state()
        # Scalar distances: abs() of the componentwise differences.
        deviation_x = abs(self.curr_location[0] - self.orig_location[0])
        deviation_y = abs(self.curr_location[1] - self.orig_location[1])
        deviation_z = abs(self.target_z - self.curr_location[2])
        gaussian = norm(0, 2)  # zero-mean, sigma=2 shaping for x/y deviations
        reward_x = gaussian.pdf(deviation_x)
        reward_y = gaussian.pdf(deviation_y)
        # NOTE(review): 1 - exp(d) is <= 0 for d >= 0 and diverges to -inf;
        # this was possibly intended to be math.exp(-deviation_z). Kept as-is.
        reward_z = 1 - math.exp(deviation_z)
        total_reward = 2 * (0.5 * reward_x + 0.5 * reward_y + reward_z)
        return total_reward

    def do_action(self):
        """Send the current ``rotor_data`` commands to the quadcopter rotors."""
        vrep_rotors.move_rotors(self.clientID, self.rotor_data)

    def get_state(self):
        """Read and return the quadcopter position from the IMU helper."""
        self.pos = vrep_imu.get_pos(self.clientID, self.quadHandle)
        return self.pos