-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReinforce.py
155 lines (130 loc) · 5.63 KB
/
Reinforce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import json
from time import time
from copy import deepcopy
from stable_baselines3 import DQN, A2C, PPO
from Dataset import Dataset
from CourseRecEnv import CourseRecEnv, EvaluateCallback
class Reinforce:
    """Train a stable-baselines3 RL agent on a course-recommendation
    environment and use it to recommend course sequences to every learner
    in the dataset, writing per-step and final metrics to disk."""

    def __init__(
        self, dataset, model, k, threshold, run, total_steps=1000, eval_freq=100
    ):
        """Build the training/evaluation environments and the RL model.

        Args:
            dataset (Dataset): dataset with learners, courses, jobs and skills
            model (str): one of "dqn", "a2c", "ppo"
            k (int): number of courses to recommend per learner
            threshold (float): matching threshold used by the environment
            run (int): run identifier, used in output filenames
            total_steps (int): number of training timesteps
            eval_freq (int): evaluation frequency (in timesteps) for the callback
        """
        self.dataset = dataset
        self.model_name = model
        self.k = k
        self.threshold = threshold
        self.run = run
        self.total_steps = total_steps
        self.eval_freq = eval_freq
        # Create the training and evaluation environments
        self.train_env = CourseRecEnv(dataset, threshold=self.threshold, k=self.k)
        self.eval_env = CourseRecEnv(dataset, threshold=self.threshold, k=self.k)
        self.get_model()
        # Shared filename stem keeps the two result files consistently named.
        stem = (
            f"{self.model_name}_nbskills_{len(self.dataset.skills)}"
            f"_k_{self.k}_run_{self.run}"
        )
        self.all_results_filename = f"all_{stem}.txt"
        self.final_results_filename = f"final_{stem}.json"
        self.eval_callback = EvaluateCallback(
            self.eval_env,
            eval_freq=self.eval_freq,
            all_results_filename=self.all_results_filename,
        )

    def get_model(self):
        """Sets the model to be used for the recommendation. The model is from
        stable-baselines3 and is chosen based on the model_name attribute.

        Raises:
            ValueError: if model_name is not one of "dqn", "a2c", "ppo".
        """
        if self.model_name == "dqn":
            self.model = DQN(env=self.train_env, verbose=0, policy="MlpPolicy")
        elif self.model_name == "a2c":
            self.model = A2C(
                env=self.train_env, verbose=0, policy="MlpPolicy", device="cpu"
            )
        elif self.model_name == "ppo":
            self.model = PPO(env=self.train_env, verbose=0, policy="MlpPolicy")
        else:
            # Fail fast: without this, self.model would be left undefined and
            # the error would only surface later as an AttributeError in
            # reinforce_recommendation().
            raise ValueError(f"Unknown model name: {self.model_name!r}")

    def update_learner_profile(self, learner, course):
        """Updates the learner's profile with the skills and levels of the course.

        Args:
            learner (list): list of (skill, mastery level) tuples of the learner
            course (list): pair whose second element is the list of
                (skill, mastery level) tuples provided by the course
        """
        # course[1] is the list of skills and levels provided by the course.
        for cskill, clevel in course[1]:
            for i, (lskill, llevel) in enumerate(learner):
                if cskill == lskill:
                    # Known skill: keep the higher of the two mastery levels.
                    learner[i] = (lskill, max(llevel, clevel))
                    break
            else:
                # New skill: append it to the learner's profile.
                learner.append((cskill, clevel))

    def reinforce_recommendation(self):
        """Train and evaluate the reinforcement learning model to make
        recommendations for every learner in the dataset. The results are
        saved in a json file."""
        results = dict()
        avg_l_attrac = self.dataset.get_avg_learner_attractiveness()
        print(f"The average attractiveness of the learners is {avg_l_attrac:.2f}")
        results["original_attractiveness"] = avg_l_attrac
        avg_app_j = self.dataset.get_avg_applicable_jobs(self.threshold)
        print(f"The average nb of applicable jobs per learner is {avg_app_j:.2f}")
        results["original_applicable_jobs"] = avg_app_j
        # Train the model
        self.model.learn(total_timesteps=self.total_steps, callback=self.eval_callback)
        # Evaluate the model
        time_start = time()
        recommendations = dict()
        for i, learner in enumerate(self.dataset.learners):
            self.eval_env.reset(learner=learner)
            done = False
            index = self.dataset.learners_index[i]
            recommendation_sequence = []
            while not done:
                obs = self.eval_env._get_obs()
                action, _state = self.model.predict(obs, deterministic=True)
                obs, reward, done, _, info = self.eval_env.step(action)
                # A reward of -1 marks an invalid/unhelpful course; skip it.
                if reward != -1:
                    recommendation_sequence.append(action.item())
            for course in recommendation_sequence:
                self.update_learner_profile(learner, self.dataset.courses[course])
            recommendations[index] = [
                self.dataset.courses_index[course_id]
                for course_id in recommendation_sequence
            ]
        time_end = time()
        avg_recommendation_time = (time_end - time_start) / len(self.dataset.learners)
        print(f"Average Recommendation Time: {avg_recommendation_time:.2f} seconds")
        results["avg_recommendation_time"] = avg_recommendation_time
        avg_l_attrac = self.dataset.get_avg_learner_attractiveness()
        print(f"The new average attractiveness of the learners is {avg_l_attrac:.2f}")
        results["new_attractiveness"] = avg_l_attrac
        avg_app_j = self.dataset.get_avg_applicable_jobs(self.threshold)
        print(f"The new average nb of applicable jobs per learner is {avg_app_j:.2f}")
        results["new_applicable_jobs"] = avg_app_j
        results["recommendations"] = recommendations
        # Use a context manager so the results file is flushed and closed
        # even if serialization fails (the original leaked the file handle).
        results_path = os.path.join(
            self.dataset.config["results_path"],
            self.final_results_filename,
        )
        with open(results_path, "w") as results_file:
            json.dump(results, results_file, indent=4)