diff --git a/spacy/ml/tb_framework.pyx b/spacy/ml/tb_framework.pyx
index 9b2114900d3..2a3a5682380 100644
--- a/spacy/ml/tb_framework.pyx
+++ b/spacy/ml/tb_framework.pyx
@@ -338,9 +338,9 @@ def _forward_fallback(
         all_ids.append(ids)
         all_statevecs.append(statevecs)
         all_which.append(which)
+        n_moves += 1
         if n_moves >= max_moves >= 1:
             break
-        n_moves += 1

    def backprop_parser(d_states_d_scores):
        ids = ops.xp.vstack(all_ids)
diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx
index 2d2a3625287..eb0f3004563 100644
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -258,7 +258,7 @@ class Parser(TrainablePipe):
             # batch uniform length. Since we do not have a gold standard
             # sequence, we use the teacher's predictions as the gold
             # standard.
-            max_moves = int(random.uniform(max(max_moves // 2, 1), max_moves * 2))
+            max_moves = random.randrange(max(max_moves // 2, 1), max_moves * 2)
             states = self._init_batch_from_teacher(teacher_pipe, student_docs, max_moves)
         else:
             states = self.moves.init_batch(student_docs)
@@ -425,7 +425,7 @@ class Parser(TrainablePipe):
         if max_moves >= 1:
             # Chop sequences into lengths of this many words, to make the
             # batch uniform length.
-            max_moves = int(random.uniform(max(max_moves // 2, 1), max_moves * 2))
+            max_moves = random.randrange(max(max_moves // 2, 1), max_moves * 2)
             init_states, gold_states, _ = self._init_gold_batch(
                 examples, max_length=max_moves
             )
@@ -729,9 +729,10 @@ class Parser(TrainablePipe):
                     action.do(state.c, action.label)
                     if state.is_final():
                         break
-            if moves.has_gold(eg, start_state.B(0), state.B(0)):
-                states.append(start_state)
-                golds.append(gold)
+            start_buf_head = start_state.B(0)
+            buf_head = state.B(0)
+            states.append(start_state)
+            golds.append(gold)
             if state.is_final():
                 break
         return states, golds, max_length