Use binary_crossentropy instead of sparse_categorical_crossentropy.

Fix the confidence score not being optimized when the number of classes is one.
zzh8829 · Aug 3, 2020 · 34fd1c9 · 34fd1c9
1 parent 0d70db3
commit 34fd1c9
Showing 1 changed file with 6 additions and 3 deletions.
diff --git a/yolov3_tf2/models.py b/yolov3_tf2/models.py
@@ -303,9 +303,12 @@ def yolo_loss(y_true, y_pred):
         obj_loss = binary_crossentropy(true_obj, pred_obj)
         obj_loss = obj_mask * obj_loss + \
             (1 - obj_mask) * ignore_mask * obj_loss
-        # TODO: use binary_crossentropy instead
-        class_loss = obj_mask * sparse_categorical_crossentropy(
-            true_class_idx, pred_class)
+        # sparse_categorical_crossentropy will always output 0 when number of classes is 1,
+        # so convert true_class into one hot label and use binary_crossentropy.
+        true_class_one_hot = tf.one_hot(
+            tf.cast(true_class_idx[..., 0], tf.int32), classes)
+        class_loss = obj_mask * binary_crossentropy(
+            true_class_one_hot, pred_class)
 
         # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
         xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))