From aa71b38d637fffd7a7bbfffbe0e170e1787be300 Mon Sep 17 00:00:00 2001
From: Markus Völk
Date: Tue, 29 Jun 2021 11:55:56 +0200
Subject: [PATCH] fix broken links, ...
---
README.md | 28 ++++++++++++------------
SL_predict.ipynb | 54 +++++++++++++++++++++++++++++++++++------------
data_utils.py | 8 +++----
utils/model.py | 11 +++++++++-
utils/training.py | 33 ++++++++++++++---------------
5 files changed, 85 insertions(+), 49 deletions(-)
diff --git a/README.md b/README.md
index 79ee89f..250f4d5 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# SSD-based object and text detection with Keras
This repository contains the implementation of various approaches to object detection in general and text detection/recognition in particular.
-Its code was initially used to carry out the experiments for the author's master's thesis [End-to-End Scene Text Recognition based on Artificial Neural Networks](http://83.169.39.135/thesis/thesis.pdf) and later extended with the implementation of more recent approaches.
+Its code was initially used to carry out the experiments for the author's master's thesis [End-to-End Scene Text Recognition based on Artificial Neural Networks](http://46.163.79.21/thesis/thesis.pdf) and later extended with the implementation of more recent approaches.
## Technical background
@@ -60,19 +60,19 @@ The usage of the code is quite straightforward, clone the repository and run the
## Pretrained models
Pretrained SSD models can be converted from the [original Caffe implementation](https://github.com/weiliu89/caffe/tree/ssd).
-#### [Converted SSD300 VOC](http://83.169.39.135/ssd_detectors/ssd300_voc_weights_fixed.zip)
+#### [Converted SSD300 VOC](http://46.163.79.21/ssd_detectors/ssd300_voc_weights_fixed.zip)
PASCAL VOC 07+12+COCO SSD300* from Caffe implementation
-#### [Converted SSD512 VOC](http://83.169.39.135/ssd_detectors/ssd512_voc_weights_fixed.zip)
+#### [Converted SSD512 VOC](http://46.163.79.21/ssd_detectors/ssd512_voc_weights_fixed.zip)
PASCAL VOC 07+12+COCO SSD512* from Caffe implementation
-#### [Converted SSD300 COCO](http://83.169.39.135/ssd_detectors/ssd300_coco_weights_fixed.zip)
+#### [Converted SSD300 COCO](http://46.163.79.21/ssd_detectors/ssd300_coco_weights_fixed.zip)
COCO trainval35k SSD300* from Caffe implementation
-#### [Converted SSD512 COCO](http://83.169.39.135/ssd_detectors/ssd512_coco_weights_fixed.zip)
+#### [Converted SSD512 COCO](http://46.163.79.21/ssd_detectors/ssd512_coco_weights_fixed.zip)
COCO trainval35k SSD512* from Caffe implementation
-#### [SegLink](http://83.169.39.135/ssd_detectors/201809231008_sl512_synthtext.zip)
+#### [SegLink](http://46.163.79.21/ssd_detectors/201809231008_sl512_synthtext.zip)
initialized with converted SSD512 weights
trained and tested on subsets of SynthText
segment_threshold 0.60
@@ -83,7 +83,7 @@ f-measure 0.869
parameters 24,358,681
model size 94 MB
-#### [SegLink with DSOD backbone and Focal Loss](http://83.169.39.135/ssd_detectors/201806021007_dsodsl512_synthtext.zip)
+#### [SegLink with DSOD backbone and Focal Loss](http://46.163.79.21/ssd_detectors/201806021007_dsodsl512_synthtext.zip)
trained and tested on subsets of SynthText
segment_threshold 0.60
link_threshold 0.50
@@ -93,7 +93,7 @@ f-measure 0.932
parameters 12,905,177
model size 50 MB
-#### [TextBoxes++ with DSOD backbone and Focal Loss](http://83.169.39.135/ssd_detectors/201906190710_dsodtbpp512fl_synthtext.zip)
+#### [TextBoxes++ with DSOD backbone and Focal Loss](http://46.163.79.21/ssd_detectors/201906190710_dsodtbpp512fl_synthtext.zip)
trained and tested on subsets of SynthText
threshold 0.35
precision 0.984
@@ -102,7 +102,7 @@ f-measure 0.934
parameters 23,477,798
model size 91 MB
-#### [TextBoxes++ with dense blocks, separable convolution and Focal Loss](http://83.169.39.135/ssd_detectors/202003070004_dstbpp512fl_synthtext.zip)
+#### [TextBoxes++ with dense blocks, separable convolution and Focal Loss](http://46.163.79.21/ssd_detectors/202003070004_dstbpp512fl_synthtext.zip)
the number of parameters has been reduced by ≈ 94% compared to the original TextBoxes++ with VGG backbone (35,763,078 parameters)
trained and tested on subsets of SynthText
threshold 0.45
@@ -113,7 +113,7 @@ parameters 2,226,374
model size 9 MB
-#### [CRNN with LSTM](http://83.169.39.135/ssd_detectors/201806162129_crnn_lstm_synthtext.zip)
+#### [CRNN with LSTM](http://46.163.79.21/ssd_detectors/201806162129_crnn_lstm_synthtext.zip)
trained and tested on cropped word-level bounding boxes from SynthText
mean editdistance 0.332
mean normalized editdistance 0.081
@@ -124,7 +124,7 @@ parameters 8,747,351
model size 34 MB
runtime (GPU) 114 ms ± 2.75 ms
-#### [CRNN with GRU](http://83.169.39.135/ssd_detectors/201806190711_crnn_gru_synthtext.zip)
+#### [CRNN with GRU](http://46.163.79.21/ssd_detectors/201806190711_crnn_gru_synthtext.zip)
trained and tested on cropped word-level bounding boxes from SynthText
mean editdistance 0.333
mean normalized editdistance 0.081
@@ -135,7 +135,7 @@ parameters 7,959,895
model size 31 MB
runtime (GPU) 85.1 ms ± 1.19 ms
-#### [CRNN with CNN](http://83.169.39.135/ssd_detectors/202001131747_crnn_cnn_synthtext.zip)
+#### [CRNN with CNN](http://46.163.79.21/ssd_detectors/202001131747_crnn_cnn_synthtext.zip)
fully convolutional architecture for the recognition stage (probably not optimal)
trained and tested on cropped word-level bounding boxes from SynthText
mean editdistance 0.355
@@ -147,7 +147,7 @@ parameters 7,877,719
model size 31 MB
runtime (GPU) 3.68 ms ± 24.5 µs
-#### [CRNN with CNN concat](http://83.169.39.135/ssd_detectors/202002030820_crnn_cnn_synthtext_concat_continued.zip)
+#### [CRNN with CNN concat](http://46.163.79.21/ssd_detectors/202002030820_crnn_cnn_synthtext_concat_continued.zip)
fine-tuned fully convolutional model on concatenated word images from SynthText
mean editdistance 1.842
@@ -193,5 +193,5 @@ iterations 600k+100k
#### SegLink with DenseNet, Focal Loss and CRNN end-to-end real-time recognition
-[](http://83.169.39.135/ssd_detectors/dsodslcrnn_end2end_record.mp4)
+[](http://46.163.79.21/ssd_detectors/dsodslcrnn_end2end_record.mp4)
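
The pretrained archives above contain Keras weight files. A minimal loading sketch follows; the module and class names (`sl_model`, `SL512`) and the extracted file path are assumptions inferred from the archive names, not verified against the repository.

```python
# Hypothetical loading sketch; sl_model.SL512 and the weight file path are
# assumptions inferred from the archive names above.
from sl_model import SL512

model = SL512()
model.load_weights('201809231008_sl512_synthtext/weights.h5', by_name=True)
```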
diff --git a/SL_predict.ipynb b/SL_predict.ipynb
index 8b301ae..bf80ae2 100644
--- a/SL_predict.ipynb
+++ b/SL_predict.ipynb
@@ -4,7 +4,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -27,7 +30,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -41,7 +47,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -55,7 +64,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -74,7 +86,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -89,7 +104,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -102,8 +120,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true,
- "scrolled": false
+ "tags": []
},
"outputs": [],
"source": [
@@ -138,7 +155,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -164,7 +184,9 @@
"execution_count": null,
"metadata": {
"collapsed": true,
- "scrolled": false
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": [
@@ -186,7 +208,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": []
@@ -195,7 +220,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"outputs": [],
"source": []
@@ -263,5 +291,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
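
The notebook hunks above repeat the same metadata migration in nearly every cell (one cell is instead reduced to a bare `"tags": []`). A sketch of how the bulk of that migration could be scripted with nbformat follows; the per-cell rules are inferred from the hunks, not part of the patch.

```python
# Sketch of the metadata migration applied above, using nbformat.
# The per-cell rules are inferred from this patch's hunks.
import nbformat

nb = nbformat.read('SL_predict.ipynb', as_version=4)
for cell in nb.cells:
    if cell.metadata.get('collapsed'):
        # mirror the legacy 'collapsed' flag under the jupyter namespace
        cell.metadata.setdefault('jupyter', {})['outputs_hidden'] = True
    cell.metadata.pop('scrolled', None)  # drop obsolete 'scrolled' entries
nb.nbformat_minor = 4
nbformat.write(nb, 'SL_predict.ipynb')
```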
diff --git a/data_utils.py b/data_utils.py
index 8241d95..d332fa6 100644
--- a/data_utils.py
+++ b/data_utils.py
@@ -79,14 +79,14 @@ def lighting(self, img):
def horizontal_flip(self, img, y):
if np.random.random() < self.hflip_prob:
- img = img[:, ::-1]
- y[:, [0, 2]] = 1 - y[:, [2, 0]]
+ img = img[:,::-1]
+ y[:,(0,2)] = 1 - y[:,(2,0)]
return img, y
def vertical_flip(self, img, y):
if np.random.random() < self.vflip_prob:
- img = img[::-1]
- y[:, [1, 3]] = 1 - y[:, [3, 1]]
+ img = img[::-1,:]
+ y[:,(1,3)] = 1 - y[:,(3,1)]
return img, y
def random_sized_crop(self, img, targets):
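
The flip rule above assumes boxes are normalized [xmin, ymin, xmax, ymax] coordinates, so mirroring the image maps xmin to 1 - xmax and xmax to 1 - xmin. A quick self-contained check (the box values are made up):

```python
# Check of the horizontal-flip rule; the box is made up and assumed to be
# a normalized [xmin, ymin, xmax, ymax] row.
import numpy as np

y = np.array([[0.1, 0.2, 0.4, 0.8]])
y[:, (0, 2)] = 1 - y[:, (2, 0)]  # xmin' = 1 - xmax, xmax' = 1 - xmin
print(y)  # [[0.6 0.2 0.9 0.8]]
```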
diff --git a/utils/model.py b/utils/model.py
index ce84b2d..846c863 100644
--- a/utils/model.py
+++ b/utils/model.py
@@ -122,7 +122,16 @@ def plot_parameter_statistic(model, layer_types=['Dense', 'Conv2D'], trainable=T
offset += np.array(counts_non_trainable, dtype=int)
legend.append('non-trainable')
if outputs:
- counts_outputs = [np.sum([np.sum([np.prod(s[1:]) for s in n.output_shapes]) for n in l._inbound_nodes]) for l in layers]
+ #counts_outputs = [np.sum([np.sum([np.prod(s[1:]) for s in n.output_shapes]) for n in l._inbound_nodes]) for l in layers]
+    counts_outputs = []
+    for l in layers:
+        # a node's output_shapes can be a single shape tuple or a list of them
+        shapes = []
+        for n in l._inbound_nodes:
+            if isinstance(n.output_shapes, list):
+                shapes.extend(n.output_shapes)
+            else:
+                shapes.append(n.output_shapes)
+        # count elements per output, excluding the batch dimension
+        counts_outputs.append(np.sum([np.prod(s[1:]) for s in shapes]))
plt.barh(y, counts_outputs, align='center', color=colors[2], left=offset)
offset += np.array(counts_outputs, dtype=int)
legend.append('outputs')
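
The replacement loop exists because a node's `output_shapes` may be a single shape tuple or a list of them. A standalone sketch of the same normalization; the helper name and sample shapes are illustrative only.

```python
# Standalone version of the shape normalization above; the helper name and
# the sample shapes are illustrative.
import numpy as np

def count_output_elements(node_shapes):
    shapes = []
    for s in node_shapes:
        if isinstance(s, list):  # multi-output node: list of shape tuples
            shapes.extend(s)
        else:                    # single-output node: one shape tuple
            shapes.append(s)
    # drop the batch dimension (index 0) before counting elements
    return int(np.sum([np.prod(shape[1:]) for shape in shapes]))

print(count_output_elements([(None, 8, 8, 32)]))                      # 2048
print(count_output_elements([[(None, 4, 4, 16), (None, 4, 4, 16)]]))  # 512
```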
diff --git a/utils/training.py b/utils/training.py
index 8da4c21..a33f9bc 100644
--- a/utils/training.py
+++ b/utils/training.py
@@ -126,12 +126,13 @@ def reduced_focal_loss(y_true, y_pred, gamma=2., alpha=1., th=0.5):
return tf.reduce_sum(loss, axis=-1)
-def ciou_loss(y_true, y_pred):
+def ciou_loss(y_true, y_pred, variant='diou'):
'''Compute Distance-IoU loss.
# Arguments
y_true: Ground truth bounding boxes, tensor of shape (..., 4)
y_pred: Predicted bounding boxes, tensor of shape (..., 4)
+ variant: one of 'diou', 'ciou' or 'logciou', selecting the loss variant
# Returns
loss: Distance-IoU loss, tensor of shape (...)
@@ -196,18 +197,16 @@ def ciou_loss(y_true, y_pred):
w_temp = 2 * w_pred
ar = (8 / (np.pi ** 2)) * arctan * ((w_pred - w_temp) * h_pred)
- # calculate diou
- diouk = 1-iouk + u
-
- # calculate ciou
- #ciouk = 1-iouk + u + alpha*ar
-
- # "I found that -log(IoU) is more stable and converge faster than (1-IoU)"
- #ciouk = -tf.math.log(iouk) + u + alpha*ar
-
- return diouk
- #return ciouk
-
+    # return the requested loss variant
+    if variant == 'diou':
+        return 1 - iouk + u
+    elif variant == 'ciou':
+        return 1 - iouk + u + alpha * ar
+    elif variant == 'logciou':
+        # "I found that -log(IoU) is more stable and converge faster than (1-IoU)"
+        return -tf.math.log(iouk) + u + alpha * ar
+    else:
+        raise ValueError("variant must be 'diou', 'ciou' or 'logciou'")
class LearningRateDecay(Callback):
def __init__(self, methode='linear', base_lr=1e-3, n_desired=40000, desired=0.1, bias=0.0, minimum=0.1):
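
With the new `variant` argument, the three loss flavours can be compared on the same boxes. A hypothetical call; the coordinates are made up and the (xmin, ymin, xmax, ymax) layout is assumed from the docstring.

```python
# Hypothetical usage of the refactored loss; the boxes are made up and the
# coordinate layout (xmin, ymin, xmax, ymax) is assumed from the docstring.
import tensorflow as tf
from utils.training import ciou_loss

y_true = tf.constant([[0.10, 0.10, 0.50, 0.50]])
y_pred = tf.constant([[0.15, 0.10, 0.55, 0.50]])
for variant in ('diou', 'ciou', 'logciou'):
    print(variant, ciou_loss(y_true, y_pred, variant=variant).numpy())
```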
@@ -477,7 +476,7 @@ def filter_signal(x, y, window_length=1000):
return x, y
-def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only=False, autoscale=True):
+def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only=False, autoscale=True, legend_loc='best'):
"""Plot and compares the training log contained in './checkpoints/'.
# Arguments
@@ -494,7 +493,7 @@ def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only
Different batch sizes lead to different epoch lengths.
"""
- loss_terms = {'loss', 'error'}
+ loss_terms = {'loss', 'error', 'abs'}
metric_terms = {'precision', 'recall', 'fmeasure', 'accuracy', 'sparsity', 'visibility'}
if type(log_dirs) == str:
@@ -580,7 +579,7 @@ def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only
if ymax > 0:
plt.title(k, y=1.05)
- plt.legend()
+ plt.legend(loc=legend_loc)
ax1 = plt.gca()
ax1.set_xlim(xmin, xmax)
@@ -611,7 +610,7 @@ def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only
def plot_history(log_dirs, names=None, limits=None, autoscale=True):
- loss_terms = {'loss', 'error'}
+ loss_terms = {'loss', 'error', 'abs'}
metric_terms = {'precision', 'recall', 'fmeasure', 'accuracy', 'sparsity', 'visibility'}
if type(log_dirs) == str:
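
The new `legend_loc` argument is passed straight through to Matplotlib's `plt.legend(loc=...)`. A usage sketch; the checkpoint directory name is illustrative.

```python
# Usage sketch for the extended plot_log; the log directory name is
# illustrative. legend_loc accepts any Matplotlib legend location string.
from utils.training import plot_log

plot_log(['checkpoints/201809231008_sl512_synthtext'],
         names=['SL512 SynthText'],
         window_length=250,
         legend_loc='upper right')
```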