fix broken links, ...
mvoelk committed Jun 29, 2021
1 parent a251f26 commit aa71b38
Showing 5 changed files with 85 additions and 49 deletions.
28 changes: 14 additions & 14 deletions README.md
@@ -1,7 +1,7 @@
# SSD-based object and text detection with Keras
This repository contains the implementation of various approaches to object detection in general and text detection/recognition in particular.

Its code was initially used to carry out the experiments for the author's master thesis [End-to-End Scene Text Recognition based on Artificial Neural Networks](http://83.169.39.135/thesis/thesis.pdf) and later extended with the implementation of more recent approaches.
Its code was initially used to carry out the experiments for the author's master thesis [End-to-End Scene Text Recognition based on Artificial Neural Networks](http://46.163.79.21/thesis/thesis.pdf) and later extended with the implementation of more recent approaches.

## Technical background

@@ -60,19 +60,19 @@ The usage of the code is quite straightforward, clone the repository and run the
## Pretrained models
Pretrained SSD models can be converted from the [original Caffe implementation](https://github.com/weiliu89/caffe/tree/ssd).
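Once converted, the weights can be loaded into the corresponding Keras model before prediction or fine-tuning. A minimal sketch (the module name `ssd_model`, the builder `SSD512` and the weight file name inside the archive are assumptions, not taken from this commit):

```python
# Hypothetical loading sketch -- model builder and weight file names are assumptions.
from ssd_model import SSD512

model = SSD512(input_shape=(512, 512, 3), num_classes=21)  # 20 VOC classes + background
model.load_weights('ssd512_voc_weights_fixed.h5', by_name=True)
```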

#### [Converted SSD300 VOC](http://83.169.39.135/ssd_detectors/ssd300_voc_weights_fixed.zip)
#### [Converted SSD300 VOC](http://46.163.79.21/ssd_detectors/ssd300_voc_weights_fixed.zip)
PASCAL VOC 07+12+COCO SSD300* from Caffe implementation

#### [Converted SSD512 VOC](http://83.169.39.135/ssd_detectors/ssd512_voc_weights_fixed.zip)
#### [Converted SSD512 VOC](http://46.163.79.21/ssd_detectors/ssd512_voc_weights_fixed.zip)
PASCAL VOC 07+12+COCO SSD512* from Caffe implementation

#### [Converted SSD300 COCO](http://83.169.39.135/ssd_detectors/ssd300_coco_weights_fixed.zip)
#### [Converted SSD300 COCO](http://46.163.79.21/ssd_detectors/ssd300_coco_weights_fixed.zip)
COCO trainval35k SSD300* from Caffe implementation

#### [Converted SSD512 COCO](http://83.169.39.135/ssd_detectors/ssd512_coco_weights_fixed.zip)
#### [Converted SSD512 COCO](http://46.163.79.21/ssd_detectors/ssd512_coco_weights_fixed.zip)
COCO trainval35k SSD512* from Caffe implementation

#### [SegLink](http://83.169.39.135/ssd_detectors/201809231008_sl512_synthtext.zip)
#### [SegLink](http://46.163.79.21/ssd_detectors/201809231008_sl512_synthtext.zip)
initialized with converted SSD512 weights
trained and tested on subsets of SynthText
segment_threshold 0.60
@@ -83,7 +83,7 @@ f-measure 0.869
parameters 24,358,681
model size 94 MB

#### [SegLink with DSOD backbone and Focal Loss](http://83.169.39.135/ssd_detectors/201806021007_dsodsl512_synthtext.zip)
#### [SegLink with DSOD backbone and Focal Loss](http://46.163.79.21/ssd_detectors/201806021007_dsodsl512_synthtext.zip)
trained and tested on subsets of SynthText
segment_threshold 0.60
link_threshold 0.50
@@ -93,7 +93,7 @@ f-measure 0.932
parameters 12,905,177
model size 50 MB

#### [TextBoxes++ with DSOD backbone and Focal Loss](http://83.169.39.135/ssd_detectors/201906190710_dsodtbpp512fl_synthtext.zip)
#### [TextBoxes++ with DSOD backbone and Focal Loss](http://46.163.79.21/ssd_detectors/201906190710_dsodtbpp512fl_synthtext.zip)
trained and tested on subsets of SynthText
threshold 0.35
precision 0.984
@@ -102,7 +102,7 @@ f-measure 0.934
parameters 23,477,798
model size 91 MB

#### [TextBoxes++ with dense blocks, separable convolution and Focal Loss](http://83.169.39.135/ssd_detectors/202003070004_dstbpp512fl_synthtext.zip)
#### [TextBoxes++ with dense blocks, separable convolution and Focal Loss](http://46.163.79.21/ssd_detectors/202003070004_dstbpp512fl_synthtext.zip)
the number of parameters has been reduced by ≈ 94% compared to the original TextBoxes++ with VGG backbone (35,763,078 parameters), see the quick check below
trained and tested on subsets of SynthText
threshold 0.45
@@ -113,7 +113,7 @@ parameters 2,226,374
model size 9 MB
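The quoted reduction follows directly from the two parameter counts:

```python
# Quick check of the parameter reduction stated above.
vgg_params = 35_763_078    # original TextBoxes++ with VGG backbone
dense_params = 2_226_374   # TextBoxes++ with dense blocks and separable convolution
print(f'{1 - dense_params / vgg_params:.1%}')  # -> 93.8%, i.e. roughly 94%
```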


#### [CRNN with LSTM](http://83.169.39.135/ssd_detectors/201806162129_crnn_lstm_synthtext.zip)
#### [CRNN with LSTM](http://46.163.79.21/ssd_detectors/201806162129_crnn_lstm_synthtext.zip)
trained and tested on cropped word level bounding boxes from SynthText
mean editdistance 0.332
mean normalized editdistance 0.081
@@ -124,7 +124,7 @@ parameters 8,747,351
model size 34 MB
runtime (GPU) 114 ms ± 2.75 ms

#### [CRNN with GRU](http://83.169.39.135/ssd_detectors/201806190711_crnn_gru_synthtext.zip)
#### [CRNN with GRU](http://46.163.79.21/ssd_detectors/201806190711_crnn_gru_synthtext.zip)
trained and tested on cropped word level bounding boxes from SynthText
mean editdistance 0.333
mean normalized editdistance 0.081
@@ -135,7 +135,7 @@ parameters 7,959,895
model size 31 MB
runtime (GPU) 85.1 ms ± 1.19 ms

#### [CRNN with CNN](http://83.169.39.135/ssd_detectors/202001131747_crnn_cnn_synthtext.zip)
#### [CRNN with CNN](http://46.163.79.21/ssd_detectors/202001131747_crnn_cnn_synthtext.zip)
fully convolutional architecture for the recognition stage (probably not optimal)
trained and tested on cropped word level bounding boxes from SynthText
mean editdistance 0.355
@@ -147,7 +147,7 @@ parameters 7,877,719
model size 31 MB
runtime (GPU) 3.68 ms ± 24.5 µs

#### [CRNN with CNN concat](http://83.169.39.135/ssd_detectors/202002030820_crnn_cnn_synthtext_concat_continued.zip)
#### [CRNN with CNN concat](http://46.163.79.21/ssd_detectors/202002030820_crnn_cnn_synthtext_concat_continued.zip)
fine-tuned fully convolutional model on concatenated word images from SynthText
<img src="./images/crnn_input_concat1.jpg" />
mean editdistance 1.842
@@ -193,5 +193,5 @@ iterations 600k+100k
</p>

#### SegLink with DenseNet, Focal Loss and CRNN end-to-end real-time recognition
[<img src="./images/dsodslcrnn_end2end_record_preview.jpg" width="768" />](http://83.169.39.135/ssd_detectors/dsodslcrnn_end2end_record.mp4)
[<img src="./images/dsodslcrnn_end2end_record_preview.jpg" width="768" />](http://46.163.79.21/ssd_detectors/dsodslcrnn_end2end_record.mp4)

54 changes: 41 additions & 13 deletions SL_predict.ipynb
@@ -4,7 +4,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -27,7 +30,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -41,7 +47,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -55,7 +64,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -74,7 +86,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -89,7 +104,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -102,8 +120,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
"tags": []
},
"outputs": [],
"source": [
@@ -138,7 +155,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -164,7 +184,9 @@
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -186,7 +208,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": []
@@ -195,7 +220,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": []
@@ -263,5 +291,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
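The notebook changes above replace the old top-level `"collapsed"` flag with the newer `metadata.jupyter.outputs_hidden` convention and bump `nbformat_minor` to 4. A sketch of how the same migration could be scripted for other notebooks (illustrative only, not part of this commit; assumes the `nbformat` package):

```python
# Illustrative migration sketch -- not part of this commit.
import nbformat

nb = nbformat.read('SL_predict.ipynb', as_version=4)
for cell in nb.cells:
    if cell.get('metadata', {}).get('collapsed'):
        # newer notebooks keep output visibility under metadata.jupyter.outputs_hidden
        cell.metadata.setdefault('jupyter', {})['outputs_hidden'] = True
nb.nbformat_minor = 4
nbformat.write(nb, 'SL_predict.ipynb')
```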
8 changes: 4 additions & 4 deletions data_utils.py
@@ -79,14 +79,14 @@ def lighting(self, img):

    def horizontal_flip(self, img, y):
        if np.random.random() < self.hflip_prob:
            img = img[:, ::-1]
            y[:, [0, 2]] = 1 - y[:, [2, 0]]
            img = img[:,::-1]
            y[:,(0,2)] = 1 - y[:,(2,0)]
        return img, y

    def vertical_flip(self, img, y):
        if np.random.random() < self.vflip_prob:
            img = img[::-1]
            y[:, [1, 3]] = 1 - y[:, [3, 1]]
            img = img[::-1,:]
            y[:,(1,3)] = 1 - y[:,(3,1)]
        return img, y

    def random_sized_crop(self, img, targets):
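For reference, the flip lines above mirror normalized box coordinates around the image center; a toy check (assuming `y` holds `[xmin, ymin, xmax, ymax]` rows, as the column indices suggest):

```python
# Toy check of the horizontal-flip coordinate remapping (box layout assumed).
import numpy as np

y = np.array([[0.1, 0.2, 0.4, 0.5]])   # one normalized box: xmin, ymin, xmax, ymax
y[:, (0, 2)] = 1 - y[:, (2, 0)]        # mirror the x-coordinates
print(y)                                # [[0.6 0.2 0.9 0.5]]
```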
11 changes: 10 additions & 1 deletion utils/model.py
@@ -122,7 +122,16 @@ def plot_parameter_statistic(model, layer_types=['Dense', 'Conv2D'], trainable=T
    offset += np.array(counts_non_trainable, dtype=int)
    legend.append('non-trainable')
    if outputs:
        counts_outputs = [np.sum([np.sum([np.prod(s[1:]) for s in n.output_shapes]) for n in l._inbound_nodes]) for l in layers]
        #counts_outputs = [np.sum([np.sum([np.prod(s[1:]) for s in n.output_shapes]) for n in l._inbound_nodes]) for l in layers]
        counts_outputs = []
        for l in layers:
            shapes = []
            for n in l._inbound_nodes:
                if type(n.output_shapes) == list:
                    shapes.extend(n.output_shapes)
                else:
                    shapes.append(n.output_shapes)
            counts_outputs.append(np.sum([np.prod(s[1:]) for s in shapes]))
        plt.barh(y, counts_outputs, align='center', color=colors[2], left=offset)
        offset += np.array(counts_outputs, dtype=int)
        legend.append('outputs')
33 changes: 16 additions & 17 deletions utils/training.py
@@ -126,12 +126,13 @@ def reduced_focal_loss(y_true, y_pred, gamma=2., alpha=1., th=0.5):
    return tf.reduce_sum(loss, axis=-1)


def ciou_loss(y_true, y_pred):
def ciou_loss(y_true, y_pred, variant='diou'):
    '''Compute Distance-IoU loss.
    # Arguments
        y_true: Ground truth bounding boxes, tensor of shape (..., 4)
        y_pred: Predicted bounding boxes, tensor of shape (..., 4)
        variant: 'diou', 'ciou', 'logciou'
    # Returns
        loss: Distance-IoU loss, tensor of shape (...)
@@ -196,18 +197,16 @@ def ciou_loss(y_true, y_pred):
    w_temp = 2 * w_pred
    ar = (8 / (np.pi ** 2)) * arctan * ((w_pred - w_temp) * h_pred)

    # calculate diou
    diouk = 1-iouk + u

    # calculate ciou
    #ciouk = 1-iouk + u + alpha*ar

    # "I found that -log(IoU) is more stable and converge faster than (1-IoU)"
    #ciouk = -tf.math.log(iouk) + u + alpha*ar

    return diouk
    #return ciouk

    # calculate diou, ciou, ...
    if variant == 'diou':
        return 1-iouk + u
    elif variant == 'ciou':
        return 1-iouk + u + alpha*ar
    elif variant == 'logciou':
        # "I found that -log(IoU) is more stable and converge faster than (1-IoU)"
        return -tf.math.log(iouk) + u + alpha*ar
    else:
        return None
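A minimal usage sketch of the refactored loss (assumes the repository root is on the Python path and that boxes are given in the normalized corner format the function expects):

```python
# Usage sketch for the new 'variant' argument -- illustrative values only.
import tensorflow as tf
from utils.training import ciou_loss

y_true = tf.constant([[0.10, 0.10, 0.50, 0.50]])
y_pred = tf.constant([[0.15, 0.12, 0.55, 0.48]])

print(ciou_loss(y_true, y_pred))                  # default: DIoU
print(ciou_loss(y_true, y_pred, variant='ciou'))  # adds the aspect-ratio penalty
```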

class LearningRateDecay(Callback):
def __init__(self, methode='linear', base_lr=1e-3, n_desired=40000, desired=0.1, bias=0.0, minimum=0.1):
@@ -477,7 +476,7 @@ def filter_signal(x, y, window_length=1000):
    return x, y


def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only=False, autoscale=True):
def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only=False, autoscale=True, legend_loc='best'):
"""Plot and compares the training log contained in './checkpoints/'.
# Agrumets
Expand All @@ -494,7 +493,7 @@ def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only
Different batch size leads to different epoch length.
"""

loss_terms = {'loss', 'error'}
loss_terms = {'loss', 'error', 'abs'}
metric_terms = {'precision', 'recall', 'fmeasure', 'accuracy', 'sparsity', 'visibility'}

if type(log_dirs) == str:
@@ -580,7 +579,7 @@ def plot_log(log_dirs, names=None, limits=None, window_length=250, filtered_only

if ymax > 0:
plt.title(k, y=1.05)
plt.legend()
plt.legend(loc=legend_loc)

ax1 = plt.gca()
ax1.set_xlim(xmin, xmax)
@@ -611,7 +610,7 @@

def plot_history(log_dirs, names=None, limits=None, autoscale=True):

loss_terms = {'loss', 'error'}
loss_terms = {'loss', 'error', 'abs'}
metric_terms = {'precision', 'recall', 'fmeasure', 'accuracy', 'sparsity', 'visibility'}

if type(log_dirs) == str:
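A short usage sketch of `plot_log` with the new `legend_loc` argument (the run directory name is one of the checkpoints listed in the README; the exact layout of './checkpoints/' is an assumption):

```python
# Usage sketch -- directory name and checkpoint layout are assumptions.
from utils.training import plot_log

plot_log(['201806021007_dsodsl512_synthtext'],
         names=['SegLink DSOD + Focal Loss'],
         window_length=250,
         legend_loc='upper right')
```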
