support Font color and Line color

Merge code from #66
Sanster · Sep 22, 2019 · d8c43fc · d8c43fc
1 parent 7082899
commit d8c43fc
Show file tree

Hide file tree

Showing 10 changed files with 286 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -44,6 +44,8 @@ new config file and use it by `--config_file` option), here are some examples:
 |Emboss|![emboss](./imgs/effects/emboss.jpg)|
 |Reverse color|![reverse color](./imgs/effects/reverse.jpg)|
 |Blur|![blur](./imgs/effects/blur.jpg)|
+|Text color|![font_color](./imgs/effects/colored.jpg)|
+|Line color|![line_color](./imgs/effects/table.jpg)|
 
 3. Run `main.py` file.
 

diff --git a/configs/default.yaml b/configs/default.yaml
@@ -3,6 +3,19 @@ font_size:
   min: 14
   max: 23
 
+# choose Text color range
+# color boundary is in R,G,B format
+font_color:
+  enable: false
+  blue:
+    fraction: 0.5
+    l_boundary: [0,0,150]
+    h_boundary: [60,60,255]
+  brown:
+    fraction: 0.5
+    l_boundary: [139,70,19]
+    h_boundary: [160,82,43]
+
 # By default, text is drawn by Pillow (see https://stackoverflow.com/questions/43828955/measuring-width-of-text-python-pil)
 # If `random_space` is enabled, some text will be drawn char by char with a random space
 random_space:
@@ -112,6 +125,17 @@ line:
     enable: false
     fraction: 0.5
 
+line_color:
+  enable: false
+  black:
+    fraction: 0.5
+    l_boundary: [0,0,0]
+    h_boundary: [64,64,64]
+  blue:
+    fraction: 0.5
+    l_boundary: [0,0,150]
+    h_boundary: [60,60,255]
+
 # These operations are applied to the final output image,
 # so they can also be applied during training as a data augmentation method.
 

diff --git a/configs/test.yaml b/configs/test.yaml
@@ -0,0 +1,155 @@
+# A small font_size will make text look blurred, as if `prydown` had been applied
+font_size:
+  min: 14
+  max: 23
+
+# choose Text color range
+# color boundary is in R,G,B format
+font_color:
+  enable: true
+  blue:
+    fraction: 0.5
+    l_boundary: [0,0,150]
+    h_boundary: [60,60,255]
+  brown:
+    fraction: 0.5
+    l_boundary: [139,70,19]
+    h_boundary: [160,82,43]
+
+# By default, text is drawn by Pillow (see https://stackoverflow.com/questions/43828955/measuring-width-of-text-python-pil)
+# If `random_space` is enabled, some text will be drawn char by char with a random space
+random_space:
+  enable: true
+  fraction: 0.3
+  min: -0.1 # -0.1 will make chars very close or even overlapped
+  max: 0.1
+
+# Do remap with sin()
+# Currently this process is very slow!
+curve:
+  enable: true
+  fraction: 0.5
+  period: 360  # degrees, period of the sin function
+  min: 1 # amplitude range of the sin function
+  max: 5
+
+# random crop text height
+crop:
+  enable: true
+  fraction: 0.5
+
+  # top and bottom will be applied equally
+  top:
+    min: 5
+    max: 10 # in pixels, this value should be smaller than img_height
+  bottom:
+    min: 5
+    max: 10 # in pixels, this value should be smaller than img_height
+
+# Use image in bg_dir as background for text
+img_bg:
+  enable: true
+  fraction: 0.5
+
+# Does not work when random_space is applied
+text_border:
+  enable: true
+  fraction: 0.5
+
+  # lighter than word color
+  light:
+    enable: true
+    fraction: 0.5
+
+  # darker than word color
+  dark:
+    enable: true
+    fraction: 0.5
+
+# https://docs.opencv.org/3.4/df/da0/group__photo__clone.html#ga2bf426e4c93a6b1f21705513dfeca49d
+# https://www.cs.virginia.edu/~connelly/class/2014/comp_photo/proj2/poisson.pdf
+# Use opencv seamlessClone() to draw text on background
+# For some background images, this will make the text image look more realistic
+seamless_clone:
+  enable: true
+  fraction: 0.5
+
+perspective_transform:
+  max_x: 25
+  max_y: 25
+  max_z: 3
+
+blur:
+  enable: true
+  fraction: 0.03
+
+# If blur is applied to an image, prydown will not be applied to it
+prydown:
+  enable: true
+  fraction: 0.03
+  max_scale: 1.5 # The image is first resized to 1.5x, and then resized to 1x
+
+noise:
+  enable: true
+  fraction: 0.3
+
+  gauss:
+    enable: true
+    fraction: 0.25
+
+  uniform:
+    enable: true
+    fraction: 0.25
+
+  salt_pepper:
+    enable: true
+    fraction: 0.25
+
+  poisson:
+    enable: true
+    fraction: 0.25
+
+line:
+  enable: true
+  fraction: 0.5
+
+  under_line:
+    enable: true
+    fraction: 0.2
+
+  table_line:
+    enable: true
+    fraction: 0.3
+
+  middle_line:
+    enable: true
+    fraction: 0.5
+
+line_color:
+  enable: true
+  black:
+    fraction: 0.5
+    l_boundary: [0,0,0]
+    h_boundary: [64,64,64]
+  blue:
+    fraction: 0.5
+    l_boundary: [0,0,150]
+    h_boundary: [60,60,255]
+
+# These operations are applied to the final output image,
+# so they can also be applied during training as a data augmentation method.
+
+# By default, text is darker than background.
+# If `reverse_color` is enabled, some images will have dark background and light text
+reverse_color:
+  enable: true
+  fraction: 0.5
+
+emboss:
+  enable: true
+  fraction: 0.1
+
+sharp:
+  enable: true
+  fraction: 0.1
+
diff --git a/imgs/effects/colored.jpg b/imgs/effects/colored.jpg
diff --git a/imgs/effects/extra.jpg b/imgs/effects/extra.jpg
diff --git a/imgs/effects/table.jpg b/imgs/effects/table.jpg
diff --git a/libs/utils.py b/libs/utils.py
@@ -84,7 +84,7 @@ def load_bgs(bg_dir):
             image_path = os.path.join(root, file_name)
 
             # For load non-ascii image_path on Windows
-            bg = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
+            bg = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
 
             dst.append(bg)
 

diff --git a/textrenderer/liner.py b/textrenderer/liner.py
@@ -21,6 +21,25 @@ def __init__(self, cfg):
         self.linestate = LineState()
         self.cfg = cfg
 
+    def get_line_color(self):
+        p = []
+        colors = []
+        for k, v in self.cfg.line_color.items():
+            if k == 'enable':
+                continue
+            p.append(v.fraction)
+            colors.append(k)
+
+        # pick color by fraction
+        color_name = np.random.choice(colors, p=p)
+        l_boundary = self.cfg.line_color[color_name].l_boundary
+        h_boundary = self.cfg.line_color[color_name].h_boundary
+        # random color by low and high RGB boundary
+        r = np.random.randint(l_boundary[0], h_boundary[0])
+        g = np.random.randint(l_boundary[1], h_boundary[1])
+        b = np.random.randint(l_boundary[2], h_boundary[2])
+        return b, g, r
+
     def apply(self, word_img, text_box_pnts, word_color):
         """
         :param word_img:  word image with big background
@@ -47,16 +66,19 @@ def apply(self, word_img, text_box_pnts, word_color):
 
         line_effect_func = np.random.choice(funcs, p=line_p)
 
-        return line_effect_func(word_img, text_box_pnts, word_color)
+        if self.cfg.line_color.enable or self.cfg.font_color.enable:
+            line_color = self.get_line_color()
+        else:
+            line_color = word_color + random.randint(0, 10)
+
+        return line_effect_func(word_img, text_box_pnts, line_color)
 
-    def apply_under_line(self, word_img, text_box_pnts, word_color):
+    def apply_under_line(self, word_img, text_box_pnts, line_color):
         y_offset = random.choice([0, 1])
 
         text_box_pnts[2][1] += y_offset
         text_box_pnts[3][1] += y_offset
 
-        line_color = word_color + random.randint(0, 10)
-
         dst = cv2.line(word_img,
                        (text_box_pnts[2][0], text_box_pnts[2][1]),
                        (text_box_pnts[3][0], text_box_pnts[3][1]),
@@ -66,7 +88,7 @@ def apply_under_line(self, word_img, text_box_pnts, word_color):
 
         return dst, text_box_pnts
 
-    def apply_table_line(self, word_img, text_box_pnts, word_color):
+    def apply_table_line(self, word_img, text_box_pnts, line_color):
         """
         There are 8 possible ways to draw the lines; horizontal lines span the whole word_img
         0/1/2/3: a single side only (left, top, right, bottom)
@@ -75,7 +97,6 @@ def apply_table_line(self, word_img, text_box_pnts, word_color):
         dst = word_img
         option = random.choice(self.linestate.tableline_options)
         thickness = random.choice(self.linestate.tableline_thickness)
-        line_color = word_color + random.randint(0, 10)
 
         top_y_offset = random.choice(self.linestate.tableline_y_offsets)
         bottom_y_offset = random.choice(self.linestate.tableline_y_offsets)
@@ -144,16 +165,15 @@ def is_right():
 
         return dst, text_box_pnts
 
-    def apply_middle_line(self, word_img, text_box_pnts, word_color):
+    def apply_middle_line(self, word_img, text_box_pnts, line_color):
         y_center = int((text_box_pnts[0][1] + text_box_pnts[3][1]) / 2)
 
-        img_mean = int(np.mean(word_img))
         thickness = np.random.choice(self.linestate.middleline_thickness, p=self.linestate.middleline_thickness_p)
 
         dst = cv2.line(word_img,
                        (text_box_pnts[0][0], y_center),
                        (text_box_pnts[1][0], y_center),
-                       color=img_mean,
+                       color=line_color,
                        thickness=thickness,
                        lineType=cv2.LINE_AA)
 

diff --git a/textrenderer/noiser.py b/textrenderer/noiser.py
@@ -41,11 +41,11 @@ def apply_gauss_noise(self, img):
         """
         Gaussian-distributed additive noise.
         """
-        row, col = img.shape
+        row, col, channel = img.shape
 
         mean = 0
         stddev = np.sqrt(15)
-        gauss_noise = np.zeros((row, col))
+        gauss_noise = np.zeros((row, col, channel))
         cv2.randn(gauss_noise, mean, stddev)
         out = img + gauss_noise
 
@@ -55,18 +55,18 @@ def apply_uniform_noise(self, img):
         """
         Apply zero-mean uniform noise
         """
-        row, col = img.shape
+        row, col, channel = img.shape
         alpha = 0.05
-        gauss = np.random.uniform(0 - alpha, alpha, (row, col))
-        gauss = gauss.reshape(row, col)
+        gauss = np.random.uniform(0 - alpha, alpha, (row, col, channel))
+        gauss = gauss.reshape(row, col, channel)
         out = img + img * gauss
         return out
 
     def apply_sp_noise(self, img):
         """
         Salt and pepper noise. Replaces random pixels with 0 or 255.
         """
-        row, col = img.shape
+        row, col, channel = img.shape
         s_vs_p = 0.5
         amount = np.random.uniform(0.004, 0.01)
         out = np.copy(img)