-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcalvo_trainer.py
132 lines (110 loc) · 5.94 KB
/
calvo_trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -----------------------------------------------------------------------------
# Program Name: calvo_trainer.py
# Program Description: Rodan wrapper for Calvo's classifier training
# -----------------------------------------------------------------------------
import cv2
import numpy as np
import os
from rodan.jobs.base import RodanTask
from . import training_engine as training
"""Wrap Calvo classifier training in Rodan."""
def _read_image(path, flags, port_name):
    """Load the image at *path* with OpenCV, failing loudly when it cannot be read.

    cv2.imread reports failure by returning None rather than raising, so the
    result is checked here to produce an error that names the offending port.

    Raises:
        IOError: if OpenCV could not read or decode the file.
    """
    image = cv2.imread(path, flags)
    if image is None:
        raise IOError(
            "Could not read the '{0}' input image at {1}".format(port_name, path)
        )
    return image


def _opaque_mask(layer, label):
    """Return a boolean mask of the fully opaque (alpha == 255) pixels of *layer*.

    Raises:
        ValueError: if *layer* has no fourth (alpha) channel.
    """
    if layer.ndim != 3 or layer.shape[2] < 4:
        raise ValueError(
            "The '{0}' layer has no alpha channel (shape {1})".format(label, layer.shape)
        )
    return layer[:, :, 3] == 255


def _build_ground_truth(background, notes, lines, text, regions):
    """Combine the RGBA layers into one categorical ground-truth matrix.

    Each pixel is labelled -1 (unknown), 0 (background), 1 (music symbol),
    2 (staff line) or 3 (text). A single category per pixel is assumed: the
    per-layer contributions are summed, so overlapping layers would add up.

    Returns:
        A 2-D int16 array with the height and width of the layers.

    Raises:
        ValueError: if a layer lacks an alpha channel or the layers differ in size.
    """
    expected_shape = background.shape[:2]
    masks = {}
    for label, layer in (('background', background),
                         ('music symbol', notes),
                         ('staff lines', lines),
                         ('text', text),
                         ('selected regions', regions)):
        mask = _opaque_mask(layer, label)
        if mask.shape != expected_shape:
            raise ValueError(
                "The '{0}' layer is {1} but the background layer is {2}".format(
                    label, mask.shape, expected_shape)
            )
        masks[label] = mask

    regions_mask = masks['selected regions']
    # Restrict each annotated layer to the pixels inside the selected regions.
    notes_mask = np.logical_and(masks['music symbol'], regions_mask)
    lines_mask = np.logical_and(masks['staff lines'], regions_mask)
    text_mask = np.logical_and(masks['text'], regions_mask)
    # Background is already restricted to the selected regions (based on Pixel.js' behaviour).
    background_mask = masks['background']

    # Weights 1..4 minus 1 give 0..3 for labelled pixels and -1 where nothing is set.
    labels = background_mask * 1 + notes_mask * 2 + lines_mask * 3 + text_mask * 4
    # A signed dtype is required to hold -1. int16 is what the former
    # `uint8 * -1` expression was promoted to on NumPy 1.x; on NumPy >= 2 that
    # expression raises OverflowError instead.
    return (labels - 1).astype(np.int16)


class CalvoTrainer(RodanTask):
    """Rodan job that trains the pixelwise document-analysis classifier.

    How it works:
    - Layers are processed to create a categorical ground-truth:
        - Matrix with the same height and width as the input layers
        - Each cell contains a single categorical value
        - When no information is known about a pixel, it is indicated as -1
    - Input and categorical images are used to call the training engine
    - The output port path is also passed, so the engine knows where to save
    - The training engine is asked to save the model at that path + '.hdf5'
    - Because of how Rodan works, the file is then renamed to the exact
      output port path (without '.hdf5')
    """

    name = "Training model for Pixelwise Analysis of Music Document"
    author = "Jorge Calvo-Zaragoza, Gabriel Vigliensoni, and Ichiro Fujinaga"
    description = "The job performs the training of a neural network model for the pixelwise analysis of music document images."
    enabled = True
    category = "OMR - Layout analysis"
    interactive = False

    settings = {
        'title': 'Training parameters',
        'job_queue': 'GPU',
        'type': 'object',
        'properties': {
            'Maximum number of samples per class': {
                'type': 'integer',
                'minimum': 1,
                'default': 50
            },
            'Maximum number of training epochs': {
                'type': 'integer',
                'minimum': 1,
                'default': 5
            },
            'Vertical span': {
                'type': 'integer',
                'minimum': 1,
                'default': 25
            },
            'Horizontal span': {
                'type': 'integer',
                'minimum': 1,
                'default': 25
            }
        }
    }

    input_port_types = (
        {'name': 'Image', 'minimum': 1, 'maximum': 1, 'resource_types': lambda mime: mime.startswith('image/')},
        {'name': 'rgba PNG - Background layer', 'minimum': 1, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'rgba PNG - Music symbol layer', 'minimum': 1, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'rgba PNG - Staff lines layer', 'minimum': 1, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'rgba PNG - Text', 'minimum': 1, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'rgba PNG - Selected regions', 'minimum': 1, 'maximum': 1, 'resource_types': ['image/rgba+png']}
    )

    output_port_types = (
        {'name': 'Model', 'minimum': 1, 'maximum': 1, 'resource_types': ['keras/model+hdf5']},
    )

    def run_my_task(self, inputs, settings, outputs):
        """Build the ground-truth from the input layers and train the model.

        Args:
            inputs: mapping of input port name to a list of resources; the
                'resource_path' of the first resource of each port is read.
            settings: mapping holding the four training parameters declared
                in ``settings['properties']``.
            outputs: mapping of output port name to a list of resources; the
                trained model ends up at the 'Model' resource path.

        Returns:
            True once the model file has been moved to the output path.

        Raises:
            IOError: if any input image cannot be read.
            ValueError: if a layer lacks an alpha channel or layer sizes differ.
        """
        def load(port_name, flags):
            return _read_image(inputs[port_name][0]['resource_path'], flags, port_name)

        # Ports
        input_image = load('Image', cv2.IMREAD_COLOR)  # 3-channel
        background = load('rgba PNG - Background layer', cv2.IMREAD_UNCHANGED)  # 4-channel
        notes = load('rgba PNG - Music symbol layer', cv2.IMREAD_UNCHANGED)  # 4-channel
        lines = load('rgba PNG - Staff lines layer', cv2.IMREAD_UNCHANGED)  # 4-channel
        text = load('rgba PNG - Text', cv2.IMREAD_UNCHANGED)  # 4-channel
        regions = load('rgba PNG - Selected regions', cv2.IMREAD_UNCHANGED)  # 4-channel

        # Settings
        vspan = settings['Vertical span']
        hspan = settings['Horizontal span']
        max_samples_per_class = settings['Maximum number of samples per class']
        max_number_of_epochs = settings['Maximum number of training epochs']

        # Create categorical ground-truth: -1 (unknown) or 0, 1, 2, 3
        gt = _build_ground_truth(background, notes, lines, text, regions)

        output_model_path = outputs['Model'][0]['resource_path']
        trained_model_path = output_model_path + '.hdf5'

        # The engine's return value is not inspected here (unchanged behaviour).
        training.train_model(input_image, gt,
                             hspan, vspan,
                             output_model_path=trained_model_path,
                             max_samples_per_class=max_samples_per_class,
                             epochs=max_number_of_epochs)

        print('Finishing the job')
        # Rodan expects the model at the exact output port path, without the suffix.
        os.rename(trained_model_path, output_model_path)

        return True