-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcalvo_classifier.py
executable file
·99 lines (81 loc) · 3.7 KB
/
calvo_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#-----------------------------------------------------------------------------
# Program Name: calvo_classifier.py
# Program Description: Rodan wrapper for Calvo's classifier
#-----------------------------------------------------------------------------
import cv2
import numpy as np
import os
from rodan.jobs.base import RodanTask
from . import recognition_engine as recognition
"""Wrap Calvo classifier in Rodan."""
class CalvoClassifier(RodanTask):
    """Rodan job wrapping Calvo's pixelwise classifier.

    The job reads one input image and one pre-trained Keras model, runs
    ``recognition.process_image`` on them and writes one RGBA PNG per
    connected output port ('Background', 'Music symbol', 'Staff lines',
    'Text').
    """

    name = "Pixelwise Analysis of Music Document"
    author = "Jorge Calvo-Zaragoza, Gabriel Vigliensoni, and Ichiro Fujinaga"
    description = "Given a pre-trained Convolutional neural network, the job performs a pixelwise analysis of music document images."
    enabled = True
    category = "OMR - Layout analysis"
    interactive = False

    # JSON-schema style description of the user-editable settings.
    settings = {
        'title': 'Feature window',
        'job_queue': 'GPU',
        'type': 'object',
        'properties': {
            'Vertical span': {
                'type': 'integer',
                'minimum': 1,
                'default': 25
            },
            'Horizontal span': {
                'type': 'integer',
                'minimum': 1,
                'default': 25
            }
        }
    }

    input_port_types = (
        {'name': 'Image', 'minimum': 1, 'maximum': 1, 'resource_types': lambda mime: mime.startswith('image/')},
        {'name': 'Model', 'minimum': 1, 'maximum': 1, 'resource_types': ['keras/model+hdf5'] },
    )

    # Every output is optional (minimum 0); only connected ports are written.
    output_port_types = (
        {'name': 'Background', 'minimum': 0, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'Music symbol', 'minimum': 0, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'Staff lines', 'minimum': 0, 'maximum': 1, 'resource_types': ['image/rgba+png']},
        {'name': 'Text', 'minimum': 0, 'maximum': 1, 'resource_types': ['image/rgba+png']}
    )

    def _extract_layer(self, image, analysis, label):
        """Return a BGRA copy of *image* keeping only pixels labelled *label*.

        Pixels whose value in *analysis* differs from *label* are painted
        white and made fully transparent (alpha 0); matching pixels keep
        their original colour with alpha 255.

        NOTE(review): assumes *analysis* is a single-channel label map with
        the same height and width as *image* -- confirm against
        ``recognition.process_image``.
        """
        # Lower and upper bound are identical: select exactly this label.
        bound = np.array(label, dtype=np.uint8)
        mask = cv2.inRange(analysis, bound, bound)

        layer = cv2.bitwise_and(image, image, mask=mask)
        layer[mask == 0] = (255, 255, 255)

        # Alpha = 0 when background
        alpha_channel = np.ones(mask.shape, dtype=mask.dtype) * 255
        alpha_channel[mask == 0] = 0

        b_channel, g_channel, r_channel = cv2.split(layer)
        return cv2.merge((b_channel, g_channel, r_channel, alpha_channel))

    def run_my_task(self, inputs, settings, outputs):
        """Entry point: classify the input image and write the layer images.

        Args:
            inputs: mapping of input port name to a list of resources; the
                'resource_path' of the first 'Image' and 'Model' entries is
                used.
            settings: mapping holding 'Vertical span' and 'Horizontal span'.
            outputs: mapping of connected output port name to a list of
                resources; the first entry's 'resource_path' receives the
                PNG for that layer.

        Returns:
            True once every connected output has been written.

        Raises:
            IOError: if the input image cannot be read or an output image
                cannot be written.
        """
        # Ports
        image_filepath = inputs['Image'][0]['resource_path']
        model_filepath = inputs['Model'][0]['resource_path']

        # Settings
        vspan = settings['Vertical span']
        hspan = settings['Horizontal span']

        # Process
        # cv2.IMREAD_COLOR has the value 1, i.e. the same flag the previous
        # boolean ``True`` stood for, but is an explicit integer constant.
        image = cv2.imread(image_filepath, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError(
                "Could not read input image: {0}".format(image_filepath))

        analysis = recognition.process_image(image, model_filepath, vspan, hspan)

        # Let user define the number of labels?
        # The position in this tuple is the label value written to that port.
        label_ports = ('Background', 'Music symbol', 'Staff lines', 'Text')

        for label, port in enumerate(label_ports):
            if port not in outputs:
                # Port not connected: skip the rendering work entirely.
                continue

            layer_rgba = self._extract_layer(image, analysis, label)

            # cv2.imwrite picks the encoder from the file extension, so write
            # to "<path>.png" first and then move the file onto the real
            # resource path.
            resource_path = outputs[port][0]['resource_path']
            png_path = resource_path + '.png'
            if not cv2.imwrite(png_path, layer_rgba):
                raise IOError(
                    "Could not write output image for port '{0}': {1}".format(
                        port, png_path))
            os.rename(png_path, resource_path)

        return True