forked from cltl/KafNafParserPy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
coreference_data.py
173 lines (147 loc) · 5.06 KB
/
coreference_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
"""
This module implements a parser for the coreference layer in KAF/NAF
"""
from lxml import etree
from external_references_data import *
from span_data import Cspan
class Ccoreference:
    """
    Wrapper around a single coreference element (a C{coref} node) of a KAF/NAF document
    """

    def __init__(self,node=None,type='NAF'):
        """
        Builds the wrapper, either around an existing node or around a fresh one
        @type node: xml Element or None
        @param node: the element to wrap. When it is None a new empty 'coref' element is created
        @type type: string
        @param type: format of the document the element belongs to (KAF or NAF)
        """
        self.type = type
        self.node = etree.Element('coref') if node is None else node

    def get_node(self):
        """
        Gives access to the wrapped xml element
        @rtype: xml Element
        @return: the element wrapped by this object
        """
        return self.node

    def get_id(self):
        """
        Reads the identifier of the coreference ('id' in NAF, 'coid' in KAF)
        @rtype: string
        @return: identifier of the coreference object (None for any other document type)
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('coid')
        return None

    def set_id(self, this_id):
        """
        Writes the identifier of the coreference ('id' in NAF, 'coid' in KAF).
        Nothing is written for any other document type
        @type this_id: string
        @param this_id: identifier of the coreference object
        """
        if self.type == 'NAF':
            return self.node.set('id', this_id)
        if self.type == 'KAF':
            return self.node.set('coid', this_id)
        return None

    def get_type(self):
        """
        Reads the 'type' attribute of the coreference (only done for NAF objects)
        @rtype: string
        @return: type of the coreference object, or None when the object is not NAF
        """
        return self.node.get('type') if self.type == 'NAF' else None

    def set_type(self, this_type):
        """
        Writes the 'type' attribute of the coreference (only done for NAF objects)
        @type this_type: string
        @param this_type: type of the coreference object
        """
        if self.type != 'NAF':
            return None
        return self.node.set('type', this_type)

    def add_span(self,term_span):
        """
        Creates a new span out of a list of term ids and appends it to the coreference
        @type term_span: list
        @param term_span: list of term ids
        """
        span = Cspan()
        span.create_from_ids(term_span)
        span_node = span.get_node()
        self.node.append(span_node)

    def get_spans(self):
        """
        Iterator over the spans of the coreference (its direct 'span' children)
        @rtype: L{Cspan}
        @return: span objects of the coreference object (iterator)
        """
        for span_element in self.node.findall('span'):
            yield Cspan(span_element)

    def get_external_references(self):
        """
        Iterator over the external references stored under the 'externalReferences' child.
        Nothing is yielded when that child does not exist
        @rtype: L{CexternalReference}
        @return: iterator for external references
        """
        container = self.node.find('externalReferences')
        if container is None:
            return
        for reference in CexternalReferences(container):
            yield reference
class Ccoreferences:
    """
    This class encapsulates the coreference layer (a set of coreference objects)
    """

    def __init__(self,node=None, type='NAF'):
        """
        Constructor of the object
        @type node: xml Element or None (to create and empty one)
        @param node: this is the node of the element. If it is None it will create a new object
        @type type: string
        @param type: the type of the object (KAF or NAF)
        """
        self.type = type
        if node is None:
            self.node = etree.Element('coreferences')
        else:
            self.node = node

    def add_coreference(self,coreference):
        """
        Appends a coreference to the layer
        @type coreference: L{Ccoreference}
        @param coreference: object whose node (as returned by get_node()) is appended to the layer
        """
        self.node.append(coreference.get_node())

    def get_node(self):
        """
        Returns the node of the element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node

    def __get_corefs_nodes(self):
        # Yields the raw 'coref' child elements of the layer
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def __rename_id_attribute(self, old_name, new_name):
        # Moves the identifier of every 'coref' node from old_name to new_name.
        # Nodes that do not carry old_name are skipped: passing None as an
        # attribute value is not a valid way to set an xml attribute.
        for coref_node in self.__get_corefs_nodes():
            identifier = coref_node.get(old_name)
            if identifier is None:
                continue
            coref_node.set(new_name, identifier)
            del coref_node.attrib[old_name]

    def get_corefs(self):
        """
        Iterator that returns all the coreference objects
        @rtype: L{Ccoreference}
        @return: list of coreference objects (iterator)
        """
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node,self.type)

    def to_kaf(self):
        """
        Converts the coreference layer to KAF: the 'id' attribute of every
        coreference becomes 'coid'. Does nothing if the layer is not NAF
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'coid')
            # Record the new format so that get_corefs() builds objects that
            # read 'coid', and so that a later to_naf() can convert back
            self.type = 'KAF'

    def to_naf(self):
        """
        Converts the coreference layer to NAF: the 'coid' attribute of every
        coreference becomes 'id'. Does nothing if the layer is not KAF
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('coid', 'id')
            # Record the new format so that get_corefs() builds objects that
            # read 'id', and so that a later to_kaf() can convert back
            self.type = 'NAF'