-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTagHandler.py
336 lines (298 loc) · 12.4 KB
/
TagHandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
import os
import sqlite3
import logging
logobj = logging.getLogger(__name__)
class TagHandler() :
def __init__(self, tagdb_file):
self.tagdb = tagdb_file
if not os.path.exists(self.tagdb):
self.init()
self.conn = sqlite3.connect(self.tagdb)
def __del__(self):
self.conn.close()
def __init_tags_table(self, dbconn) :
dbconn.execute('''CREATE TABLE TAGS
(ID INT PRIMARY KEY NOT NULL,
TAGNAME TEXT NOT NULL);''')
dbconn.execute('''INSERT INTO TAGS
VALUES (0, 'dummy');''')
dbconn.execute('''CREATE TABLE TAGLINKS
(TAGID INT NOT NULL,
TAGPARENTID INT NOT NULL);''')
dbconn.execute('''INSERT INTO TAGLINKS
VALUES (0, 0);''')
dbconn.commit()
def __init_resources_table(self, dbconn) :
dbconn.execute('''CREATE TABLE RESOURCES
(ID INT PRIMARY KEY NOT NULL,
URL TEXT NOT NULL);''')
dbconn.execute('''INSERT INTO RESOURCES
VALUES (0,"dummy");''')
dbconn.execute('''CREATE TABLE RESOURCELINKS
(RESID INT NOT NULL,
TAGID INT NOT NULL)''')
dbconn.execute('''INSERT INTO RESOURCELINKS
VALUES (0,0);''')
dbconn.commit()
def init(self) :
dbconn = sqlite3.connect(self.tagdb)
self.__init_tags_table(dbconn)
self.__init_resources_table(dbconn)
dbconn.close()
# can also be used to check if tag is valid
def get_tag_id(self, tag_name) -> int:
query_str = "SELECT ID FROM TAGS WHERE TAGNAME='" + tag_name + "';"
res = self.conn.execute(query_str)
r = res.fetchone()
tid = -1
if r != None :
tid = r[0]
return tid
def get_tag_name(self, tag_id) -> str:
query_str = "SELECT TAGNAME FROM TAGS WHERE ID=" + str(tag_id) + ";"
res = self.conn.execute(query_str)
r = res.fetchone()
tag_name = ""
if r != None :
tag_name = str(r[0])
return tag_name
def get_tag_list(self) -> list:
query_str = "SELECT TAGNAME FROM TAGS WHERE ID > 0;"
res = self.conn.execute(query_str)
taglist = []
for r in res :
taglist.append(r[0])
return taglist
def add_tag(self, tag_name) -> int :
tag_exists = self.get_tag_id(tag_name)
if tag_exists > 0 :
return False
res = self.conn.execute("SELECT max(ID) FROM TAGS;")
r = res.fetchone()
max_tag_id = r[0]
new_tag_id = max_tag_id + 1
query_str = "INSERT INTO TAGS VALUES (" + str(new_tag_id) + ",'" + tag_name + "');"
self.conn.execute(query_str)
self.conn.commit()
return True
def rename_tag(self, tag_name, new_tag_name) :
tag_exists = self.get_tag_id(new_tag_name)
if tag_exists > 0 :
return False
tag_exists = self.get_tag_id(tag_name)
if tag_exists < 0 :
return False
query_str = "UPDATE TAGS SET TAGNAME='" + new_tag_name + "' WHERE ID=" + str(tag_exists) + ";"
self.conn.execute(query_str)
self.conn.commit()
return True
def link_tag(self, tag_name, tag_parent_name) :
src_tag_id = self.get_tag_id(tag_name)
parent_tag_id = self.get_tag_id(tag_parent_name)
if (src_tag_id < 0 or parent_tag_id < 0) :
if src_tag_id < 0 :
logobj.error("tags not in db")
return False
query_str = "INSERT INTO TAGLINKS VALUES (" + str(src_tag_id) + "," + str(parent_tag_id) + ");"
self.conn.execute(query_str)
self.conn.commit()
return True
def unlink_tag(self, tag_name, tag_parent_name) :
src_tag_id = self.get_tag_id(tag_name)
parent_tag_id = self.get_tag_id(tag_parent_name)
if (src_tag_id < 0 or parent_tag_id < 0) :
if src_tag_id < 0 :
logobj.error("tags not in db")
return False
query_str = "DELETE FROM TAGLINKS WHERE TAGID=" + str(src_tag_id) + " AND TAGPARENTID=" + str(parent_tag_id) + ";"
self.conn.execute(query_str)
self.conn.commit()
return True
def get_parent_tags_by_id(self, tag_id) -> list :
query_str = "SELECT TAGPARENTID FROM TAGLINKS WHERE TAGID=" + str(tag_id) + ";"
res = self.conn.execute(query_str)
parent_ids = []
for r in res :
parent_ids.append(r[0])
return parent_ids
def get_parent_tags(self, tag_name) -> list :
tag_id = self.get_tag_id(tag_name)
parent_ids = self.get_parent_tags_by_id(tag_id)
parent_tags = map(self.get_tag_name, parent_ids)
return list(parent_tags)
def get_child_tags_by_id(self, tag_id) -> list :
query_str = "SELECT TAGID FROM TAGLINKS WHERE TAGPARENTID=" + str(tag_id) + ";"
res = self.conn.execute(query_str)
child_ids = []
for r in res :
child_ids.append(r[0])
return child_ids
def get_child_tags(self, tag_name) -> list :
tag_id = self.get_tag_id(tag_name)
child_ids = self.get_child_tags_by_id(tag_id)
child_tags = map(self.get_tag_name, child_ids)
return list(child_tags)
def get_downstream_tags_by_id(self, tag_id) -> list :
traverse_ids = [tag_id]
downstream_ids = []
visited_ids = set()
while (len(traverse_ids) > 0) :
node_id = traverse_ids.pop(0)
if node_id in visited_ids :
continue
visited_ids.add(node_id)
child_ids = self.get_child_tags_by_id(node_id)
for cid in child_ids :
downstream_ids.append(cid)
traverse_ids.append(cid)
return downstream_ids
def get_downstream_tags(self, tag_name) -> list :
tag_id = self.get_tag_id(tag_name)
downstream_tagids = self.get_downstream_tags_by_id(tag_id)
downstream_tags = map(self.get_tag_name, downstream_tagids)
return list(downstream_tags)
def get_tag_closure(self, tags) :
tags_closure = []
for tag in tags :
tagid = self.get_tag_id(tag)
if tagid < 0 :
logobj.warning("tag " + tag + " not present in the db")
continue
tags_closure.append(tag)
downstreamtags = self.get_downstream_tags(tag)
for dtag in downstreamtags :
tags_closure.append(dtag)
tags_closure = list(set(tags_closure))
return tags_closure
def add_resource(self, resource_url) :
res = self.conn.execute("SELECT max(ID) FROM RESOURCES;")
r = res.fetchone()
max_res_id = 0
if r != None :
max_res_id = r[0]
new_res_id = max_res_id + 1
query_str = "INSERT INTO RESOURCES VALUES (" + str(new_res_id) + ",\"" + resource_url + "\")"
self.conn.execute(query_str)
self.conn.commit()
return new_res_id
def get_resource_ids(self) :
res = self.conn.execute("SELECT ID FROM RESOURCES WHERE ID > 0;")
tag_ids = []
for r in res :
tag_ids.append(r[0])
return tag_ids
def get_resource_url(self, res_id) -> str :
query_str = "SELECT URL FROM RESOURCES WHERE ID=" + str(res_id)+ ";"
res = self.conn.execute(query_str)
r = res.fetchone()
url = ""
if r != None :
url = r[0]
return url
def update_resource_url_by_id(self, resource_id, resource_url) :
query_str = "UPDATE RESOURCES SET URL=\"" + str(resource_url) + "\"" + " WHERE ID=" + str(resource_id) + ";"
self.conn.execute(query_str)
self.conn.commit()
def update_resource_url(self, resource_url, new_resource_url) :
res_id = self.get_resource_id(resource_url)
if res_id < 0 :
logobj.error("resource not tracked")
self.update_resource_url_by_id(res_id, new_resource_url)
def update_resource_sub_url(self, sub_resource_url, update_url) :
res_ids = self.get_resource_ids_containing_url(sub_resource_url)
for res_id in res_ids :
url = self.get_resource_url(res_id)
url = url.replace(sub_resource_url, update_url, 1)
self.update_resource_url_by_id(res_id, url)
def get_resource_ids_containing_url(self, sub_resource_url) :
query_str = "SELECT ID FROM RESOURCES WHERE URL LIKE " + "\"%" + sub_resource_url + "%\""
res = self.conn.execute(query_str)
res_ids = []
for r in res :
res_ids.append(r[0])
return res_ids
def get_resource_id(self, resource_url) :
query_str = "SELECT ID FROM RESOURCES WHERE URL=\"" + resource_url + "\";"
res = self.conn.execute(query_str)
r = res.fetchone()
res_id = -1
if r != None :
res_id = r[0]
return res_id
def get_resource_tags_by_id(self, resource_id) -> list :
query_str = "SELECT TAGID FROM RESOURCELINKS WHERE RESID=" + str(resource_id) + ";"
res = self.conn.execute(query_str)
tag_ids = []
for r in res :
tag_ids.append(r[0])
return tag_ids
def get_resource_tags(self , resource_url) -> list :
res_id = self.get_resource_id(resource_url)
tag_ids = self.get_resource_tags_by_id(res_id)
return tag_ids
def add_resource_tag_by_id(self, resource_id, tag_id) :
current_tags = self.get_resource_tags_by_id(resource_id)
if current_tags.count(tag_id) == 0 :
query_str = "INSERT INTO RESOURCELINKS VALUES (" + str(resource_id) + "," + str(tag_id) + ");"
self.conn.execute(query_str)
self.conn.commit()
def del_all_resource_tags(self, resource_id) :
query_str = "DELETE FROM RESOURCELINKS WHERE RESID=" + str(resource_id) + ";"
self.conn.execute(query_str)
self.conn.commit()
def del_resource_tag_by_id(self, resource_id, tag_id) :
current_tags = self.get_resource_tags_by_id(resource_id)
if current_tags.count(tag_id) > 0 :
query_str = "DELETE FROM RESOURCELINKS WHERE RESID=" + str(resource_id) + " AND TAGID=" + str(tag_id) + ";"
self.conn.execute(query_str)
self.conn.commit()
def del_resource_tags(self, resource_url, tags) :
res_id = self.get_resource_id(resource_url)
if res_id < 0 :
logobj.error("resource not tracked")
tag_ids = list(map(self.get_tag_id, tags))
for tid in tag_ids :
if tid < 0 :
logobj.error("tag not id db")
self.del_resource_tag_by_id(res_id, tid)
def add_resource_tags(self, resource_url, tags) :
res_id = self.get_resource_id(resource_url)
if res_id < 0 :
logobj.error("resource not tracked")
unsuccessful_tags = []
for tag in tags :
tag_id = self.get_tag_id(tag)
if tag_id > 0 :
self.add_resource_tag_by_id(res_id, tag_id)
else :
unsuccessful_tags.append(tag)
return unsuccessful_tags
def get_resource_tags(self, resource_url) :
res_id = self.get_resource_id(resource_url)
if res_id < 0 :
logobj.error("resource not tracked")
tag_ids = self.get_resource_tags_by_id(res_id)
return list(map(self.get_tag_name, tag_ids))
def get_resources_by_tag_id(self, tag_ids) :
resource_ids = []
for tid in tag_ids :
query_str = "SELECT RESID FROM RESOURCELINKS WHERE TAGID=" + str(tid) + ";"
res = self.conn.execute(query_str)
for r in res :
resource_ids.append(r[0])
resource_ids = list(set(resource_ids))
return resource_ids
def get_resources_by_tag(self, tags) :
tag_ids = list(map(self.get_tag_id, tags))
res_ids = self.get_resources_by_tag_id(tag_ids)
res = list(map(self.get_resource_url, res_ids))
return res
def del_resource(self, resource_url) :
res_id = self.get_resource_id(resource_url)
if res_id < 0 :
logobj.warning("resource not tracked")
return
self.del_all_resource_tags(res_id)
query_str = "DELETE FROM RESOURCES WHERE ID=" + str(res_id) + ";"
self.conn.execute(query_str)
self.conn.commit()