-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutils.py
66 lines (55 loc) · 2.42 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# tagger do pos tagging of the question
# ===== imports =====
from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError
import re
# ===== definitions =====
def dist_all_synsets(first,second) :
f_syns = wn.synsets(first)
s_syns = wn.synsets(second)
#Path SImilarity
#A 0-1 similarity score based on the shortest path that connects the senses in the is-a (hypernym/hypnoym) taxonomy.
#A score of 1 represents identity i.e. comparing a sense with itself will return 1.
least_sim = 0.0
for f in f_syns :
for s in s_syns :
path_sim = wn.path_similarity(f,s)
if path_sim > least_sim :
least_sim = path_sim;
return 1.-least_sim
def splitCamelCase(string):
if type(string) is list:
return [splitCamelCase(w) for w in string]
return ' '.join(re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)', string)).lower()
def splitSnakeCase(string):
return ' '.join(string.split('_'))
def makeRelation(string):
if type(string) is list:
string=' '.join(string)
upper = string.title()
words = upper.split()
words[0]=words[0].lower()
return ''.join(words)
def getSynonyms(word):
if type(word) is list:
return [getSynonyms(w) for w in word]
syns = [x.lemma_names() for x in wn.synsets(word)]
syns = list(set(reduce(lambda x,y: x+y,syns)))
return [splitSnakeCase(s) for s in syns]
def stripDownExtraWords(orginalString):
if type(orginalString) is list:
return [stripDownExtraWords(string) for string in orginalString]
toRemove = ['do', 'the', 'a', 'of', 'as', 'in', 'on', 'at', 'does', 'who', 'determiner','did',
'pronoun', 'adjective', 'number', 'conjunction', 'preposition', 'modal','are', 'an',
'whom', 'what', 'why', 'how', 'where', 'was', 'were', 'this', 'is', 'by', 'which',
'for', 'has', 'have', 'all', 'often']
words = orginalString.lower().split()
exists = [x for x in words if x in toRemove]
newWords = [x for x in words if x not in exists]
return ' '.join(newWords)
# ===== main testing =====
if __name__ == "__main__":
#print(stripDownExtraWords(splitCamelCase(['thisIsNotANiceString','GoodFeeling','Holy moly'])))
#print(getSynonyms('flows'))
#print(makeRelation('yaser is great'))
print(splitCamelCase('yaser'))