Skip to content

Commit a8ec16b

Browse files
committed
feat(model): Integrate with evaluator and accuracy score
1 parent 851036e commit a8ec16b

File tree

3 files changed

+35
-15
lines changed

3 files changed

+35
-15
lines changed

atarashi/agents/models/test.py

+26-13
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,34 @@
11
import joblib
2+
import os
3+
import pickle
24
import argparse
35
from atarashi.libs.commentPreprocessor import CommentPreprocessor
46
from sklearn.svm import LinearSVC
57
from sklearn.feature_extraction.text import CountVectorizer
68
from sklearn.linear_model import LogisticRegression
79
from sklearn.naive_bayes import MultinomialNB
810

11+
def new(processed_comment, model):
    '''
    Predict the license of a preprocessed comment with a persisted classifier.

    The vectorizer and the selected classifier are loaded from the
    ``binaryFiles`` directory located next to this module, so the lookup does
    not depend on the current working directory.

    :param processed_comment: Preprocessed comment text to classify
    :param model: Name of the classifier to use; one of "lr_classifier",
                  "nb_classifier" or "svc_classifier"
    :return: Whatever the classifier's ``predict`` returns for the single
             vectorized comment
    :raises ValueError: If ``model`` is not one of the supported names
    '''
    # One table instead of three copy-pasted branches: each classifier name
    # maps to exactly one pickle file, so a branch cannot load the wrong model.
    model_files = {
        "lr_classifier": "lr_model.pkl",
        "nb_classifier": "nb_model.pkl",
        "svc_classifier": "svc_model.pkl",
    }

    # Fail fast, before any disk access. Previously an unknown name fell
    # through every branch and surfaced as an UnboundLocalError on return.
    if model not in model_files:
        raise ValueError(
            "Unknown model %r; expected one of: %s"
            % (model, ", ".join(sorted(model_files)))
        )

    binary_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'binaryFiles')

    # NOTE(review): joblib.load unpickles its input; these files are presumably
    # shipped with the package and must never come from an untrusted source.
    loaded_vect = joblib.load(os.path.join(binary_dir, 'vectorizer.pkl'))
    classifier = joblib.load(os.path.join(binary_dir, model_files[model]))

    return classifier.predict(loaded_vect.transform([processed_comment]))
31+
932

1033
def similarity_calc(filePath, model):
1134

@@ -22,24 +45,14 @@ def similarity_calc(filePath, model):
2245
:rtype: list(JSON Format)
2346
'''
2447

48+
match = []
49+
2550
commentFile = CommentPreprocessor.extract(filePath)
2651
with open(commentFile) as file:
2752
doc = file.read()
28-
29-
match = []
3053
processed_comment = CommentPreprocessor.preprocess(doc)
31-
loaded_vect = joblib.load("atarashi/agents/models/binaryFiles/vectorizer.pkl")
32-
if model == "lr_classifier":
33-
classifier = joblib.load("atarashi/agents/models/binaryFiles/nb_model.pkl")
34-
license_name = classifier.predict((loaded_vect.transform([processed_comment])))
54+
license_name = new(processed_comment,model)
3555

36-
elif model == "nb_classifier":
37-
classifier = joblib.load("atarashi/agents/models/binaryFiles/nb_model.pkl")
38-
license_name = classifier.predict((loaded_vect.transform([processed_comment])))
39-
40-
elif model == "svc_classifier":
41-
classifier = joblib.load("atarashi/agents/models/binaryFiles/nb_model.pkl")
42-
license_name = classifier.predict((loaded_vect.transform([processed_comment])))
4356

4457
match.append({
4558
'shortname': license_name[0],

atarashi/atarashii.py

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
5858
scanner = WordFrequencySimilarity(processedLicense)
5959
elif agent_name == "lr_classifier" or agent_name == "svc_classifier" or agent_name == "nb_classifier":
6060
result = similarity_calc(inputFile,agent_name)
61+
return result
6162
elif agent_name == "DLD":
6263
scanner = DameruLevenDist(processedLicense)
6364
elif agent_name == "tfidf":

atarashi/evaluator/evaluator.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ def getCommand(agent_name, similarity):
5151
command = "atarashi -a wordFrequencySimilarity"
5252
elif agent_name == "DLD":
5353
command = "atarashi -a DLD"
54+
elif agent_name == "lr_classifier":
55+
command = "atarashi -a lr_classifier"
56+
elif agent_name == "nb_classifier":
57+
command = "atarashi -a nb_classifier"
58+
elif agent_name == "svc_classifier":
59+
command = "atarashi -a svc_classifier"
5460
elif agent_name == "tfidf":
5561
command = "atarashi -a tfidf"
5662
if similarity == "CosineSim":
@@ -129,9 +135,9 @@ def evaluate(command):
129135
if __name__ == "__main__":
130136
parser = argparse.ArgumentParser()
131137
parser.add_argument("-a", "--agent_name", required=True,
132-
choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'], help="Name of the agent that you want to evaluate")
138+
choices=['wordFrequencySimilarity', 'DLD',"lr_classifier","svc_classifier","nb_classifier", 'tfidf', 'Ngram'], help="Name of the agent that you want to evaluate")
133139
parser.add_argument("-s", "--similarity", required=False,
134-
default=" ", choices=["ScoreSim", "CosineSim", "DiceSim", " ", "BigramCosineSim"], help="Specify the similarity algorithm that you want to evaluate"
140+
default=" ", choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"], help="Specify the similarity algorithm that you want to evaluate"
135141
" First 2 are for TFIDF and last 3 are for Ngram")
136142
args = parser.parse_args()
137143
agent_name = args.agent_name

0 commit comments

Comments
 (0)