diff --git a/TraceabilityRunner.py b/TraceabilityRunner.py
index bae4963..293667a 100644
--- a/TraceabilityRunner.py
+++ b/TraceabilityRunner.py
@@ -22,12 +22,10 @@
 from precalculating.TraceLinkDataStructure import ElementLevelTraceLinkDataStructure, FileLevelTraceLinkDataStructure
 from precalculating.TraceLinkDataStructureFactory import ElementLevelTraceLinkDataStructureFactory, \
     FileLevelTraceLinkDataStructureFactory
-from preprocessing.CodeASTTokenizer import JavaCodeASTTokenizer, CCodeASTTokenizer
 from preprocessing.Preprocessor import CamelCaseSplitter, LowerCaseTransformer, \
     NonLetterFilter, UrlRemover, Separator, JavaCodeStopWordRemover, \
     StopWordRemover, Lemmatizer, WordLengthFilter, Preprocessor, POSFilter
-from preprocessing.Tokenizer import JavaDocDescriptionOnlyTokenizer, \
-    WordAndSentenceTokenizer, UCTokenizer, WordTokenizer, NameAndDescriptionTokenizer
+from preprocessing.Tokenizer import WordAndSentenceTokenizer, UCTokenizer, NameAndDescriptionTokenizer
 from traceLinkProcessing.ElementFilter import ElementFilter
 from traceLinkProcessing.NeighborHandler import NeighborStrategy
 from traceLinkProcessing.SimilarityFilter import SimilarityFilter
diff --git a/embeddingCreator/CodeEmbeddingCreator.py b/embeddingCreator/CodeEmbeddingCreator.py
index 4909578..e377611 100644
--- a/embeddingCreator/CodeEmbeddingCreator.py
+++ b/embeddingCreator/CodeEmbeddingCreator.py
@@ -63,7 +63,8 @@ def _handle_no_method_vectors_case(self, classifier, class_embedding_container):
         class_embedding_container.set_non_cg_vector(self._build_class_name_voter_key(classifier), class_name_vector)
         return class_embedding_container
 
-    def _build_class_name_voter_key(self, classifier):
+    @staticmethod
+    def _build_class_name_voter_key(classifier):
         return classifier.get_original_name() + "." + ClassEmbeddingContainer.CLASS_NAME_VOTER
diff --git a/embeddingCreator/EmbeddingCreator.py b/embeddingCreator/EmbeddingCreator.py
index c514942..3a3c961 100644
--- a/embeddingCreator/EmbeddingCreator.py
+++ b/embeddingCreator/EmbeddingCreator.py
@@ -1,4 +1,4 @@
-import abc, logging, traceback
+import abc, logging
 
 from javalang.parser import JavaSyntaxError, JavaParserError
 from javalang.tokenizer import LexerError
@@ -58,7 +58,7 @@ def embedd_all_files_in_directory(self, directory):
             except (JavaParserError, LexerError) as j:
                 log.info(f"SKIPPED: Error on tokenizing {filename} (Note: code files needs to be compilable): {j}")
                 continue
-            except (FileExtensionNotSupportedError):
+            except FileExtensionNotSupportedError:
                 continue
             file_embedding = self._create_embeddings(file_representation)
             if file_embedding:
diff --git a/embeddingCreator/RequirementEmbeddingCreator.py b/embeddingCreator/RequirementEmbeddingCreator.py
index 0d36cb2..598b6eb 100644
--- a/embeddingCreator/RequirementEmbeddingCreator.py
+++ b/embeddingCreator/RequirementEmbeddingCreator.py
@@ -1,10 +1,11 @@
 import logging
+from abc import ABC
 from pathlib import Path
 
-from embeddingCreator.EmbeddingContainer import RequirementEmbeddingContainer, EmbeddingContainer
+from embeddingCreator.EmbeddingContainer import RequirementEmbeddingContainer
 from embeddingCreator.EmbeddingCreator import EmbeddingCreator
-from preprocessing.FileRepresentation import FileRepresentation, UseCaseFileRepresentation
+from preprocessing.FileRepresentation import UseCaseFileRepresentation
 from utility import Util
 
 logging.basicConfig(level=logging.INFO)
@@ -13,7 +14,7 @@
 PREPROCESSED_REQ_OUTPUT_DIR = Path(__file__).parent.parent / "output/Preprocessed_Req"
 
 
-class RequirementEmbeddingCreator(EmbeddingCreator):
+class RequirementEmbeddingCreator(EmbeddingCreator, ABC):
 
     def __init__(self, requirements_word_chooser, preprocessor, wordemb_creator, tokenizer,
                  preprocessed_token_output_directory=PREPROCESSED_REQ_OUTPUT_DIR):
@@ -46,8 +47,8 @@ class RequirementVectorEmbeddingCreator(RequirementEmbeddingCreator):
 
     def _create_embeddings(self, file_representation):
         chosen_word_groups = self._requirements_word_chooser.choose_words_from(file_representation)
-        chosen_word_groups_embeddings = []
-        requirement_element_vectors = []
+        #chosen_word_groups_embeddings = []
+        #requirement_element_vectors = []
         element_vectors = {}
         for word_group_id in chosen_word_groups:
             word_embeddings = self._word_embedding_creator.create_word_list_embedding(chosen_word_groups[word_group_id])
diff --git a/embeddingCreator/SimilarityComparator.py b/embeddingCreator/SimilarityComparator.py
index d40a461..c591eb7 100644
--- a/embeddingCreator/SimilarityComparator.py
+++ b/embeddingCreator/SimilarityComparator.py
@@ -2,16 +2,6 @@
 from utility import Util
 from _functools import partial
 
-from scipy import spatial
-
-from pyemd import emd
-from gensim.corpora import Dictionary
-
-from numpy import zeros,\
-    double, sqrt,\
-    sum as np_sum
-
-import numpy as np
 
 WMD_VALUE_MAP_FUNCTION = partial(Util.map_value_range, 0, 2)
 
 
 class SimilarityComparator:
@@ -39,7 +29,7 @@ def _wmd(self, boe_1, boe_2):
             max = 0
             for vector_2 in boe_2:
                 similarity = self._cosine_similarity(vector_1, vector_2)
-                if(similarity > max):
+                if similarity > max:
                     max = similarity
             sum += max
diff --git a/embeddingCreator/WordEmbeddingCreator.py b/embeddingCreator/WordEmbeddingCreator.py
index 812f6e7..342f032 100644
--- a/embeddingCreator/WordEmbeddingCreator.py
+++ b/embeddingCreator/WordEmbeddingCreator.py
@@ -2,12 +2,8 @@
 import logging
 import random
 
-from pyemd import emd
-
 import gensim.models.wrappers
-from gensim.models.word2vec import Word2Vec
-
 from utility import Util
 
 log = logging.getLogger(__name__)
diff --git a/embeddingCreator/unixcoder.py b/embeddingCreator/unixcoder.py
index b8c8d3f..612fd98 100644
--- a/embeddingCreator/unixcoder.py
+++ b/embeddingCreator/unixcoder.py
@@ -157,12 +157,12 @@ def __init__(self, size, eos, device):
         self.finished = []
 
     def getCurrentState(self):
-        "Get the outputs for the current timestep."
+        """Get the outputs for the current timestep."""
        batch = self.nextYs[-1].view(-1, 1)
         return batch
 
     def getCurrentOrigin(self):
-        "Get the backpointers for the current timestep."
+        """Get the backpointers for the current timestep."""
         return self.prevKs[-1]
 
     def advance(self, wordLk):
diff --git a/evaluation/Evaluator.py b/evaluation/Evaluator.py
index 85a13b8..4be5b08 100644
--- a/evaluation/Evaluator.py
+++ b/evaluation/Evaluator.py
@@ -1,7 +1,6 @@
 from abc import ABC, abstractmethod
 
 from traceLinkProcessing.TraceLink import TraceLink
-from utility import Util
 
 
 class Evaluator(ABC):
@@ -230,7 +229,7 @@ def get_defining_value(self):
         pass
 
 
-class EmptyResultObject():
+class EmptyResultObject(EvalResultObject):
 
     def __init__(self, message: str):
         self._message = message
@@ -281,7 +280,7 @@ def build_prec_recall_f1_print_str(precision, recall, f_1, true_positives, num_f
     return print_str
 
 
-class MAPResultObject(ABC):
+class MAPResultObject(EvalResultObject):
 
     def __init__(self, mAP, k):
         self.mAP = mAP
@@ -299,7 +298,7 @@ def get_defining_value(self):
         return self.mAP
 
 
-class LagResultObject(ABC):
+class LagResultObject(EvalResultObject):
 
     def __init__(self, lag):
         self.lag = lag
diff --git a/evaluation/OutputService.py b/evaluation/OutputService.py
index 2c9072a..e6718bc 100644
--- a/evaluation/OutputService.py
+++ b/evaluation/OutputService.py
@@ -1,7 +1,6 @@
-from abc import ABC , abstractmethod
+from abc import ABC
 from typing import Dict, List
 import logging
-from pathlib import Path
 
 from autograd.builtins import isinstance
 
@@ -149,7 +148,8 @@ def _add_best_f1_2D_excel_rows(self, excel_array, print_str_dict, best_eval_resu
         return excel_array
 
-    def _get_context_thresholds(self, all_threshs, best_thresh):
+    @staticmethod
+    def _get_context_thresholds(all_threshs, best_thresh):
 
         if len(all_threshs) > 1:
             context_threshs = []
@@ -290,9 +290,8 @@ def process_trace_link_dict(self, trace_link_dict: Dict[float, List[TraceLink]])
             excel_array.append([lag_message])
         excel_array.append([""])  # Add empty row as divider
 
-        csv_array = []
-        csv_array.append(["Best:", "", "", "", "", "Default:", "", "", "MAP", "LAG"])
-        csv_array.append(["Maj", "Final", "Precision", "Recall", "F1", "Precision", "Recall", "F1", "MAP", "LAG"])
+        csv_array = [["Best:", "", "", "", "", "Default:", "", "", "MAP", "LAG"],
+                     ["Maj", "Final", "Precision", "Recall", "F1", "Precision", "Recall", "F1", "MAP", "LAG"]]
         if isinstance(best_eval_result, F1ResultObject):
             default_prec = 0
             default_rec = 0
@@ -363,9 +362,8 @@ def process_trace_link_2D_dict(self, trace_link_2D_dict: Dict[float, Dict[float,
             excel_array.append([lag_message])
         excel_array.append([""])  # Add empty row as divider
 
-        csv_array = []
-        csv_array.append(["Best:","","","","","Default:","","","MAP","LAG"])
-        csv_array.append(["Maj","Final","Precision","Recall","F1","Precision","Recall","F1","MAP","LAG"])
+        csv_array = [["Best:", "", "", "", "", "Default:", "", "", "MAP", "LAG"],
+                     ["Maj", "Final", "Precision", "Recall", "F1", "Precision", "Recall", "F1", "MAP", "LAG"]]
         if isinstance(best_eval_result, F1ResultObject):
             default_prec = 0
             default_rec = 0
@@ -399,10 +397,6 @@ def __init__(self, dataset, excel_output_file_path, also_print_eval=True):
 
     def process_trace_link_2D_dict(self, trace_link_2D_dict: Dict[float, Dict[float, List[TraceLink]]]):
         results = {}
         for maj_thresh in trace_link_2D_dict:
-            best_eval_result = None
-            best_thresh = None
-
-            print_str_dict = {}
             for final_threshold in trace_link_2D_dict[maj_thresh]:
                 # header_row.append(self.FILE_LEVEL_DROP_THRESH_PATTERN.format(final_threshold))
                 eval_result_object = self._evaluator.evaluate(trace_link_2D_dict[maj_thresh][final_threshold])
diff --git a/evaluation/SolutionComparator.py b/evaluation/SolutionComparator.py
index bd0efd2..257989c 100644
--- a/evaluation/SolutionComparator.py
+++ b/evaluation/SolutionComparator.py
@@ -32,7 +32,7 @@ def get_true_positives(self, trace_link_candidates):
             elif self._print_false_positives:
                 false_positives_matrix.add_trace_pair(trace_link.req_key, trace_link.code_key)
         if self._print_false_negatives:
-            self._print_false_negatives(sol_matrix_copy)
+            self.print_false_negatives(sol_matrix_copy)
         if self._print_false_positives:
             log.info("\n\nFalse Positives: {} Links, {} unique Reqs, {} unique Code".format(false_positives_matrix._number_of_trace_links,
                      false_positives_matrix.num_unique_reqs(), false_positives_matrix.num_unique_code()))
@@ -60,10 +60,11 @@ def get_similarity_relevance_dict(self, trace_links):
                 req_dict[req_name] = [sim_rel_tuple_to_add]
 
         if self._print_false_negatives:
-            self._print_false_negatives(sol_matrix_copy)
+            self.print_false_negatives(sol_matrix_copy)
         return req_dict
-
-    def _print_false_negatives(self, sol_matrix_with_false_negatives):
+
+    @staticmethod
+    def print_false_negatives(sol_matrix_with_false_negatives):
         log.info(f"\nFalse Negatives: {sol_matrix_with_false_negatives._number_of_trace_links} Links, {sol_matrix_with_false_negatives.num_unique_reqs()} unique Reqs, {sol_matrix_with_false_negatives.num_unique_code()} unique Code")
         log.info("\n" + sol_matrix_with_false_negatives.print_str())
diff --git a/precalculating/TraceLinkDataStructureFactory.py b/precalculating/TraceLinkDataStructureFactory.py
index 81f817a..339ad06 100644
--- a/precalculating/TraceLinkDataStructureFactory.py
+++ b/precalculating/TraceLinkDataStructureFactory.py
@@ -93,6 +93,7 @@ def _calculate_similarities_to_all_req_elements(self, similarity_matrix, req_emb
                 similarity_matrix.set_value(key, element_key, similarity)
         return similarity_matrix
 
-    def _build_req_element_key(self, req_file_name, index):
+    @staticmethod
+    def _build_req_element_key(req_file_name, index):
         return f"{req_file_name}.{index}"
diff --git a/preprocessing/CallGraphUtil.py b/preprocessing/CallGraphUtil.py
index 3a96108..d49dee8 100644
--- a/preprocessing/CallGraphUtil.py
+++ b/preprocessing/CallGraphUtil.py
@@ -7,7 +7,7 @@
 import logging
 import re
 
-from datasets.Dataset import Dataset, Smos
+from datasets.Dataset import Dataset
 from utility import FileUtil
 
 log = logging.getLogger(__name__)
@@ -103,8 +103,8 @@ def remove_external_calls():
                 continue  # Leave out inner classes
             if create_class_callgraph:
-                insert_class(caller_class_name, set([callee_class_name]), set())
-                insert_class(callee_class_name, set(), set([caller_class_name]))
+                insert_class(caller_class_name, {callee_class_name}, set())
+                insert_class(callee_class_name, set(), {caller_class_name})
         elif row_split[0] == "M":
             # method level call
             # row_split[1] = Class of caller method
@@ -143,8 +143,8 @@ def remove_external_calls():
             # called_by = caller_dict_key
             # calls = callee_dict_key
-            insert_entry(caller_dict_key, caller_class, caller_name, caller_param, set(), set([callee_dict_key]))
-            insert_entry(callee_dict_key, callee_class, callee_name, callee_param, set([caller_dict_key]), set())
+            insert_entry(caller_dict_key, caller_class, caller_name, caller_param, set(), {callee_dict_key})
+            insert_entry(callee_dict_key, callee_class, callee_name, callee_param, {caller_dict_key}, set())
         else:
             log.error("Unknow start character: " + row_split[0])
diff --git a/preprocessing/CodeASTTokenizer.py b/preprocessing/CodeASTTokenizer.py
index afc3454..7bd47c1 100644
--- a/preprocessing/CodeASTTokenizer.py
+++ b/preprocessing/CodeASTTokenizer.py
@@ -7,7 +7,6 @@
 from preprocessing import Tokenizer, JavaLangUtil, PycparserUtil, JSParserUtil
 from pycparser import parse_file
-from esprima import parse
 
 from preprocessing.CodeFileRepresentation import CodeFileRepresentation, IdentifierString, Classifier
 from preprocessing.CommentParserUtil import parse_and_add_comments_to_file
 from utility import FileUtil
diff --git a/preprocessing/CodeFileRepresentation.py b/preprocessing/CodeFileRepresentation.py
index 17c0bec..7bbb103 100644
--- a/preprocessing/CodeFileRepresentation.py
+++ b/preprocessing/CodeFileRepresentation.py
@@ -205,7 +205,7 @@ def get_param_tuples(self) -> [([str], [str])]:  # returns [([param_type], [para
         [([boolean], [is, active]), ([type, tokens], [name, tokens]), ...]
         The type and name string list can contain multiple tokens if a camel case splitter was applied beforehand.
         """
-        self.__check_and_clean_param_list
+        self.__check_and_clean_param_list()
         return [param.get_param_tuple() for param in self.parameters]
 
     def get_param_plain_list(self) -> [str]:
@@ -213,7 +213,7 @@ def get_param_plain_list(self) -> [str]:
         """
         Returns param tokens as plain string list (No assumptions about order): [type1, name1, type2, name2...]
         """
-        self.__check_and_clean_param_list
+        self.__check_and_clean_param_list()
         param_words = []
         for param in self.parameters:
             param_words.extend(param.get_param_words())
@@ -224,7 +224,7 @@ def get_param_names_plain_list(self) -> [str]:
         """
         Returns param names tokens as plain string list (No assumptions about order): [name1, name2...]
         """
-        self.__check_and_clean_param_list
+        self.__check_and_clean_param_list()
         param_words = []
         for param in self.parameters:
             param_words.extend(param.get_param_name_words())
diff --git a/preprocessing/CommentParserUtil.py b/preprocessing/CommentParserUtil.py
index 6ee8902..1a39e91 100644
--- a/preprocessing/CommentParserUtil.py
+++ b/preprocessing/CommentParserUtil.py
@@ -4,7 +4,6 @@
 import logging
 
 from comment_parser import comment_parser
 from preprocessing.CodeFileRepresentation import IdentifierString, Enum_
-from preprocessing.Tokenizer import WordTokenizer
 
 log = logging.getLogger(__name__)
diff --git a/preprocessing/FileRepresentation.py b/preprocessing/FileRepresentation.py
index da05cc7..78d556d 100644
--- a/preprocessing/FileRepresentation.py
+++ b/preprocessing/FileRepresentation.py
@@ -122,13 +122,11 @@ def preprocess(self, preprocessor):
         self.__clean_up_strings()
 
     def get_printable_string(self):
-        print_str = ["UC_NAME || " + "|".join(self.name_words)]
-        print_str.append("DESCRIPTION || " + "|".join(self.description_words))
-        print_str.append("ACTOR || " + "|".join(self.actor_words))
-        print_str.append("PRECOND || " + "|".join(self.precondition_words))
-        print_str.append("EVENTFLOW || " + ", ".join(["|".join(sent_group) for sent_group in self.flow_of_events_words]))
-        print_str.append("POSTCOND || " + "|".join(self.postcondition_words))
-        print_str.append("QUALIREQ || " + "|".join(self.quality_requirement_words))
+        print_str = ["UC_NAME || " + "|".join(self.name_words), "DESCRIPTION || " + "|".join(self.description_words),
+                     "ACTOR || " + "|".join(self.actor_words), "PRECOND || " + "|".join(self.precondition_words),
+                     "EVENTFLOW || " + ", ".join(["|".join(sent_group) for sent_group in self.flow_of_events_words]),
+                     "POSTCOND || " + "|".join(self.postcondition_words),
+                     "QUALIREQ || " + "|".join(self.quality_requirement_words)]
         return "\n".join(print_str)
 
     def get_csv_string(self):
diff --git a/preprocessing/JSParserUtil.py b/preprocessing/JSParserUtil.py
index d1cdd9f..8530f79 100644
--- a/preprocessing/JSParserUtil.py
+++ b/preprocessing/JSParserUtil.py
@@ -17,7 +17,6 @@ def extract_FileAST(fileAST, filepath):
     file_name = FileUtil.get_filename_from_path(filepath)
     # Use mock class to hold the methods
     class_name = IdentifierString(file_name, file_name.replace(".jsp", ""))
-    super_classifiers = []
     class_object = Classifier(class_name, IdentifierString(file_name, ""))
     attributes = []
     functions = []
diff --git a/preprocessing/JavaLangUtil.py b/preprocessing/JavaLangUtil.py
index 0b939f0..234a6e4 100644
--- a/preprocessing/JavaLangUtil.py
+++ b/preprocessing/JavaLangUtil.py
@@ -105,7 +105,7 @@ def _extract_attributes(attribute_nodes, file_name):
     """Return an attribute object list."""
     attr_list = []
     for attr in attribute_nodes:
-        if not(ONLY_PUBLIC_ATTRIBUTES) or attr.modifiers.contains("public"):  # logical Implication
+        if not ONLY_PUBLIC_ATTRIBUTES or attr.modifiers.contains("public"):  # logical Implication
             attr_init_value, left_side_identifier = _extract_children_strings(attr.declarators[0].initializer, file_name)
             attr_obj = Attribute(IdentifierString(file_name, attr.type.name), IdentifierString(file_name, attr.declarators[0].name),
                                  attr_init_value, IdentifierString(file_name, ""))
@@ -120,7 +120,7 @@ def _extract_methods(method_nodes, file_name):
     meth_list = []
     for meth in method_nodes:
         #if not(ONLY_PUBLIC_METHODS) or not("private" in meth.modifiers or "protected" in meth.modifiers):
-        if not(ONLY_PUBLIC_METHODS) or "public" in meth.modifiers:  # logical Implication
+        if not ONLY_PUBLIC_METHODS or "public" in meth.modifiers:  # logical Implication
             meth_obj = Method(IdentifierString(file_name, ""), IdentifierString(file_name, meth.name),
                               IdentifierString(file_name, ""), IdentifierString(file_name, ""), IdentifierString(file_name, ""))
             if meth.return_type:
@@ -144,7 +144,7 @@ def _extract_parameters(parameter_nodes, file_name):
     return param_list
 
 
-def _extract_children_strings(body_node, file_name) -> IdentifierString:
+def _extract_children_strings(body_node, file_name) -> tuple[IdentifierString, IdentifierString]:
     """Returns an IdentifierString that contains all identifiers in the given body node"""
     strings_in_body, left_side_identifiers = _traverse_node(body_node, file_name)
diff --git a/preprocessing/PycparserUtil.py b/preprocessing/PycparserUtil.py
index 7491112..1aac725 100644
--- a/preprocessing/PycparserUtil.py
+++ b/preprocessing/PycparserUtil.py
@@ -17,7 +17,6 @@ def extract_FileAST(fileAST, filepath):
     file_name = FileUtil.get_filename_from_path(filepath)
     # Use mock class to hold the methods
     class_name = IdentifierString(file_name, file_name.replace(".c", ""))
-    super_classifiers = []
     class_object = Classifier(class_name, IdentifierString(file_name, ""))
     attributes = []
     functions = []
diff --git a/preprocessing/Tokenizer.py b/preprocessing/Tokenizer.py
index 8cf377f..233a65f 100644
--- a/preprocessing/Tokenizer.py
+++ b/preprocessing/Tokenizer.py
@@ -1,5 +1,6 @@
 import abc, logging
 import re
+from abc import ABC
 
 from nltk.tokenize import word_tokenize, sent_tokenize
 
@@ -20,7 +21,7 @@ def tokenize(self, file_path) -> FileRepresentation:
         pass
 
 
-class NaturalSpeechTokenizer(Tokenizer):
+class NaturalSpeechTokenizer(Tokenizer, ABC):
 
     def __init__(self, dataset, italian=False):
         super(NaturalSpeechTokenizer, self).__init__(dataset)
diff --git a/traceLinkProcessing/ElementFilter.py b/traceLinkProcessing/ElementFilter.py
index d639ecd..f46a8e2 100644
--- a/traceLinkProcessing/ElementFilter.py
+++ b/traceLinkProcessing/ElementFilter.py
@@ -25,7 +25,8 @@ def filter(self, trace_link_data_structure: ElementLevelTraceLinkDataStructure,
     def _filter(self, trace_link_data_structure: ElementLevelTraceLinkDataStructure, df, file, idx):
         pass
 
-    def check_and_remove(self, trace_link_data_structure: ElementLevelTraceLinkDataStructure, df, file, idx, columns):
+    @staticmethod
+    def check_and_remove(trace_link_data_structure: ElementLevelTraceLinkDataStructure, df, file, idx, columns):
         result = False
         for column,value in columns:
             result = result or (df.loc[idx, column] == value)
diff --git a/traceLinkProcessing/NeighborHandler.py b/traceLinkProcessing/NeighborHandler.py
index aecdc45..88281d6 100644
--- a/traceLinkProcessing/NeighborHandler.py
+++ b/traceLinkProcessing/NeighborHandler.py
@@ -24,7 +24,7 @@ def get_neighbor_method_keys_of(self, method_key):
             all_neighbor_keys = method_callgraph_entry[CallGraphUtil.CALLED_BY] + method_callgraph_entry[CallGraphUtil.CALLS]
         elif self._neighbor_strategy == NeighborStrategy.up:
             all_neighbor_keys = method_callgraph_entry[CallGraphUtil.CALLED_BY]
-        elif self._neighbor_strategy == self.NeighborStrategy.down:
+        elif self._neighbor_strategy == NeighborStrategy.down:
             all_neighbor_keys = method_callgraph_entry[CallGraphUtil.CALLS]
         else:
             log.error("Unknown neighbor strategy: " + str(self._neighbor_strategy))
diff --git a/traceLinkProcessing/SimilarityFilter.py b/traceLinkProcessing/SimilarityFilter.py
index 88c7708..bfcd97b 100644
--- a/traceLinkProcessing/SimilarityFilter.py
+++ b/traceLinkProcessing/SimilarityFilter.py
@@ -3,11 +3,13 @@
 
 
 class SimilarityFilter:
 
-    def _bigger_is_more_sim(self, a, b):
+    @staticmethod
+    def _bigger_is_more_sim(a, b):
         return a > b
 
-    def _smaller_is_more_sim(self, a, b):
+    @staticmethod
+    def _smaller_is_more_sim(a, b):
         return a < b
 
     def __init__(self, bigger_is_more_similar=True):
diff --git a/traceLinkProcessing/TraceLinkCreator.py b/traceLinkProcessing/TraceLinkCreator.py
index 2b82af8..b85a0a3 100644
--- a/traceLinkProcessing/TraceLinkCreator.py
+++ b/traceLinkProcessing/TraceLinkCreator.py
@@ -146,7 +146,7 @@ def _collect_votes_and_similarities(self, majority_drop_thresh, code_file_name):
         for code_elem in self._trace_link_data_structure.all_code_elements_of(code_file_name):
             for req_file_name in self._trace_link_data_structure.all_req_file_names():
                 similarity = self._trace_link_data_structure.similarity_between(req_file_name, code_elem)
-                if (self._similarity_filter.is_more_similar(similarity, majority_drop_thresh)):
+                if self._similarity_filter.is_more_similar(similarity, majority_drop_thresh):
                     votes.append(req_file_name)
                     sims_per_req.append(req_file_name, similarity)
         return votes, sims_per_req
diff --git a/utility/UCoutput.py b/utility/UCoutput.py
index 8fca1c7..27130d8 100755
--- a/utility/UCoutput.py
+++ b/utility/UCoutput.py
@@ -56,8 +56,7 @@
 req_tokenizer = UCTokenizer(dataset, not dataset.is_english())
 all_filenames = FileUtil.get_files_in_directory(dataset.req_folder())
 
-output = []
-output.append("file,ID,text")
+output = ["file,ID,text"]
 for file_path in all_filenames:
     file_representation = req_tokenizer.tokenize(file_path)
     file_representation.preprocess(req_preprocessor)