Skip to content

Commit

Permalink
#27 fixed computation of component similarity
Browse files Browse the repository at this point in the history
  • Loading branch information
StephanPirnbaum committed Jul 30, 2019
1 parent 8151ad3 commit d658fe9
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 137 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public Set<ComponentDescriptor> execute(ClassificationConfigurationDescriptor ex
// Step 4: Execute the cohesion criterion
Set<ComponentDescriptor> cohesionResult = executeCohesionCriterion(cohesionCriterionDescriptor, components);
// Step 5: Merge user components with the result of the cohesion based classification
components = mergeComponents(cohesionResult, components, iteration);
components = mergeComponents(cohesionResult, components);
}

// Step 6: Export the result
Expand Down Expand Up @@ -289,7 +289,7 @@ private Set<ComponentDescriptor> executeCohesionCriterion(CohesionCriterionDescr
}


private Set<ComponentDescriptor> mergeComponents(Set<ComponentDescriptor> cohesionResult, Set<ComponentDescriptor> userResult, Integer iteration) {
private Set<ComponentDescriptor> mergeComponents(Set<ComponentDescriptor> cohesionResult, Set<ComponentDescriptor> userResult) {
//so we have several solutions, time to make one out of them :)
this.xoManager.currentTransaction().begin();

Expand All @@ -299,8 +299,7 @@ private Set<ComponentDescriptor> mergeComponents(Set<ComponentDescriptor> cohesi
ComponentDescriptor bestUserComponent = null;
Double bestUserComponentSimilarity = 0.75;
for (ComponentDescriptor userComponent : userResult) {
Double similarity = computeTverskyIndex(userComponent, cohesionComponent, iteration);
System.out.println(similarity);
Double similarity = computeTverskyIndex(userComponent, cohesionComponent);
if (similarity > bestUserComponentSimilarity) {
bestUserComponent = userComponent;
bestUserComponentSimilarity = similarity;
Expand All @@ -309,6 +308,8 @@ private Set<ComponentDescriptor> mergeComponents(Set<ComponentDescriptor> cohesi
if (bestUserComponent != null) {
LOGGER.info("Merging user component {}:{} with cohesion component: {}:{} (Tversky Index: {})",
bestUserComponent.getShape(), bestUserComponent.getName(), cohesionComponent.getShape(), cohesionComponent.getName(), bestUserComponentSimilarity);
cohesionComponent.setShape(bestUserComponent.getShape());
cohesionComponent.setName(bestUserComponent.getName());
}
}

Expand All @@ -328,27 +329,36 @@ private Set<ComponentDescriptor> getComponentHierarchy(Set<ComponentDescriptor>
return childs;
}

private Double computeTverskyIndex(ComponentDescriptor userComponent, ComponentDescriptor cohesionComponent, Integer iteration) {
Double jaccard = componentRepository.computeJaccardSimilarity(
cohesionComponent.getShape(), cohesionComponent.getName(),
userComponent.getShape(), userComponent.getName(),
iteration);
Long intersection = componentRepository.computeComponentIntersectionCardinality(
cohesionComponent.getShape(), cohesionComponent.getName(),
userComponent.getShape(), userComponent.getName(),
iteration);
Long cohesionComponentCardinality = componentRepository.computeComponentCardinality(cohesionComponent.getShape(), cohesionComponent.getName(), iteration);
Long userComponentCardinality = componentRepository.computeComponentCardinality(userComponent.getShape(), userComponent.getName(), iteration);
Long ofCD1InCD2 = componentRepository.computeComplementCardinality(cohesionComponent.getShape(), cohesionComponent.getName(), userComponent.getShape(), userComponent.getName(), iteration);
Long ofCD2InCD1 = componentRepository.computeComplementCardinality(userComponent.getShape(), userComponent.getName(), cohesionComponent.getShape(), cohesionComponent.getName(), iteration);
/**
 * Computes a symmetric Tversky index between a cohesion component and a user component,
 * based on the ids of the types each of them contains.
 *
 * <p>With {@code I} the number of type ids present in both components, {@code a} the smaller and
 * {@code b} the larger of the two set differences, the result is
 * {@code I / (I + beta * (alpha * a + (1 - alpha) * b))} with {@code alpha = 0.8} and {@code beta = 0.6}.
 *
 * @param userComponent     the user component; its type ids are read from {@code getContainedTypes()}
 * @param cohesionComponent the cohesion component; its type ids are read from the repository's
 *                          {@code getContainedTypesRecursively} for the component's id
 * @return the index, or {@code 0.0} when the denominator is zero (both components contain no types)
 */
private Double computeTverskyIndex(ComponentDescriptor userComponent, ComponentDescriptor cohesionComponent) {
    Result<TypeDescriptor> cohesionTypes = componentRepository.getContainedTypesRecursively(this.xoManager.getId(cohesionComponent));
    Set<Long> cohesionTypeIds = new HashSet<>();
    cohesionTypes.forEach(c -> cohesionTypeIds.add(c.getId()));
    Set<Long> userTypeIds = userComponent.getContainedTypes().stream().map(t -> (Long) t.getId()).collect(Collectors.toSet());

    double alpha = 0.8;
    double beta = 0.6;

    Set<Long> intersectionSet = new HashSet<>(cohesionTypeIds);
    intersectionSet.retainAll(userTypeIds);

    Set<Long> aWithoutBSet = new HashSet<>(cohesionTypeIds);
    aWithoutBSet.removeAll(userTypeIds);

    Set<Long> bWithoutASet = new HashSet<>(userTypeIds);
    bWithoutASet.removeAll(cohesionTypeIds);

    // Symmetric variant: the smaller difference is weighted with alpha, the larger with (1 - alpha).
    int a = Math.min(aWithoutBSet.size(), bWithoutASet.size());
    int b = Math.max(aWithoutBSet.size(), bWithoutASet.size());

    double intersectionCardinality = intersectionSet.size();
    double denominator = intersectionCardinality + beta * (alpha * a + (1 - alpha) * b);

    // 0.0 / 0.0 would be NaN; returning 0.0 keeps the "no similarity" outcome explicit for callers.
    double tversky = denominator == 0.0 ? 0.0 : intersectionCardinality / denominator;

    LOGGER.debug("A: {}", a);
    LOGGER.debug("B: {}", b);
    LOGGER.debug("Intersection: {}", intersectionCardinality);
    LOGGER.debug("Tversky: {}", tversky);

    return tversky;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,6 @@
@Repository
public interface ComponentRepository extends TypedNeo4jRepository<ComponentDescriptor> {

/**
 * Returns every node that carries both the {@code SARF} and the {@code Component} label.
 *
 * @return all matching component nodes
 */
@ResultOf
@Cypher("MATCH (c:SARF:Component) RETURN c")
Result<ComponentDescriptor> findAll();

/**
 * Returns the distinct components reached from the classification configuration with the highest
 * {@code iteration} value via {@code CONTAINS} -> criterion -> {@code CREATED} -> info -> {@code MAPS}.
 *
 * <p>NOTE(review): the concatenated query has no whitespace between {@code (c:Component)} and
 * {@code RETURN} - confirm this is intended.
 *
 * @return the distinct components of the latest iteration
 */
@ResultOf
@Cypher("MATCH" +
" (conf:ClassificationConfiguration) " +
"WITH" +
" max(conf.iteration) AS current " +
"MATCH" +
" (:ClassificationConfiguration{iteration: current})-[:CONTAINS]->(:ClassificationCriterion)" +
" -[:CREATED]->(:ClassificationInfo)-[:MAPS]->(c:Component)" +
"RETURN" +
" DISTINCT c")
Result<ComponentDescriptor> getComponentsOfCurrentIteration();

@ResultOf
@Cypher("MATCH" +
" (conf:ClassificationConfiguration) " +
Expand All @@ -52,86 +36,6 @@ public interface ComponentRepository extends TypedNeo4jRepository<ComponentDescr
" DISTINCT c")
Result<ComponentDescriptor> getComponentOfCurrentIteration(@Parameter("shape") String shape, @Parameter("name") String name);

/**
 * Computes a Jaccard-style ratio for two components within one iteration: the number of distinct
 * internal types classified into both components divided by the number of distinct internal types
 * classified into either of them.
 *
 * @param shape1    shape of the first component
 * @param name1     name of the first component
 * @param shape2    shape of the second component
 * @param nam2      name of the second component (bound to the query parameter {@code name2})
 * @param iteration iteration the classification info nodes must belong to
 * @return the ratio computed by the query
 */
@ResultOf
@Cypher("MATCH\n" +
" (:SARF:Component {shape:{shape2}, name:{name2}})" +
" <-[:MAPS]-(info2:ClassificationInfo {iteration:{iteration}})-[:CLASSIFIES]->" +
" (type1:Type:Internal)" +
" <-[:CLASSIFIES]-(info1:ClassificationInfo {iteration:{iteration}})-[:MAPS]->" +
" (:SARF:Component {shape:{shape1}, name:{name1}})\n" +
"WITH \n" +
" count(DISTINCT type1) AS intersection\n" +
"MATCH\n" +
" (type1:Type:Internal)" +
" <-[:CLASSIFIES]-(info1:ClassificationInfo {iteration:{iteration}})-[:MAPS]->" +
" (comp1:SARF:Component {shape:{shape1}, name:{name1}})\n" +
"WITH\n" +
" intersection, collect(type1) AS types\n" +
"MATCH\n" +
" (type2:Type:Internal)" +
" <-[:CLASSIFIES]-(info2:ClassificationInfo {iteration:{iteration}})-[:MAPS]->" +
" (comp2:SARF:Component {shape:{shape2}, name:{name2}})\n" +
"WITH \n" +
" intersection, types + collect(type2) AS rows\n" +
"UNWIND \n" +
" rows AS row\n" +
"RETURN\n" +
" toFloat(intersection)/count(DISTINCT row)")
Double computeJaccardSimilarity(@Parameter("shape1") String shape1, @Parameter("name1") String name1,
@Parameter("shape2") String shape2, @Parameter("name2") String nam2,
@Parameter("iteration") Integer iteration);

/**
 * Counts the distinct {@code Type} nodes classified into the given component by classification
 * info of the given iteration.
 *
 * @param shape     shape of the component
 * @param string    name of the component (bound to the query parameter {@code name})
 * @param iteration iteration the classification info nodes must belong to
 * @return the number of distinct types
 */
@ResultOf
@Cypher("MATCH" +
" (:SARF:Component{shape:{shape}, name:{name}})" +
" <-[:MAPS]-(:ClassificationInfo{iteration:{iteration}})-[:CLASSIFIES]->" +
" (t:Type) " +
"RETURN" +
" count(DISTINCT t)")
Long computeComponentCardinality(@Parameter("shape") String shape, @Parameter("name") String string,
@Parameter("iteration") Integer iteration);

/**
 * Counts the distinct internal types that are classified into both given components by
 * classification info of the given iteration.
 *
 * <p>NOTE(review): the concatenated query has no whitespace between the closing pattern
 * parenthesis and {@code RETURN} - confirm this is intended.
 *
 * @param shape1    shape of the first component
 * @param name1     name of the first component
 * @param shape2    shape of the second component
 * @param name2     name of the second component
 * @param iteration iteration the classification info nodes must belong to
 * @return the number of distinct shared types
 */
@ResultOf
@Cypher("MATCH" +
" (:SARF:Component {shape:{shape2}, name:{name2}})" +
" <-[:MAPS]-(info2:ClassificationInfo {iteration:{iteration}})-[:CLASSIFIES]->" +
" (type:Type:Internal)" +
" <-[:CLASSIFIES]-(info1:ClassificationInfo {iteration:{iteration}})-[:MAPS]->" +
" (:SARF:Component {shape:{shape1}, name:{name1}})" +
"RETURN" +
" count(DISTINCT type)")
Long computeComponentIntersectionCardinality(@Parameter("shape1") String shape1, @Parameter("name1") String name1,
@Parameter("shape2") String shape2, @Parameter("name2") String name2,
@Parameter("iteration") Integer iteration);

/**
 * Counts the distinct internal types that are classified (in the given iteration) into the
 * component identified by {@code inShape}/{@code inName} but not into the component identified by
 * {@code ofShape}/{@code ofName}.
 *
 * <p>The null check on {@code info1} is applied after the {@code OPTIONAL MATCH} through a separate
 * {@code WITH ... WHERE}. A {@code WHERE} attached directly to {@code OPTIONAL MATCH} is evaluated
 * as part of the optional pattern, so it would never filter out any type.
 *
 * @param ofShape   shape of the component whose types are excluded (query parameter {@code shape1})
 * @param ofName    name of the component whose types are excluded (query parameter {@code name1})
 * @param inShape   shape of the component whose types are counted (query parameter {@code shape2})
 * @param inName    name of the component whose types are counted (query parameter {@code name2})
 * @param iteration iteration the classification info nodes must belong to
 * @return the number of distinct types found only in the second component
 */
@ResultOf
@Cypher("MATCH" +
" (:SARF:Component {shape:{shape2}, name:{name2}})" +
" <-[:MAPS]-(:ClassificationInfo {iteration:{iteration}})-[:CLASSIFIES]->" +
" (type:Type:Internal) " +
"WITH" +
" type " +
"OPTIONAL MATCH" +
" (type)" +
" <-[:CLASSIFIES]-(info1:ClassificationInfo {iteration:{iteration}})-[:MAPS]->" +
" (:SARF:Component {shape:{shape1}, name:{name1}}) " +
"WITH" +
" type, info1 " +
"WHERE" +
" info1 IS NULL " +
"RETURN" +
" count(DISTINCT type)")
Long computeComplementCardinality(@Parameter("shape1") String ofShape, @Parameter("name1") String ofName,
@Parameter("shape2") String inShape, @Parameter("name2") String inName,
@Parameter("iteration") Integer iteration);

/**
 * Returns the {@code ClassificationInfo} nodes that have a {@code MAPS} relationship to the
 * component with the given node id.
 *
 * @param componentId node id of the component
 * @return the classification info nodes mapping to that component
 */
@ResultOf
@Cypher("MATCH" +
" (c:SARF:Component)<-[:MAPS]-(info:ClassificationInfo) " +
"WHERE" +
" ID(c) = {id} " +
"RETURN" +
" info")
Result<ClassificationInfoDescriptor> getCandidateTypes(@Parameter("id") Long componentId);

@ResultOf
@Cypher("MATCH" +
" (e1), (e2) " +
Expand Down Expand Up @@ -167,15 +71,6 @@ Long computeComplementCardinality(@Parameter("shape1") String ofShape, @Paramete
" 1")
Long getBestComponentForShape(@Parameter("ids") long[] longs, @Parameter("shape") String shape, @Parameter("tid") Long typeId);

/**
 * Returns the components whose node id is contained in the given id array.
 *
 * @param longs node ids to look up (bound to the query parameter {@code ids})
 * @return the components with one of the given ids
 */
@ResultOf
@Cypher("MATCH" +
" (c:Component:SARF) " +
"WHERE" +
" ID(c) IN {ids} " +
"RETURN" +
" c")
Result<ComponentDescriptor> getComponentsWithId(@Parameter("ids") long[] longs);

@ResultOf
@Cypher("MATCH\n" +
" (c1:Component:SARF)-[cont1:CONTAINS]->(e1)-[coup:COUPLES]->(e2)<-[:CONTAINS]-(c2:Component:SARF) \n" +
Expand All @@ -197,16 +92,6 @@ Long computeComplementCardinality(@Parameter("shape1") String ofShape, @Paramete
" (c1)-[:COUPLES{coupling:relCoupling}]->(c2)")
void computeCouplingBetweenComponents(@Parameter("ids") long[] ids);

/**
 * Checks whether the component with node id {@code cId} has a direct {@code CONTAINS}
 * relationship to the type with node id {@code tId}.
 *
 * @param cId node id of the component
 * @param tId node id of the type
 * @return the result of the query's {@code exists(...)} expression
 */
@ResultOf
@Cypher("MATCH" +
" (c:Component:SARF)," +
" (t:Type) " +
"WHERE" +
" ID(c) = {cId} AND ID(t) = {tId} " +
"RETURN" +
" exists((c)-[:CONTAINS]->(t))")
boolean containsType(@Parameter("cId") Long cId, @Parameter("tId") Long tId);

@ResultOf
@Cypher("MATCH\n" +
" (c1:Component:SARF)-[:COUPLES]-(c)-[:COUPLES]-(c2:Component:SARF)\n" +
Expand Down

0 comments on commit d658fe9

Please sign in to comment.