@@ -40,6 +40,29 @@ public class MaximumBipartiteMatchingAlgorithm<TypeA extends Matchable, TypeB ex
4040 private Processable <Correspondence <TypeA , TypeB >> correspondences ;
4141 private Processable <Correspondence <TypeA , TypeB >> result ;
4242
43+ private boolean groupByLeftDataSource = false ;
44+ private boolean groupByRightDataSource = false ;
45+
46+ /**
47+ *
48+ * Specifies if correspondences should first be grouped by the data source ID of the left-hand side of the correspondences.
49+ * If true, all data sources on the left-hand side will be processed individually
50+ *
51+ * @param groupByLeftDataSource the groupByLeftDataSource to set
52+ */
53+ public void setGroupByLeftDataSource (boolean groupByLeftDataSource ) {
54+ this .groupByLeftDataSource = groupByLeftDataSource ;
55+ }
56+ /**
57+ * Specifies if correspondences should first be grouped by the data source ID of the right-hand side of the correspondences.
58+ * If true, all data source on the right-hand side will be processed individually
59+ *
60+ * @param groupByRightDataSource the groupByRightDataSource to set
61+ */
62+ public void setGroupByRightDataSource (boolean groupByRightDataSource ) {
63+ this .groupByRightDataSource = groupByRightDataSource ;
64+ }
65+
/**
 * Creates the algorithm for the given correspondences. The constructor only
 * stores the reference; no processing happens here.
 *
 * @param correspondences the correspondences this instance operates on
 */
public MaximumBipartiteMatchingAlgorithm(Processable<Correspondence<TypeA, TypeB>> correspondences) {
    this.correspondences = correspondences;
}
@@ -51,7 +74,7 @@ public MaximumBipartiteMatchingAlgorithm(Processable<Correspondence<TypeA, TypeB
5174 public void run () {
5275
5376 // group correspondences by data source and then run the maximum matching.
54- // if multiple sources are matched at the same time, the maximum matching would only allow one source to the matched to one other source
77+ // if multiple sources are matched at the same time, the maximum matching would only allow one source to be matched to one other source
5578 // but we want that one element from a certain source (record or attribute) can only be matched to one other element.
5679 // two elements from different sources can be mapped to the same element in another source.
5780
@@ -63,7 +86,10 @@ public void run() {
6386 public void mapRecordToKey (Correspondence <TypeA , TypeB > record ,
6487 DataIterator <Pair <Pair <Integer , Integer >, Correspondence <TypeA , TypeB >>> resultCollector ) {
6588
66- resultCollector .next (new Pair <Pair <Integer ,Integer >, Correspondence <TypeA ,TypeB >>(new Pair <>(record .getFirstRecord ().getDataSourceIdentifier (), record .getSecondRecord ().getDataSourceIdentifier ()), record ));
89+ int leftGroup = groupByLeftDataSource ? record .getFirstRecord ().getDataSourceIdentifier () : 0 ;
90+ int rightGroup = groupByRightDataSource ? record .getSecondRecord ().getDataSourceIdentifier () : 0 ;
91+
92+ resultCollector .next (new Pair <Pair <Integer ,Integer >, Correspondence <TypeA ,TypeB >>(new Pair <>(leftGroup , rightGroup ), record ));
6793
6894 }
6995
0 commit comments