Skip to content

Commit 732e691

Browse files
committed
Refactor classes with package by feature
1 parent 9b94dc2 commit 732e691

File tree

8 files changed

+191
-94
lines changed

8 files changed

+191
-94
lines changed
Lines changed: 10 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,31 @@
11
package us.narin.summarizer;
22

3-
import kr.bydelta.koala.data.Morpheme;
4-
import kr.bydelta.koala.data.Sentence;
5-
import kr.bydelta.koala.data.Word;
6-
import kr.bydelta.koala.hnn.SentenceSplitter;
7-
import kr.bydelta.koala.hnn.Tagger;
8-
import org.jgrapht.alg.interfaces.VertexScoringAlgorithm;
9-
import org.jgrapht.alg.scoring.PageRank;
10-
import org.jgrapht.graph.DefaultWeightedEdge;
11-
import org.jgrapht.graph.SimpleWeightedGraph;
12-
import scala.collection.Iterator;
13-
import us.narin.summarizer.utils.ListUtils;
3+
import us.narin.summarizer.graph.GraphBuilder;
4+
import us.narin.summarizer.sentence.SentenceSource;
5+
import us.narin.summarizer.sentence.ranker.SentenceRanker;
146

157
import java.util.*;
168
import java.util.stream.Collectors;
179

1810
public class Summarizer {
1911

20-
private Tagger tagger;
21-
private SentenceSplitter sentenceSplitter;
2212
private String content;
23-
private List<String> splitSentenceList;
2413

2514
public Summarizer(String content) {
2615
this.content = content;
27-
this.tagger = new Tagger();
28-
this.sentenceSplitter = new SentenceSplitter();
29-
this.splitSentenceList = new ArrayList<>();
3016
}
3117

32-
List<String> summarize() {
33-
return getRankedSentences().stream().map(Map.Entry::getKey).collect(Collectors.toList());
34-
}
35-
36-
private Map<String, List<String>> extractSentences(List<String> splitSentenceList) {
37-
38-
final Map<String, List<String>> parsedSentence = new LinkedHashMap<>();
18+
public List<String> summarize() {
19+
final SentenceSource sentenceSource = new SentenceSource(content);
3920

40-
for (String sentence : splitSentenceList) {
41-
Sentence analyzedSentence = tagger.tagSentence(sentence);
42-
Iterator iterator = analyzedSentence.words().iterator();
43-
List<String> detectedNouns = new ArrayList<>();
21+
final List<String> sentences = sentenceSource.getSentences();
22+
final Map<String, List<String>> extractedSentences = sentenceSource.getExtractedSentences();
4423

45-
while (iterator.hasNext()) {
46-
Word word = (Word) iterator.next();
47-
Iterator wordIterator = word.iterator();
24+
final GraphBuilder graphBuilder = new GraphBuilder(extractedSentences);
4825

49-
while (wordIterator.hasNext()) {
50-
Morpheme morpheme = (Morpheme) wordIterator.next();
51-
if (morpheme.isNoun()) {
52-
String plainWord = morpheme.toString().split("/")[0];
53-
detectedNouns.add(plainWord);
54-
}
55-
}
56-
}
57-
parsedSentence.put(sentence, detectedNouns);
58-
}
59-
return parsedSentence;
26+
return new SentenceRanker(sentences, graphBuilder.build()).getRankedSentences()
27+
.stream().map(Map.Entry::getKey).collect(Collectors.toList());
6028
}
61-
62-
private SimpleWeightedGraph<String, DefaultWeightedEdge> buildGraph() {
63-
64-
SimpleWeightedGraph<String, DefaultWeightedEdge> graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
65-
66-
splitSentenceList = sentenceSplitter
67-
.jSentences(content)
68-
.stream()
69-
.map(String::trim)
70-
.collect(Collectors.toList());
71-
72-
Map<String, List<String>> parsedSentence = extractSentences(splitSentenceList);
73-
splitSentenceList.forEach(graph::addVertex);
74-
75-
for (Map.Entry<String, List<String>> entrySource : parsedSentence.entrySet()) {
76-
for (Map.Entry<String, List<String>> entryTarget : parsedSentence.entrySet()) {
77-
if (!Objects.equals(entrySource.getKey(), entryTarget.getKey())) {
78-
79-
float similarity = getSimilarity(entrySource, entryTarget);
80-
81-
if (similarity > 0 && graph.getEdge(entrySource.getKey(), entryTarget.getKey()) == null) {
82-
DefaultWeightedEdge e = graph.addEdge(entrySource.getKey(), entryTarget.getKey());
83-
graph.setEdgeWeight(e, similarity);
84-
}
85-
}
86-
}
87-
}
88-
return graph;
89-
}
90-
91-
private List<Map.Entry<String, Double>> getRankedSentences() {
92-
VertexScoringAlgorithm<String, Double> pageRank = new PageRank<>(buildGraph());
93-
return pageRank.getScores().entrySet()
94-
.stream()
95-
.sorted((o1, o2) -> o1.getValue() < o2.getValue() ? 1 : -1)
96-
.limit(3)
97-
.collect(Collectors.toList())
98-
.stream()
99-
.sorted((source, target) ->
100-
splitSentenceList.indexOf(source.getKey()) > splitSentenceList.indexOf(target.getKey()) ? 1 : -1)
101-
.collect(Collectors.toList());
102-
103-
}
104-
105-
private float getSimilarity(Map.Entry<String, List<String>> entrySource, Map.Entry<String, List<String>> entryTarget) {
106-
List<String> intersection = ListUtils.intersection(entrySource.getValue(), entryTarget.getValue());
107-
return (float) intersection.size() / (float) (Math.sqrt(entrySource.getValue().size()) * Math.sqrt(entryTarget.getValue().size()));
108-
}
109-
11029
}
11130

11231

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package us.narin.summarizer.graph;
2+
3+
4+
import org.jgrapht.graph.DefaultWeightedEdge;
5+
import org.jgrapht.graph.SimpleWeightedGraph;
6+
import us.narin.summarizer.sentence.similarity.Similarity;
7+
import us.narin.summarizer.sentence.similarity.SimilarityManager;
8+
9+
import java.util.List;
10+
import java.util.Map;
11+
import java.util.Objects;
12+
13+
public class GraphBuilder {
14+
private Map<String, List<String>> extractedSentences;
15+
16+
public GraphBuilder(Map<String, List<String>> extractedSentences) {
17+
this.extractedSentences = extractedSentences;
18+
}
19+
20+
public SimpleWeightedGraph<String, DefaultWeightedEdge> build(){
21+
final SimpleWeightedGraph<String, DefaultWeightedEdge> graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
22+
23+
extractedSentences.forEach((s, strings) -> graph.addVertex(s));
24+
25+
for (Map.Entry<String, List<String>> entrySource : extractedSentences.entrySet()) {
26+
for (Map.Entry<String, List<String>> entryTarget : extractedSentences.entrySet()) {
27+
if (!Objects.equals(entrySource.getKey(), entryTarget.getKey())) {
28+
29+
final float similarity = new SimilarityManager(entrySource, entryTarget).getSimilarity(Similarity.SIMILARITY_COSINE);
30+
31+
if (similarity > 0 && graph.getEdge(entrySource.getKey(), entryTarget.getKey()) == null) {
32+
DefaultWeightedEdge e = graph.addEdge(entrySource.getKey(), entryTarget.getKey());
33+
graph.setEdgeWeight(e, similarity);
34+
}
35+
}
36+
}
37+
}
38+
return graph;
39+
}
40+
41+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package us.narin.summarizer.sentence;
2+
3+
import kr.bydelta.koala.data.Morpheme;
4+
import kr.bydelta.koala.data.Sentence;
5+
import kr.bydelta.koala.data.Word;
6+
import kr.bydelta.koala.hnn.SentenceSplitter;
7+
import kr.bydelta.koala.hnn.Tagger;
8+
import scala.collection.Iterator;
9+
10+
import java.util.ArrayList;
11+
import java.util.LinkedHashMap;
12+
import java.util.List;
13+
import java.util.Map;
14+
import java.util.stream.Collectors;
15+
16+
/**
17+
* Created by endlessdev on 8/23/17.
18+
*/
19+
public class SentenceSource {
20+
private String article;
21+
22+
public SentenceSource(String article) {
23+
this.article = article;
24+
}
25+
26+
public List<String> getSentences() {
27+
final SentenceSplitter sentenceSplitter = new SentenceSplitter();
28+
return sentenceSplitter.jSentences(this.article)
29+
.stream()
30+
.map(String::trim)
31+
.collect(Collectors.toList());
32+
}
33+
34+
public Map<String, List<String>> getExtractedSentences() {
35+
final Tagger tagger = new Tagger();
36+
final Map<String, List<String>> parsedSentence = new LinkedHashMap<>();
37+
38+
for (String sentence : getSentences()) {
39+
Sentence analyzedSentence = tagger.tagSentence(sentence);
40+
Iterator iterator = analyzedSentence.words().iterator();
41+
List<String> detectedNouns = new ArrayList<>();
42+
43+
while (iterator.hasNext()) {
44+
Word word = (Word) iterator.next();
45+
Iterator wordIterator = word.iterator();
46+
47+
while (wordIterator.hasNext()) {
48+
Morpheme morpheme = (Morpheme) wordIterator.next();
49+
if (morpheme.isNoun()) {
50+
String plainWord = morpheme.toString().split("/")[0];
51+
detectedNouns.add(plainWord);
52+
}
53+
}
54+
}
55+
parsedSentence.put(sentence, detectedNouns);
56+
}
57+
return parsedSentence;
58+
}
59+
60+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package us.narin.summarizer.sentence.ranker;
2+
3+
import org.jgrapht.alg.interfaces.VertexScoringAlgorithm;
4+
import org.jgrapht.alg.scoring.PageRank;
5+
import org.jgrapht.graph.DefaultWeightedEdge;
6+
import org.jgrapht.graph.SimpleWeightedGraph;
7+
8+
import java.util.List;
9+
import java.util.Map;
10+
import java.util.stream.Collectors;
11+
12+
public class SentenceRanker {
13+
private List<String> sentences;
14+
private SimpleWeightedGraph<String, DefaultWeightedEdge> graph;
15+
16+
public SentenceRanker(List<String> sentences, SimpleWeightedGraph<String, DefaultWeightedEdge> graph) {
17+
this.sentences = sentences;
18+
this.graph = graph;
19+
}
20+
21+
public List<Map.Entry<String, Double>> getRankedSentences() {
22+
VertexScoringAlgorithm<String, Double> pageRank = new PageRank<>(graph);
23+
return pageRank.getScores().entrySet()
24+
.stream()
25+
.sorted((o1, o2) -> o1.getValue() < o2.getValue() ? 1 : -1)
26+
.limit(3)
27+
.collect(Collectors.toList())
28+
.stream()
29+
.sorted((source, target) ->
30+
sentences.indexOf(source.getKey()) > sentences.indexOf(target.getKey()) ? 1 : -1)
31+
.collect(Collectors.toList());
32+
}
33+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package us.narin.summarizer.sentence.similarity;
2+
3+
/**
 * Selects the similarity measure that {@code SimilarityManager.getSimilarity}
 * applies to a pair of sentences.
 */
public enum Similarity {
    /** Selects the cosine similarity computation. */
    SIMILARITY_COSINE,

    /** Selects the Jaccard similarity computation. */
    SIMILARITY_JACCARD;
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package us.narin.summarizer.sentence.similarity;
2+
3+
import us.narin.summarizer.set.SetOperator;
4+
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
public class SimilarityManager {
9+
private Map.Entry<String, List<String>> entrySource;
10+
private Map.Entry<String, List<String>> entryTarget;
11+
12+
public SimilarityManager(Map.Entry<String, List<String>> entrySource, Map.Entry<String, List<String>> entryTarget) {
13+
this.entrySource = entrySource;
14+
this.entryTarget = entryTarget;
15+
}
16+
17+
public float getSimilarity(Similarity similarity){
18+
switch (similarity){
19+
case SIMILARITY_COSINE:
20+
return getCosineSimilarity();
21+
case SIMILARITY_JACCARD:
22+
return getJaccardSimilarity();
23+
}
24+
return getJaccardSimilarity();
25+
}
26+
27+
private float getCosineSimilarity() {
28+
final List<String> intersection = SetOperator.intersection(entrySource.getValue(), entryTarget.getValue());
29+
return (float) intersection.size() / (float) (Math.sqrt(entrySource.getValue().size()) * Math.sqrt(entryTarget.getValue().size()));
30+
}
31+
32+
private float getJaccardSimilarity() {
33+
final List<String> intersection = SetOperator.intersection(entrySource.getValue(), entryTarget.getValue());
34+
final List<String> union = SetOperator.union(entrySource.getValue(), entryTarget.getValue());
35+
return (float) intersection.size() / (float) union.size();
36+
}
37+
38+
}

src/main/java/us/narin/summarizer/utils/ListUtils.java renamed to src/main/java/us/narin/summarizer/set/SetOperator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package us.narin.summarizer.utils;
1+
package us.narin.summarizer.set;
22

33
import java.util.ArrayList;
44
import java.util.HashSet;
@@ -9,7 +9,7 @@
99
* Created by endlessdev on 7/8/17.
1010
*/
1111

12-
public class ListUtils {
12+
public class SetOperator {
1313

1414
public static <T> List<T> union(List<T> list1, List<T> list2) {
1515
Set<T> set = new HashSet<>();

src/main/java/us/narin/summarizer/CLI.java renamed to src/main/java/us/narin/summarizer/utils/CLI.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
package us.narin.summarizer;
1+
package us.narin.summarizer.utils;
22

33
import org.json.JSONArray;
4+
import us.narin.summarizer.Summarizer;
45

56
/**
67
* Created by endlessdev on 7/28/17.

0 commit comments

Comments
 (0)