Skip to content

Commit dfe6eea

Browse files
committed
Initial Commit ๐Ÿ˜
0 parents  commit dfe6eea

File tree

6 files changed

+298
-0
lines changed

6 files changed

+298
-0
lines changed

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
target/
2+
pom.xml.tag
3+
pom.xml.releaseBackup
4+
pom.xml.versionsBackup
5+
pom.xml.next
6+
release.properties
7+
dependency-reduced-pom.xml
8+
buildNumber.properties
9+
.mvn/timing.properties
10+
11+
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
12+
!/.mvn/wrapper/maven-wrapper.jar

pom.xml

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>us.narin.summarizer</groupId>
    <artifactId>summarizer-java</artifactId>
    <packaging>jar</packaging>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- The sources contain Korean string literals; pin the encoding so the build
             does not depend on the platform default charset. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <name>summarizer-java</name>
    <url>http://maven.apache.org</url>

    <dependencies>
        <!-- Test framework (JUnit 3 style: junit.framework.TestCase). -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- KoalaNLP morphological analysers. Every artifact must use the same Scala
             binary version (_2.12) and the same release; the former
             koalanlp-komoran_2.11:1.5.1 entry duplicated koalanlp-komoran_2.12 below and
             mixed two Scala binary versions on one classpath, so it was removed. -->
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-hannanum_2.12</artifactId>
            <classifier>assembly</classifier>
            <version>1.5.4</version>
        </dependency>
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-twitter_2.12</artifactId>
            <version>1.5.4</version>
        </dependency>
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-eunjeon_2.12</artifactId>
            <version>1.5.4</version>
        </dependency>
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-kkma_2.12</artifactId>
            <classifier>assembly</classifier>
            <version>1.5.4</version>
        </dependency>
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-komoran_2.12</artifactId>
            <classifier>assembly</classifier>
            <version>1.5.4</version>
        </dependency>
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-core_2.12</artifactId>
            <version>1.5.4</version>
        </dependency>
        <dependency>
            <groupId>kr.bydelta</groupId>
            <artifactId>koalanlp-kryo_2.12</artifactId>
            <version>1.5.4</version>
        </dependency>

        <!-- Graph libraries. -->
        <dependency>
            <groupId>net.sf.jung</groupId>
            <artifactId>jung-api</artifactId>
            <version>2.1.1</version>
        </dependency>
        <dependency>
            <groupId>net.sf.jung</groupId>
            <artifactId>jung-graph-impl</artifactId>
            <version>2.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.jgrapht</groupId>
            <artifactId>jgrapht-core</artifactId>
            <version>1.0.1</version>
        </dependency>
        <dependency>
            <groupId>jgraph</groupId>
            <artifactId>jgraph</artifactId>
            <version>5.13.0.0</version>
        </dependency>
    </dependencies>
</project>
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package us.narin.summarizer;
2+
3+
import kr.bydelta.koala.data.Morpheme;
4+
import kr.bydelta.koala.data.Sentence;
5+
import kr.bydelta.koala.data.Word;
6+
import kr.bydelta.koala.twt.Tagger;
7+
import org.jgrapht.alg.interfaces.VertexScoringAlgorithm;
8+
import org.jgrapht.alg.scoring.PageRank;
9+
import org.jgrapht.graph.DefaultWeightedEdge;
10+
import org.jgrapht.graph.SimpleDirectedWeightedGraph;
11+
import scala.collection.Iterator;
12+
import us.narin.summarizer.utils.ListUtils;
13+
14+
import java.io.File;
15+
import java.io.FileNotFoundException;
16+
import java.util.*;
17+
18+
public class Summarizer {
19+
20+
public static void main(String[] args) throws FileNotFoundException {
21+
22+
Tagger tagger = new Tagger();
23+
String content = new Scanner(new File("./test.txt")).useDelimiter("\\Z").next();
24+
List<String> keywords = Arrays.asList("๋ถํ•œ", "๋ฏธ์‚ฌ์ผ");
25+
String[] splitSentences = content.split("[.?!\n]");
26+
Map<String, List<String>> parsedSentence = new LinkedHashMap<>();
27+
28+
SimpleDirectedWeightedGraph<String, DefaultWeightedEdge> graph =
29+
new SimpleDirectedWeightedGraph<>
30+
(DefaultWeightedEdge.class);
31+
32+
System.out.println(Arrays.toString(splitSentences));
33+
34+
for (String sentence : splitSentences) {
35+
sentence = sentence.trim();
36+
System.out.println(sentence);
37+
Sentence analyzedSentence = tagger.tagSentence(sentence);
38+
Iterator iterator = analyzedSentence.words().iterator();
39+
List<String> detectedNouns = new ArrayList<>();
40+
41+
while (iterator.hasNext()) {
42+
Word word = (Word) iterator.next();
43+
Iterator wordIterator = word.iterator();
44+
45+
while (wordIterator.hasNext()) {
46+
Morpheme morpheme = (Morpheme) wordIterator.next();
47+
// if (morpheme.isNoun() || morpheme.isPredicate()) {
48+
if (morpheme.isNoun()) {
49+
// WTF - KoalaNLP์˜ Morpheme ํด๋ž˜์Šค์—์„œ ์ˆœ์ˆ˜ํ•œ ๋‹จ์–ด๋งŒ ๊ฐ€์ ธ์˜ค๋Š” ํ•จ์ˆ˜๊ฐ€ ์—†๋‹ค.
50+
String plainWord = morpheme.toString().split("/")[0];
51+
System.out.println(morpheme);
52+
detectedNouns.add(plainWord);
53+
}
54+
}
55+
}
56+
parsedSentence.put(sentence, detectedNouns);
57+
}
58+
59+
System.out.println(parsedSentence);
60+
61+
62+
for (Map.Entry<String, List<String>> entry : parsedSentence.entrySet()) {
63+
String key = entry.getKey();
64+
graph.addVertex(key);
65+
List<String> value = entry.getValue();
66+
67+
System.out.println(key);
68+
System.out.println(value);
69+
70+
}
71+
72+
parsedSentence.entrySet().stream().sorted((o1, o2) -> {
73+
System.out.println("=== COMPARING START ===");
74+
List<String> intersection = ListUtils.intersection(o1.getValue(), o2.getValue());
75+
List<String> union = ListUtils.union(o1.getValue(), o2.getValue());
76+
77+
float similarity = (float) intersection.size() / (float) union.size();
78+
System.out.println(String.format("'%s'์™€ '%s'์˜ ์œ ์‚ฌ๋„๋Š” %f", o1.getKey(), o2.getKey(), similarity));
79+
System.out.println("=== COMPARING END ===");
80+
81+
if (similarity > 0) {
82+
DefaultWeightedEdge e = graph.addEdge(o1.getKey(), o2.getKey());
83+
graph.setEdgeWeight(e, similarity);
84+
}
85+
86+
return 0;
87+
}).forEach(System.out::println);
88+
System.out.println(graph);
89+
90+
VertexScoringAlgorithm<String, Double> pr = new PageRank<>(graph);
91+
92+
pr.getScores().entrySet().stream()
93+
.map(entity -> {
94+
// TODO ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ๊ฐ€ ํฌํ•จ๋œ ๋ฌธ์žฅ์˜ ๊ฒฝ์šฐ ํŽ˜์ด์ง€ ๋žญํฌ์˜ ๊ฐ’์„ ๋”ํ•ด์ค˜์•ผ ํ•จ.
95+
List<String> mutualKeywords = ListUtils.intersection(parsedSentence.get(entity.getKey()), keywords);
96+
if (!mutualKeywords.isEmpty()) {
97+
System.out.println((Double) (entity.getValue() + (entity.getValue() + mutualKeywords.size() / 100)));
98+
}
99+
// entity.setValue((Double) (entity.getValue() + (entity.getValue() + mutalKeywords.size() / 100)));
100+
return entity;
101+
})
102+
.sorted((o1, o2) -> o1.getValue() < o2.getValue() ? 1 : -1)
103+
.forEach(System.out::println);
104+
105+
}
106+
}
107+
108+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package us.narin.summarizer.utils;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashSet;
5+
import java.util.List;
6+
import java.util.Set;
7+
8+
/**
9+
* Created by endlessdev on 7/8/17.
10+
*/
11+
12+
public class ListUtils {
13+
14+
public static <T> List<T> union(List<T> list1, List<T> list2) {
15+
Set<T> set = new HashSet<>();
16+
17+
set.addAll(list1);
18+
set.addAll(list2);
19+
20+
return new ArrayList<>(set);
21+
}
22+
23+
public static <T> List<T> intersection(List<T> list1, List<T> list2) {
24+
List<T> list = new ArrayList<>();
25+
26+
for (T t : list1) {
27+
if (list2.contains(t)) {
28+
list.add(t);
29+
}
30+
}
31+
32+
return list;
33+
}
34+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package us.narin.summarizer;
2+
3+
import junit.framework.Test;
4+
import junit.framework.TestCase;
5+
import junit.framework.TestSuite;
6+
7+
/**
8+
* Unit test for simple Summarizer.
9+
*/
10+
public class AppTest
11+
extends TestCase
12+
{
13+
/**
14+
* Create the test case
15+
*
16+
* @param testName name of the test case
17+
*/
18+
public AppTest( String testName )
19+
{
20+
super( testName );
21+
}
22+
23+
/**
24+
* @return the suite of tests being tested
25+
*/
26+
public static Test suite()
27+
{
28+
return new TestSuite( AppTest.class );
29+
}
30+
31+
/**
32+
* Rigourous Test :-)
33+
*/
34+
public void testApp()
35+
{
36+
assertTrue( true );
37+
}
38+
}

test.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[์Šคํฌ์ธ ํ•œ๊ตญ ๋ฐ•์†Œ์œค ๊ธฐ์ž] '๊ทธ๊ฒƒ์ด ์•Œ๊ณ  ์‹ถ๋‹ค'๊ฐ€ ๊ณผ๋กœ์‚ฌ์™€ ๊ณผ๋กœ ์ž์‚ด์— ๋Œ€ํ•ด ํŒŒํ—ค์ณค๋‹ค.
2+
3+
8์ผ ๋ฐฉ์†ก๋œ SBS '๊ทธ๊ฒƒ์ด ์•Œ๊ณ  ์‹ถ๋‹ค' 1083ํšŒ๋Š” '์ธ๊ฐ„ ๋ฌดํ•œ์š”๊ธˆ์ œ์˜ ์ง„์‹ค - ๊ณผ๋กœ์ž์‚ด์˜ ์‹œ๋Œ€' ํŽธ์œผ๋กœ ๊พธ๋ฉฐ์กŒ๋‹ค
4+
5+
์ด๋‚  ๋ฐฉ์†ก์—์„œ๋Š” ๊ฒŒ์ž„ยทIT์—…๊ณ„์˜ ํฌ๋Ÿฐ์น˜ ๋ชจ๋“œ๊ฐ€ ์กฐ๋ช…๋๋‹ค. ํฌ๋Ÿฐ์น˜ ๋ชจ๋“œ๋Š” ์„œ๋น„์Šค ๊ฒŒ์ž„ ๋Ÿฐ์นญ ์ „์˜ ๊ฐ•ํ–‰๊ตฐ์„ ๋œปํ•˜๋Š” ๋ง์ด๋‹ค.
6+
7+
๊ถŒ์ƒ์ง‘ ๋™๊ตญ๋Œ€ํ•™๊ต ๊ต์ˆ˜๋Š” "์„ฑ๊ณผ๋ฅผ ๋‚ผ ๋•Œ๊นŒ์ง€ ์ง‘์— ๊ฐ€์ง€ ์•Š๋Š”๋‹ค. ์„ฑ๊ณผ๋ฅผ ๋‚ผ ๋•Œ๊นŒ์ง€ ์šฐ๋ฆฌ๋Š” ํ‡ด๊ทผํ•˜์ง€ ์•Š๋Š”๋‹ค"๋ผ๋ฉฐ "๊ตฐ๋Œ€๋กœ ์น˜๋ฉด ํŠน์ˆ˜ํ›ˆ๋ จ ๋ฐ›๋“ฏ ๊ฐ•ํ–‰๊ตฐ์„ ํŽผ์น˜๋Š” ๊ฒƒ"์ด๋ผ๊ณ  ์„ค๋ช…ํ–ˆ๋‹ค.
8+
9+
์ž„์ข…ํ•œ ๊ฐ€ํ†จ๋ฆญ๋Œ€ํ•™๊ต ์ž‘์—…ํ™˜๊ฒฝ์˜ํ•™๊ณผ ๊ต์ˆ˜๋Š” "60์‹œ๊ฐ„ ์ด์ƒ ์žฅ์‹œ๊ฐ„ ๊ทผ๋กœ๋Š” ์ž์‚ด ์ƒ๊ฐ์„ 30% ์ด์ƒ ์ฆ๊ฐ€์‹œํ‚จ๋‹ค"๋ฉฐ "ํŠนํžˆ ์•ผ๊ฐ„๊ทผ๋ฌด ๊ฐ™์€ ๊ฒฝ์šฐ์—๋Š” 45% ์ •๋„์˜ ์ž์‚ด ์ƒ๊ฐ์„ ์ฆ๊ฐ€์‹œํ‚จ๋‹ค"๊ณ  ๋งํ–ˆ๋‹ค. ์ด์–ด "๊ณผ๋กœ์ž์‚ด์ด๋ผ๋Š” ๊ฒƒ๊ณผ '๋น„๋ก€ํ•ด์„œ ๊ฐ™์ด ๊ฐ„๋‹ค'๊ณ  ๋ด์•ผ ํ•  ๊ฒƒ ๊ฐ™๋‹ค"๊ณ  ๋ง๋ถ™์˜€๋‹ค.
10+
11+
'ํฌ๋Ÿฐ์น˜ ๋ชจ๋“œ'์˜ ๋ฐ˜๋ณต๊ณผ '์ธ๊ฐ„ ๋ฌด์ œํ•œ์š”๊ธˆ์ œ'๋ผ๊ณ  ๋น„์œ ๋˜๋Š” ์žฅ์‹œ๊ฐ„ ๊ทผ๋กœํ™˜๊ฒฝ, ๊ทธ๋ฆ‡๋œ ๊ฒฝ์˜์ง„์˜ ์ด์œค์ถ”๊ตฌ์˜ ๊ทน๋Œ€ํ™”๋กœ ์ธํ•ด IT์—…๊ณ„์—๋Š” 'ํŒ๊ต์˜ ๋“ฑ๋Œ€'์™€ '๊ตฌ๋กœ์˜ ๋“ฑ๋Œ€'๋ผ๋Š” ์€์–ด๊ฐ€ ์žˆ๋‹ค. ๋ถˆ์ด ๊บผ์ง€์ง€ ์•Š๋Š” ํŒ๊ต์™€ ๊ตฌ๋กœ์˜ ๋นŒ๋”ฉ์„ ๋น„์œ ํ•ด '๋“ฑ๋Œ€'๋ผ๊ณ  ๋ถ€๋ฅด๋Š” ๊ฒƒ์ด๋‹ค.
12+
13+
ํ•œํŽธ SBS โ€˜'๊ทธ๊ฒƒ์ด ์•Œ๊ณ  ์‹ถ๋‹ค'๋Š” ๋งค์ฃผ ํ† ์š”์ผ ์ €๋… 11์‹œ 5๋ถ„์— ๋ฐฉ์†ก๋œ๋‹ค.

0 commit comments

Comments
 (0)