Skip to content

Commit cdc7195

Browse files
authored
SONARPY-2080 Move ShannonEntropy to analyzer commons (#340)
1 parent 0e2709c commit cdc7195

File tree

3 files changed

+103
-1
lines changed

3 files changed

+103
-1
lines changed

commons/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106
<!-- Warning: Please consider carefully when increasing the size of this shared library, it might have
107107
impact on all our analyzers! -->
108108
<minsize>100000</minsize>
109-
<maxsize>120000</maxsize>
109+
<maxsize>122000</maxsize>
110110
<files>
111111
<file>${project.build.directory}/${project.build.finalName}.jar</file>
112112
</files>
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* SonarSource Analyzers Commons
3+
* Copyright (C) 2009-2024 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonarsource.analyzer.commons;
21+
22+
import java.util.function.Function;
23+
import java.util.stream.Collectors;
24+
import javax.annotation.Nullable;
25+
26+
public class ShannonEntropy {
27+
private static final double LOG_2 = Math.log(2.0d);
28+
29+
private ShannonEntropy() {
30+
// utility class
31+
}
32+
33+
public static double calculate(@Nullable String str) {
34+
if (str == null || str.isEmpty()) {
35+
return 0.0d;
36+
}
37+
int length = str.length();
38+
return str.chars()
39+
.boxed()
40+
.collect(Collectors.groupingBy(Function.identity(), Collectors.counting()))
41+
.values()
42+
.stream()
43+
.map(Long::doubleValue)
44+
.mapToDouble(count -> count / length)
45+
.map(frequency -> -frequency * Math.log(frequency))
46+
.sum() / LOG_2;
47+
}
48+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* SonarSource Analyzers Commons
3+
* Copyright (C) 2009-2024 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonarsource.analyzer.commons;
21+
22+
import org.assertj.core.data.Offset;
23+
import org.junit.Test;
24+
25+
import static org.assertj.core.api.Assertions.assertThat;
26+
import static org.assertj.core.api.Assertions.within;
27+
28+
public class ShannonEntropyTest {
29+
private static final Offset<Double> WITHIN_5 = within(0.00001);
30+
31+
@Test
32+
public void calculate_empty() {
33+
assertThat(ShannonEntropy.calculate("")).isEqualTo(0.0d);
34+
assertThat(ShannonEntropy.calculate(null)).isEqualTo(0.0d);
35+
}
36+
37+
@Test
38+
public void calculate_base_2() {
39+
assertThat(ShannonEntropy.calculate("ab")).isEqualTo(1.0d);
40+
}
41+
42+
@Test
43+
public void calculate_from_sonar_java() {
44+
assertThat(ShannonEntropy.calculate("0000000000000000000000000000000000000000")).isEqualTo(0.000000, WITHIN_5);
45+
assertThat(ShannonEntropy.calculate("0000000000000000000011111111111111111111")).isEqualTo(1.000000, WITHIN_5);
46+
assertThat(ShannonEntropy.calculate("0000000000111111111122222222223333333333")).isEqualTo(2.000000, WITHIN_5);
47+
assertThat(ShannonEntropy.calculate("0000011111222223333344444555556666677777")).isEqualTo(3.000000, WITHIN_5);
48+
assertThat(ShannonEntropy.calculate("0123456789abcdef0123456789abcdef01234567")).isEqualTo(3.970950, WITHIN_5);
49+
assertThat(ShannonEntropy.calculate("0123456789ABCDabcdefghijklmnopqrstuvwxyz")).isEqualTo(5.321928, WITHIN_5);
50+
assertThat(ShannonEntropy.calculate("0040878d3579659158d09ad09b6a9849d18e0e22")).isEqualTo(3.587326, WITHIN_5);
51+
assertThat(ShannonEntropy.calculate("06c6d5715a1ede6c51fc39ff67fd647f740b656d")).isEqualTo(3.552655, WITHIN_5);
52+
assertThat(ShannonEntropy.calculate("qAhEMdXy/MPwEuDlhh7O0AFBuzGvNy7AxpL3sX3q")).isEqualTo(4.684183, WITHIN_5);
53+
}
54+
}

0 commit comments

Comments
 (0)