-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCosineSimilarity
39 lines (33 loc) · 1.17 KB
/
CosineSimilarity
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
package com.test.document.parser;
/**
 * Cosine similarity calculator for documents represented as numeric vectors.
 *
 * @author Sandeep Naik
 */
public class CosineSimilarity {

    /**
     * Calculates the cosine similarity between two document vectors, i.e.
     * {@code (a . b) / (|a| * |b|)}.
     *
     * <p>Only the first {@code docVector1.length} components are read from each
     * vector, so both vectors are expected to have the same length. Extra trailing
     * components of {@code docVector2} are ignored.
     *
     * @param docVector1 document vector 1 (a)
     * @param docVector2 document vector 2 (b)
     * @return the cosine of the angle between the two vectors, or {@code 0.0} when
     *         the product of their magnitudes is zero (for example when either
     *         vector is empty or contains only zeros), since the similarity is
     *         undefined in that case
     * @throws ArrayIndexOutOfBoundsException if {@code docVector2} is shorter than
     *         {@code docVector1}
     */
    public double cosineSimilarity(double[] docVector1, double[] docVector2) {
        double dotProduct = 0.0;
        double sumOfSquares1 = 0.0;
        double sumOfSquares2 = 0.0;

        // docVector1 and docVector2 must be of the same length
        for (int i = 0; i < docVector1.length; i++) {
            final double a = docVector1[i];
            final double b = docVector2[i];
            dotProduct += a * b;       // a . b
            sumOfSquares1 += a * a;    // sum(a_i^2)
            sumOfSquares2 += b * b;    // sum(b_i^2)
        }

        // |a| * |b|
        final double denominator = Math.sqrt(sumOfSquares1) * Math.sqrt(sumOfSquares2);

        // If EITHER magnitude is zero the denominator is zero and the division
        // would yield NaN (0.0 / 0.0). Report "no similarity" instead.
        if (denominator == 0.0) {
            return 0.0;
        }
        return dotProduct / denominator;
    }
}