-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjaro.cpp
164 lines (148 loc) · 5.76 KB
/
jaro.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
// REF: All links below used for understanding the Jaro-Algorithm:
// https://www.youtube.com/watch?v=s0YSKiFdj8Q&t=916s
// https://www.geeksforgeeks.org/jaro-and-jaro-winkler-similarity/#
// https://stackoverflow.com/questions/32439453/is-there-an-existing-algorithm-in-checking-password-strength-or-re-invent-the-w
#include <string>
#include <iostream>
#include <algorithm>
#include <unordered_set>
#include <fstream>
#include <cctype>
using namespace std;
// This class is made to generate statistics about the relation between two passwords
class passwordRater {
public:
passwordRater(string mainPassword, string mostSimilar, int mostSimilarPosition);
float jaroDist();
float getQuirkScore();
private:
// Used for Jaro/Quirk Distance
int numTranspositions = 0, numMatches = 0;
// Security factors, Used for generating the QuirkScore
string leastUsedCharType;
int setScore = 0, positionScore = 0, quirkScore = 0, lengthScore = 0, mostSimilarPosition = 0;;
// Password and most common similar string
string mainPassword, mostSimilar;
// Private Methods
float quirkDist();
double scoreLength();
float scoreSet();
double scorePosition();
};
// The quirk score represents how unique and secure a password is
float passwordRater::getQuirkScore() {
double quirkScore = 0;
quirkScore += setScore;
quirkScore += positionScore;
quirkScore += quirkScore;
quirkScore += lengthScore;
if (jaroDist() >= 1)
quirkScore /= 100;
return 100 / 19 * quirkScore;
}
// This password runs the Jaro algorithm and computes each security factor's score
passwordRater::passwordRater(string mainPassword, string mostSimilar, int mostSimilarPosition) {
this->mainPassword = mainPassword;
this->mostSimilar = mostSimilar;
this->mostSimilarPosition = mostSimilarPosition;
// searchableDistance is the distance within which a character is considered matching
int searchableDistance;
// s1 will always be longer than s2
string s1 = mainPassword, s2 = mostSimilar;
if (mainPassword.length() < mostSimilar.length())
swap(s1, s2);
searchableDistance = (s1.length() / 2) - 1;
// Any characters counted in a match can only be counted once, so we keep track of them in a set
unordered_set<int> matchedIndices;
int greatestIndex = min(s1.length(), s2.length() + searchableDistance);
// This is the execution of the algorithm
for(int i = 0; i < greatestIndex; i++) {
// Test to see if there is a match
// Find the first unique match
for(int j = max(0, i - searchableDistance); j < min(int(s2.length()), i + searchableDistance + 1); j++) {
if(s1[i] == s2[j] && matchedIndices.find(j) == matchedIndices.end()) {
// If it is not an exact match, its both a match and a transposition
if(i != j)
numTranspositions++;
numMatches++;
matchedIndices.insert(j);
break;
}
}
}
// Update the values for the variables
positionScore = scorePosition();
setScore = scoreSet();
quirkScore = 4 * (1 - quirkDist());
lengthScore = scoreLength();
}
// The longer the password, the greater the length score
double passwordRater::scoreLength() {
return mainPassword.length() * 0.25 - 1;
}
// This returns the Jaro algorithm's distance, or the similarity score
float passwordRater::passwordRater::jaroDist() {
// Calculate and return the Jaro Distance
float jaro = 0;
if (numMatches != 0) {
jaro = float(numMatches) / mainPassword.length();
jaro += float(numMatches) / mostSimilar.length();
jaro += float(numMatches - numTranspositions / 2.0) / numMatches;
jaro /= 3;
}
// Return the jaro value calculated above:
return jaro;
}
// This method returns a modified version of the jaro distance made for passwords
float passwordRater::quirkDist() {
// Calculate and return the Quirky distance
float dist = 0;
if(numMatches != 0) {
dist = 3 * float(numMatches) / mainPassword.length();
dist += float(numMatches - numTranspositions) / numMatches;
dist /= 4;
}
// Return the quirk distance calculated:
return dist;
}
// This method returns a score based on the variance of the character sets used in the password
float passwordRater::scoreSet() {
int setScore = 0, numLower = 0, numUpper = 0, numSpecial = 0, numNumber = 0;
for(char c : mainPassword) {
if(islower(c))
numLower++;
else if(isupper(c))
numUpper++;
else if(isdigit(c))
numNumber++;
else
numSpecial++;
}
// Checking for variances of characters:
if (numUpper > 0)
setScore++;
if(numNumber > 0)
setScore++;
if(numSpecial > 0)
setScore += 2;
int leastOccurringSet = min(numLower, numUpper);
leastOccurringSet = min(leastOccurringSet, numSpecial);
leastOccurringSet = min(leastOccurringSet, numNumber);
// Calculate the variance:
float variance = float(leastOccurringSet) / mainPassword.length();
if (variance >= 0.05)
setScore++;
if(variance >= 0.1)
setScore++;
return float(setScore) * 3/2;
}
// This method returns a score based on how common the most similar password is:
// -- is done when position is lower, as that means its more common! While the ++ for 50000 ensures debugging issues with the score :)
double passwordRater::scorePosition() {
int score = 0;
if(mostSimilarPosition > 25000)
score--;
if(mostSimilarPosition > 50000)
score++;
return score;
}