Skip to content

Commit 902cfc6

Browse files
author
i guest
committed
init
0 parents  commit 902cfc6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+26422
-0
lines changed

algorithm/algorithm.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#ifndef _alogrithm_h_
2+
#define _alogrithm_h_
3+
4+
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
8+
///longest common subsequence (characters keep their order but need not be contiguous)
9+
///@param[in] s1 string source
10+
///@param[in] s2 string source
11+
///@param[out] seq result sequence
12+
///@param[in] len seq buffer size(bytes)
13+
///@return <0-error, 0-ok, >0=need more buffer
14+
int lcs(const char* s1, const char* s2, char* seq, int len);
15+
16+
///longest common substring
17+
///@param[in] s1 string source
18+
///@param[in] s2 string source
19+
///@param[out] sub common substring
20+
///@param[in] len substring buffer size(bytes)
21+
///@return <0-error, 0-ok, >0=need more buffer
22+
int strsubstring(const char* s1, const char* s2, char* sub, int len);
23+
24+
///Knuth-Morris-Pratt Algorithm
25+
///@param[in] s string
26+
///@param[in] pattern substring
27+
///@return NULL-pattern not found in s (or out of memory), other-pointer to the match inside s
28+
const char* kmp(const char* s, const char* pattern);
29+
30+
#ifdef __cplusplus
31+
}
32+
#endif
33+
34+
#endif /* !_alogrithm_h_ */

algorithm/kmp.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Knuth-Morris-Pratt Algorithm
2+
// http://www.ics.uci.edu/~eppstein/161/960227.html
3+
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
#include <string.h>
7+
#include <assert.h>
8+
9+
/// Build the KMP failure ("overlap") table for a pattern.
/// overlap[j] is the length of the longest proper prefix of pattern[0..j]
/// that is also a suffix of pattern[0..j].
/// @param[in] pattern pattern string
/// @param[in] n pattern length(bytes)
/// @param[out] overlap table; overlap[0] is always written, so it needs max(n, 1) entries
static void kmp_overlap(const char* pattern, int n, int* overlap)
{
	int i, j;

	overlap[0] = 0;

	for(i=0, j=1; j<n; j++)
	{
		// On a mismatch fall back to the next shorter border instead of
		// restarting from 0; otherwise borders such as "aa" in "aabaaa" are lost.
		while(i > 0 && pattern[j] != pattern[i])
			i = overlap[i-1];

		if(pattern[j] == pattern[i])
			++i;

		assert(i <= j);
		overlap[j] = i;
	}
}
29+
30+
/// Scan s for pattern using a precomputed overlap (failure) table.
/// @param[in] s string to search
/// @param[in] pattern substring to find
/// @param[in] n1 length of s(bytes)
/// @param[in] n2 length of pattern(bytes)
/// @param[in] overlap failure table of pattern; entries 0..n2-2 are read
/// @return NULL-no match, other-pointer to the first match inside s (s itself when n2 is 0)
static const char* kmp_match(const char* s, const char* pattern, int n1, int n2, int* overlap)
{
	int i, j;

	i = 0;
	j = 0;
	while(i < n1 && j < n2)
	{
		if(s[i] == pattern[j])
		{
			++j;
			++i;
		}
		else if(j > 0)
		{
			// Keep i in place: s[i] must be re-tested against the shorter
			// prefix, including pattern[0] when the fallback is 0.
			j = overlap[j-1];
		}
		else
		{
			++i;
		}
	}

	assert(i >= j);
	return j == n2 ? s + i - j : NULL;
}
54+
55+
/// Knuth-Morris-Pratt substring search.
/// @param[in] s string to search in
/// @param[in] pattern substring to look for (asserted non-NULL)
/// @return the result of kmp_match, or NULL when the overlap table can't be allocated
const char* kmp(const char* s, const char* pattern)
{
	const char* hit;
	int* table;
	int slen, plen;

	assert(pattern);
	slen = strlen(s);
	plen = strlen(pattern);

	// kmp_overlap always writes table[0], so keep one entry even for an empty pattern
	table = (int*)malloc(sizeof(int)*(plen+1));
	if(NULL == table)
		return NULL;

	kmp_overlap(pattern, plen, table);
	hit = kmp_match(s, pattern, slen, plen, table);
	free(table);

	return hit;
}

algorithm/lcs.c

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Longest Common Subsequences(not Longest Common Substring)
2+
//
3+
// lcs("nematode knowledge", "empty bottle") => "emt ole";
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <assert.h>
8+
#include <errno.h>
9+
10+
// Larger of two values. No trailing ';' so the macro is usable inside expressions.
// Note: each argument may be evaluated twice.
#define MAX(a, b) ((a)>(b)?(a):(b))
// Bounds-aware read of cell (i, j); expects m, n1 and n2 in the calling scope.
#define LEN(i, j) lcs_length(m, n1, n2, i, j)

/// Read cell (i, j) of the row-major table m (row stride n2).
/// @param[in] m table of n1*n2 ints (only read, hence const)
/// @param[in] n1 row count (not needed for indexing)
/// @param[in] n2 column count
/// @param[in] i row index, may be -1
/// @param[in] j column index, may be -1
/// @return 0 when i or j is negative, otherwise m[i*n2 + j]
static int lcs_length(const int* m, int n1, int n2, int i, int j)
{
	(void)n1;
	return (i<0||j<0) ? 0 : m[i*n2 + j];
}
17+
18+
/// Fill the longest-common-subsequence table.
/// m[i*n2 + j] receives the LCS length of s1[0..i] and s2[0..j].
/// @param[in] s1 string source
/// @param[in] s2 string source
/// @param[in] n1 length of s1(bytes)
/// @param[in] n2 length of s2(bytes)
/// @param[out] m row-major table with room for n1*n2 ints
/// @return LCS length of the whole strings; 0 (m untouched) when either string is empty
static int lcs_alogrithm(const char* s1, const char* s2, int n1, int n2, int* m)
{
	int i, j;
	int up, left, diag;

	// An empty table has no last cell; m[n1*n2-1] would read m[-1].
	if(n1 <= 0 || n2 <= 0)
		return 0;

	for(i=0; i<n1; ++i)
	{
		for(j=0; j<n2; ++j)
		{
			if(s1[i] == s2[j])
			{
				// neighbours outside the table count as 0 (empty prefix)
				diag = (i>0 && j>0) ? m[(i-1)*n2 + (j-1)] : 0;
				m[i*n2 + j] = diag + 1;
			}
			else
			{
				up = i>0 ? m[(i-1)*n2 + j] : 0;
				left = j>0 ? m[i*n2 + (j-1)] : 0;
				m[i*n2 + j] = up>left ? up : left;
			}
		}
	}

	return m[n1*n2-1];
}
39+
40+
/// Recover one longest common subsequence from a filled LCS table.
/// Walks the rows bottom-up; in each row it looks for the cell whose value equals
/// the number of characters still missing and is larger than both its upper and
/// left neighbour, and takes s1[i] from that cell.
/// @param[in] s1 string source the characters are taken from
/// @param[in] s2 string source (not read here)
/// @param[in] n1 length of s1(bytes)
/// @param[in] n2 length of s2(bytes)
/// @param[in] m table of n1*n2 ints filled by lcs_alogrithm
/// @param[out] seq receives m[n1*n2-1] characters; no terminator is written
static void lcs_sequence(const char* s1, const char* s2, int n1, int n2, const int* m, char* seq)
{
	int i, j;
	int lcs;
	int cur, up, left;

	(void)s2;

	// An empty table has no last cell to read the length from.
	if(n1 <= 0 || n2 <= 0)
		return;

	lcs = m[n1*n2-1];

	// Stop as soon as every character has been placed (lcs counts down to 0).
	for(i=n1-1; i>=0 && lcs>0; --i)
	{
		for(j=n2-1; j>=0; --j)
		{
			cur = m[i*n2 + j];
			up = i>0 ? m[(i-1)*n2 + j] : 0;
			left = j>0 ? m[i*n2 + (j-1)] : 0;

			if(cur==lcs && cur>up && cur>left)
			{
				seq[--lcs] = s1[i];
				break;
			}
		}
	}
}
63+
64+
/// Debug helper: print the table to stdout as a tab-separated grid.
/// The first line labels the columns with the characters of s2; each following
/// line starts with a character of s1 followed by that row's values.
static void lcs_print(const char* s1, const char* s2, int n1, int n2, int* m)
{
	int row, col;

	// column labels
	col = 0;
	while(col < n2)
	{
		printf("\t%c", s2[col]);
		++col;
	}
	printf("\n");

	// one output line per character of s1
	for(row = 0; row < n1; ++row)
	{
		printf("%c\t", s1[row]);
		for(col = 0; col < n2; ++col)
			printf("%d\t", LEN(row, col));
		printf("\n");
	}
}
82+
83+
/// longest common subsequence
/// @param[in] s1 string source
/// @param[in] s2 string source
/// @param[out] seq result sequence, NUL-terminated on success(seq can be s2, but can't be s1)
/// @param[in] len seq buffer size(bytes); must hold the sequence plus the terminator
/// @return <0-error(-ENOMEM), 0-ok, >0=need more seq buffer(value is the required size in bytes)
int lcs(const char* s1, const char* s2, char* seq, int len)
{
	int* m;
	int n1, n2;
	int total;

	n1 = strlen(s1);
	n2 = strlen(s2);

	// With an empty input there is no table to build and the result is "".
	m = NULL;
	total = 0;
	if(n1 > 0 && n2 > 0)
	{
		// sizeof first, so the product is computed in size_t rather than int
		m = (int*)malloc(sizeof(int)*n1*n2);
		if(!m)
			return -ENOMEM;

		total = lcs_alogrithm(s1, s2, n1, n2, m);

		// Debug aid: dump the DP table to stdout.
		//lcs_print(s1, s2, n1, n2, m);
	}

	// seq needs total characters plus the terminating NUL
	if(len < total + 1)
	{
		free(m);
		return total + 1;
	}

	seq[total] = 0;
	if(total > 0)
		lcs_sequence(s1, s2, n1, n2, m, seq);

	free(m);
	return 0;
}

algorithm/substring.c

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// longest common substring (contiguous; not the longest common subsequence)
2+
//
3+
// strsubstring("banananobano", "xanano") => "anano"
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <assert.h>
8+
#include <errno.h>
9+
10+
// Larger of two values. No trailing ';' so the macro is usable inside expressions.
// Note: each argument may be evaluated twice.
#define MAX(a, b) ((a)>(b)?(a):(b))
// Bounds-aware read of cell (i, j); expects m, n1 and n2 in the calling scope.
#define LEN(i, j) lcs_length(m, n1, n2, i, j)

/// Read cell (i, j) of the row-major table m (row stride n2).
/// @param[in] m table of n1*n2 ints (only read, hence const)
/// @param[in] n1 row count (not needed for indexing)
/// @param[in] n2 column count
/// @param[in] i row index, may be -1
/// @param[in] j column index, may be -1
/// @return 0 when i or j is negative, otherwise m[i*n2 + j]
static int lcs_length(const int* m, int n1, int n2, int i, int j)
{
	(void)n1;
	return (i<0||j<0) ? 0 : m[i*n2 + j];
}
17+
18+
/// Fill the common-suffix table used for the longest common substring.
/// m[i*n2 + j] is set to 0 when s1[i] != s2[j], otherwise to one more than the
/// diagonal neighbour (0 outside the table).
/// @param[in] s1 string source
/// @param[in] s2 string source
/// @param[in] n1 length of s1(bytes)
/// @param[in] n2 length of s2(bytes)
/// @param[out] m row-major table with room for n1*n2 ints
/// @return flat index of the first cell holding the largest value (0 if every cell is 0)
static int lcs_alogrithm(const char* s1, const char* s2, int n1, int n2, int* m)
{
	int row, col;
	int pos;
	int best = 0;     // largest cell value seen so far
	int best_pos = 0; // flat index of that cell

	for(row = 0; row < n1; ++row)
	{
		for(col = 0; col < n2; ++col)
		{
			pos = row*n2 + col;

			if(s1[row] != s2[col])
			{
				m[pos] = 0;
				continue;
			}

			// extend the run that ended on the previous character of both strings
			m[pos] = LEN(row-1, col-1)+1;
			if(m[pos] > best)
			{
				best = m[pos];
				best_pos = pos;
			}
		}
	}

	return best_pos;
}
51+
52+
/// Copy the common substring that ends at table cell idx into sub.
/// Walks the diagonal from idx towards the top-left while the cell value is
/// positive; a cell with value k supplies character sub[k-1].
/// @param[in] s1 string source the characters are taken from
/// @param[in] s2 string source (not read here)
/// @param[in] n1 length of s1(bytes) (not needed here)
/// @param[in] n2 length of s2(bytes), row stride of m
/// @param[in] m table filled by lcs_alogrithm
/// @param[in] idx flat index of the cell where the substring ends
/// @param[out] sub receives m[idx] characters; no terminator is written
static void lcs_substring(const char* s1, const char* s2, int n1, int n2, const int* m, int idx, char* sub)
{
	int i, j;
	int lcs;

	(void)s2;
	(void)n1;

	if(n2 <= 0)
		return; // no columns: idx/n2 would divide by zero

	// j must reach column 0, otherwise the first character is lost
	// whenever the common substring starts at s2[0].
	for(i=idx/n2, j=idx%n2; i>=0 && j>=0; --i, --j)
	{
		lcs = m[i*n2 + j];
		if(lcs <= 0)
			break;

		assert(i==0 || j==0 || lcs > m[(i-1)*n2 + (j-1)]);
		sub[lcs - 1] = s1[i];
	}
}
67+
68+
/// Debug helper: print the table to stdout as a tab-separated grid.
/// The first line labels the columns with the characters of s2; each following
/// line starts with a character of s1 followed by that row's values.
static void lcs_print(const char* s1, const char* s2, int n1, int n2, int* m)
{
	int row, col;

	// column labels
	col = 0;
	while(col < n2)
	{
		printf("\t%c", s2[col]);
		++col;
	}
	printf("\n");

	// one output line per character of s1
	for(row = 0; row < n1; ++row)
	{
		printf("%c\t", s1[row]);
		for(col = 0; col < n2; ++col)
			printf("%d\t", LEN(row, col));
		printf("\n");
	}
}
86+
87+
///longest common substring
///@param[in] s1 string source
///@param[in] s2 string source
///@param[out] sub common substring, NUL-terminated on success ("" when nothing is shared)
///@param[in] len substring buffer size(bytes); must hold the substring plus the terminator
///@return <0-error(-ENOMEM), 0-ok, >0=need more buffer(value is the required size in bytes)
int strsubstring(const char* s1, const char* s2, char* sub, int len)
{
	int* m;
	int n1, n2;
	int idx;
	int length;

	n1 = strlen(s1);
	n2 = strlen(s2);

	// With an empty input there is no table to build and the result is "".
	m = NULL;
	idx = 0;
	length = 0;
	if(n1 > 0 && n2 > 0)
	{
		// sizeof first, so the product is computed in size_t rather than int
		m = (int*)malloc(sizeof(int)*n1*n2);
		if(!m)
			return -ENOMEM;

		idx = lcs_alogrithm(s1, s2, n1, n2, m);

		//lcs_print(s1, s2, n1, n2, m);

		// copy the length out now; m must not be read after free()
		length = m[idx];
	}

	// sub needs length characters plus the terminating NUL
	if(len < length + 1)
	{
		free(m);
		return length + 1;
	}

	sub[length] = 0;
	if(length > 0)
		lcs_substring(s1, s2, n1, n2, m, idx, sub);

	free(m);
	return 0;
}

0 commit comments

Comments
 (0)