经典字符串匹配算法
# 经典字符串匹配算法 KMP
前置知识
- 真前缀与真后缀
- nexts数组实现
- 时间复杂度
Olog(n+m)
public class KMP {
public static void main(String[] args) {
String text = "ababaaaabccababab";
String pattern = "ab";
System.out.println(kmp(text,pattern));
}
public static List<Integer> kmp(String text,String pattern){
List<Integer> ans = new ArrayList<>();
if( text== null || pattern == null ) return ans;
if( text.length() < pattern.length() ) return ans;
int[] nexts = calcNexts(pattern);
int count = 0;
for(int i = 0;i<text.length();i++){
while( count > 0 && text.charAt(i) != pattern.charAt(count)){
count = nexts[count-1];
}
if( text.charAt(i) == pattern.charAt(count)){
count++;
}
if( count == pattern.length() ){
ans.add(i - count + 1);
count = nexts[count-1];
}
}
return ans;
}
private static int[] calcNexts(String pattern){
int[] nexts = new int[pattern.length()];
int count = 0;
for(int i = 1;i<pattern.length();i++){
while( count > 0 && pattern.charAt(i) != pattern.charAt(count) ){
count = nexts[count-1]; // back
}
if( pattern.charAt(i) == pattern.charAt(count)) count++;
nexts[i] = count;
}
return nexts;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
function KMP(text, pattren) {
if (!text || !pattren) return null;
if (text.length < pattren.length) return null
function calcNext(s) {
if (!s) return null
let nexts = Array.from({ length: s.length }).fill(0)
// console.log(nexts)
let count = 0
for (let i = 1; i < s.length; i++) {
while (count > 0 && s.charAt(count) != s.charAt(i)) {
count = nexts[count - 1]
}
if (s.charAt(count) == s.charAt(i)) count++;
nexts[i] = count
}
return nexts
}
let nexts = calcNext(pattren)
if (!nexts) return null
let res = []
let count = 0
for (let i = 0; i < text.length; i++) {
while (count > 0 && text.charAt(i) != pattren.charAt(count)) {
count = nexts[count - 1]
}
if (text.charAt(i) == pattren.charAt(count)) count++;
if (count == pattren.length) {
res.push(i - count + 1)
count = nexts[count - 1] // 回退
}
}
return res
}
function search(text, pattren) {
if (!text || !pattren) return null;
if (text.length < pattren.length) return null
let res = []
for (let i = 0; i < text.length - pattren.length + 1; i++) {
let s = text.substring(i, i + pattren.length)
if (s == pattren) res.push(i)
}
return res
}
function ok(arr1, arr2) {
if( arr1 == arr2){
console.log('ok')
return
}
if( !Array.isArray(arr1) || !Array.isArray(arr2)){
console.log('error')
return;
}
if (arr1.length != arr2.length) {
console.log('error!')
return
}
let f = true
for (let i = 0; i < arr1.length; i++) {
if (arr1[i] != arr2[i]) {
f = false;
break;
}
}
if (f) {
console.log('ok')
} else {
console.log('error')
}
}
let text = 'abcdefababcabadfasafddasfasababbbbasdewtfseaqwfrjslakdfab'
let pattren = 'ab'
console.log(text, pattren)
console.log(KMP(text, pattren))
console.log(search(text, pattren))
ok(KMP(text,pattren),search(text,pattren))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# kmp
def kmp(text,pattren):
if not text or not pattren or len(text) < len(pattren):
return []
n,m = len(pattren),len(text)
def nexts():
l = [0 for _ in range(n)]
cnt = 0
for i in range(1,n):
while cnt > 0 and pattren[i] != pattren[cnt]:
cnt = l[cnt-1]
if pattren[i] == pattren[cnt]:
cnt += 1
l[i] = cnt
return l
nxt = nexts()
# print('nxt',nxt)
cnt = 0
res = []
for i in range(0,m - n + 1):
while cnt > 0 and text[i] != pattren[cnt]:
cnt = nxt[cnt-1]
if text[i] == pattren[cnt]:
cnt += 1
if cnt == n:
res.append(i - cnt + 1)
cnt = nxt[cnt-1]
return res
# 普通方法
def serach(text,pattren):
if not text or not pattren or len(text) < len(pattren):
return []
n,m = len(pattren),len(text)
res = []
for i in range(0,m - n + 1):
if text[i:n+i] == pattren:
res.append(i)
return res
s1 = 'abceababadfdsfasdfasfaabaaababababdfsd3534fdajkhjkghabacabadd'
p1 = 'ab'
print('res',kmp(s1,p1))
print('res',serach(s1,p1))
print(kmp(s1,p1) == serach(s1,p1))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// Make sure to add code blocks to your code group
# 参考链接
编辑 (opens new window)
上次更新: 2024-05-21, 09:50:09