Поиск SUBSTRING переписан на алгоритм Кнута-Морриса-Пратта

This commit is contained in:
serega6531
2020-04-16 02:21:42 +03:00
parent 9eff4ecfe1
commit 6974869975
2 changed files with 69 additions and 7 deletions

View File

@@ -1,14 +1,15 @@
package ru.serega6531.packmate.service; package ru.serega6531.packmate.service;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import org.apache.commons.lang3.StringUtils;
import org.springframework.security.crypto.codec.Hex; import org.springframework.security.crypto.codec.Hex;
import ru.serega6531.packmate.model.FoundPattern; import ru.serega6531.packmate.model.FoundPattern;
import ru.serega6531.packmate.model.Pattern; import ru.serega6531.packmate.model.Pattern;
import ru.serega6531.packmate.utils.KMPByteSearcher; import ru.serega6531.packmate.utils.KMPByteSearcher;
import ru.serega6531.packmate.utils.KMPStringSearcher;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringReader;
import java.util.*; import java.util.*;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@@ -57,24 +58,23 @@ public class PatternMatcher {
} }
private void matchSubstring(Pattern pattern) { private void matchSubstring(Pattern pattern) {
int startSearch = 0;
final String value = pattern.getValue(); final String value = pattern.getValue();
KMPStringSearcher searcher = new KMPStringSearcher(value.toCharArray());
StringReader reader = new StringReader(content);
while (true) { while (true) {
int start = StringUtils.indexOfIgnoreCase(content, value, startSearch); int end = searcher.search(reader) - 1;
if (start == -1) { if (end < 0) {
return; return;
} }
int end = start + value.length() - 1; int start = end - value.length() + 1;
addIfPossible(FoundPattern.builder() addIfPossible(FoundPattern.builder()
.patternId(pattern.getId()) .patternId(pattern.getId())
.startPosition(start) .startPosition(start)
.endPosition(end) .endPosition(end)
.build()); .build());
startSearch = end + 1;
} }
} }

View File

@@ -0,0 +1,62 @@
package ru.serega6531.packmate.utils;
import lombok.SneakyThrows;
import java.io.IOException;
import java.io.StringReader;
import java.io.UncheckedIOException;
/**
* Based on <a href="https://github.com/twitter/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/util/StreamSearcher.java">StreamSearcher</a>
*/
public class KMPStringSearcher {
private char[] pattern;
private int[] borders;
private int lastEnd = 0;
public KMPStringSearcher(char[] pattern) {
setPattern(pattern);
}
public void setPattern(char[] pattern) {
this.pattern = pattern;
this.borders = new int[this.pattern.length + 1];
preProcess();
}
@SneakyThrows
public int search(StringReader reader) {
int bytesRead = 0;
int b;
int j = 0;
while ((b = reader.read()) != -1) {
bytesRead++;
while (j >= 0 && (char)b != pattern[j]) {
j = borders[j];
}
++j;
if (j == pattern.length) {
lastEnd += bytesRead;
return lastEnd;
}
}
return -1;
}
private void preProcess() {
int i = 0;
int j = -1;
borders[i] = j;
while (i < pattern.length) {
while (j >= 0 && pattern[i] != pattern[j]) {
j = borders[j];
}
borders[++i] = ++j;
}
}
}