Поиск SUBBYTES переписан на алгоритм Кнута-Морриса-Пратта, добавлены тесты поиска паттернов

This commit is contained in:
serega6531
2020-04-16 02:08:30 +03:00
parent 07affe3812
commit 9eff4ecfe1
4 changed files with 161 additions and 10 deletions

View File

@@ -23,6 +23,7 @@ import javax.persistence.Id;
@Builder
@Getter
@ToString
@EqualsAndHashCode
public class FoundPattern {
@Id

View File

@@ -1,15 +1,18 @@
package ru.serega6531.packmate.service;
import lombok.SneakyThrows;
import org.apache.commons.lang3.StringUtils;
import org.springframework.security.crypto.codec.Hex;
import ru.serega6531.packmate.model.FoundPattern;
import ru.serega6531.packmate.model.Pattern;
import ru.serega6531.packmate.utils.BytesUtils;
import ru.serega6531.packmate.utils.KMPByteSearcher;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
class PatternMatcher {
public class PatternMatcher {
private static final Map<String, java.util.regex.Pattern> compiledPatterns = new HashMap<>();
@@ -19,13 +22,13 @@ class PatternMatcher {
private final Set<FoundPattern> result = new HashSet<>();
/**
 * Creates a matcher over one payload and the list of patterns to look for in it.
 * The byte array and the pattern list are stored as given (no copies are made),
 * and a String view of the payload is built alongside the raw bytes.
 *
 * @param contentBytes raw payload to search
 * @param patterns     patterns that findMatches() will apply to the payload
 */
PatternMatcher(byte[] contentBytes, List<Pattern> patterns) {
public PatternMatcher(byte[] contentBytes, List<Pattern> patterns) {
this.contentBytes = contentBytes;
// new String(byte[]) decodes with the JVM default charset, so this text view
// depends on how the runtime is configured.
this.content = new String(contentBytes);
this.patterns = patterns;
}
/**
 * Applies match(...) to every configured pattern and returns what was collected.
 *
 * @return the matcher's own {@code result} set (the field itself, not a copy)
 */
Set<FoundPattern> findMatches() {
public Set<FoundPattern> findMatches() {
patterns.forEach(this::match);
return result;
}
@@ -75,25 +78,25 @@ class PatternMatcher {
}
}
/**
 * Looks for the pattern's hex-decoded value inside contentBytes and passes every
 * occurrence to addIfPossible as a FoundPattern with inclusive start and end offsets.
 *
 * NOTE(review): start and end are each declared twice below, so this listing appears
 * to interleave the pre-change (BytesUtils.indexOf) and post-change (KMPByteSearcher)
 * statements of the commit - confirm against the actual file.
 *
 * @param pattern pattern whose value is a hex string of the bytes to find
 */
@SneakyThrows
private void matchSubbytes(Pattern pattern) {
int startSearch = 0;
final byte[] value = Hex.decode(pattern.getValue());
KMPByteSearcher searcher = new KMPByteSearcher(value);
InputStream is = new ByteArrayInputStream(contentBytes);
while (true) {
int start = BytesUtils.indexOf(contentBytes, value, startSearch, contentBytes.length);
// search(...) returns the offset just past the match, or -1 when the stream is
// exhausted; subtracting one gives the inclusive end (or a negative value).
int end = searcher.search(is) - 1;
if (start == -1) {
if (end < 0) {
return;
}
int end = start + value.length - 1;
// Derive the inclusive start from the inclusive end and the pattern length.
int start = end - value.length + 1;
addIfPossible(FoundPattern.builder()
.patternId(pattern.getId())
.startPosition(start)
.endPosition(end)
.build());
startSearch = end + 1;
}
}

View File

@@ -0,0 +1,62 @@
package ru.serega6531.packmate.utils;
import lombok.SneakyThrows;
import java.io.InputStream;
/**
 * Knuth-Morris-Pratt search for a byte pattern in an {@link InputStream}.
 * <p>
 * Based on <a href="https://github.com/twitter/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/util/StreamSearcher.java">StreamSearcher</a>
 * <p>
 * The searcher is stateful: it keeps a running end offset across successful
 * {@link #search(InputStream)} calls, so calling it repeatedly on the same stream
 * yields offsets relative to where the first call started reading. That offset is
 * not reset by {@link #setPattern(byte[])}; use a fresh instance for a new stream.
 */
public class KMPByteSearcher {

    private byte[] pattern;
    /** KMP failure table: borders[i] is the fallback state for a mismatch at pattern index i; borders[0] is -1. */
    private int[] borders;
    /** Offset just past the most recent match, accumulated over all successful searches. */
    private int lastEnd = 0;

    /**
     * @param pattern bytes to look for; must not be null
     */
    public KMPByteSearcher(byte[] pattern) {
        setPattern(pattern);
    }

    /**
     * Replaces the pattern and rebuilds the failure table.
     *
     * @param pattern bytes to look for; must not be null. The array is copied, so later
     *                changes to the caller's array cannot desynchronise the failure table.
     */
    public void setPattern(byte[] pattern) {
        this.pattern = pattern.clone();
        this.borders = new int[this.pattern.length + 1];
        preProcess();
    }

    /**
     * Reads the stream byte by byte until the pattern has been seen or the stream ends.
     * Matching restarts from scratch on every call, so consecutive calls on the same
     * stream report non-overlapping occurrences.
     *
     * @param stream source of bytes; it is left positioned right after the match
     * @return the offset just past the matched bytes (accumulated over all successful
     *         calls on this instance), or -1 if the stream ended without a match or
     *         the pattern is empty
     */
    @SneakyThrows
    public int search(InputStream stream) {
        if (pattern.length == 0) {
            // There is nothing to compare against; without this guard the first
            // pattern[j] access below throws ArrayIndexOutOfBoundsException.
            return -1;
        }

        int bytesRead = 0;
        int b;
        int j = 0;

        while ((b = stream.read()) != -1) {
            bytesRead++;

            // On mismatch fall back through the failure table; j becomes -1 when
            // not even the first pattern byte matches.
            while (j >= 0 && (byte) b != pattern[j]) {
                j = borders[j];
            }

            ++j;

            if (j == pattern.length) {
                lastEnd += bytesRead;
                return lastEnd;
            }
        }

        // Bytes consumed by an unsuccessful search are not added to lastEnd;
        // the stream is exhausted at this point.
        return -1;
    }

    /** Builds the failure table for the current pattern. */
    private void preProcess() {
        int i = 0;
        int j = -1;
        borders[i] = j;

        while (i < pattern.length) {
            while (j >= 0 && pattern[i] != pattern[j]) {
                j = borders[j];
            }
            borders[++i] = ++j;
        }
    }
}

View File

@@ -0,0 +1,85 @@
package ru.serega6531.packmate;
import org.junit.jupiter.api.Test;
import org.springframework.util.Assert;
import ru.serega6531.packmate.model.FoundPattern;
import ru.serega6531.packmate.model.Pattern;
import ru.serega6531.packmate.model.enums.PatternSearchType;
import ru.serega6531.packmate.service.PatternMatcher;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Checks {@link PatternMatcher#findMatches()} for the REGEX, SUBSTRING and SUBBYTES search types.
 * <p>
 * Every expected {@link FoundPattern} carries inclusive start and end offsets into the content.
 * Neither the pattern nor the expected results are given an id here, so the comparison
 * presumably relies on both sides keeping the same default id - confirm against the model classes.
 */
public class PatternMatcherTest {

    /** Two adjacent three-letter runs of [a-f] must be reported as separate matches. */
    @Test
    public void testRegex() {
        final byte[] content = "ahkfkyafceffek".getBytes(StandardCharsets.UTF_8);

        assertMatches(content, "[a-f]{3}", PatternSearchType.REGEX,
                Set.of(found(6, 8), found(9, 11)));
    }

    /** A run of seven 'b' characters yields two non-overlapping hits for "bbb". */
    @Test
    public void testSubstring() {
        final byte[] content = "abaabbaaabaabbbbbbbaaabaaa".getBytes(StandardCharsets.UTF_8);

        assertMatches(content, "bbb", PatternSearchType.SUBSTRING,
                Set.of(found(12, 14), found(15, 17)));
    }

    /** Four 0xAA bytes yield two non-overlapping hits for the mixed-case hex value "AAaa". */
    @Test
    public void testSubbytes() {
        final byte[] content = new byte[]{0x11, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, 0x22};

        assertMatches(content, "AAaa", PatternSearchType.SUBBYTES,
                Set.of(found(1, 2), found(3, 4)));
    }

    /** Builds an expected match with only its inclusive start and end offsets set. */
    private static FoundPattern found(int start, int end) {
        return FoundPattern.builder()
                .startPosition(start)
                .endPosition(end)
                .build();
    }

    /** Runs a single pattern of the given type over the content and compares the result set. */
    private static void assertMatches(byte[] content, String value, PatternSearchType searchType,
                                      Set<FoundPattern> expected) {
        final Pattern pattern = new Pattern();
        pattern.setValue(value);
        pattern.setSearchType(searchType);

        final PatternMatcher matcher = new PatternMatcher(content, List.of(pattern));
        final Set<FoundPattern> matches = matcher.findMatches();

        // JUnit's assertEquals reports a proper assertion failure with expected/actual values,
        // unlike Spring's Assert.isTrue, which throws IllegalArgumentException.
        assertEquals(expected, matches, "Incorrect search: " + matches);
    }
}