Поиск SUBBYTES переписан на алгоритм Кнута-Морриса-Пратта, добавлены тесты поиска паттернов
This commit is contained in:
@@ -23,6 +23,7 @@ import javax.persistence.Id;
|
||||
@Builder
|
||||
@Getter
|
||||
@ToString
|
||||
@EqualsAndHashCode
|
||||
public class FoundPattern {
|
||||
|
||||
@Id
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
package ru.serega6531.packmate.service;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.security.crypto.codec.Hex;
|
||||
import ru.serega6531.packmate.model.FoundPattern;
|
||||
import ru.serega6531.packmate.model.Pattern;
|
||||
import ru.serega6531.packmate.utils.BytesUtils;
|
||||
import ru.serega6531.packmate.utils.KMPByteSearcher;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
class PatternMatcher {
|
||||
public class PatternMatcher {
|
||||
|
||||
private static final Map<String, java.util.regex.Pattern> compiledPatterns = new HashMap<>();
|
||||
|
||||
@@ -19,13 +22,13 @@ class PatternMatcher {
|
||||
|
||||
private final Set<FoundPattern> result = new HashSet<>();
|
||||
|
||||
/**
 * Creates a matcher over one chunk of content.
 *
 * @param contentBytes raw bytes to search; kept as-is and also decoded to a string
 * @param patterns     patterns that {@code findMatches()} will apply to the content
 */
public PatternMatcher(byte[] contentBytes, List<Pattern> patterns) {
    this.contentBytes = contentBytes;
    // NOTE(review): new String(byte[]) decodes with the platform default charset, so
    // character offsets in `content` only equal byte offsets for single-byte data.
    // Confirm whether a fixed single-byte charset is intended here.
    this.content = new String(contentBytes);
    this.patterns = patterns;
}
|
||||
|
||||
Set<FoundPattern> findMatches() {
|
||||
public Set<FoundPattern> findMatches() {
|
||||
patterns.forEach(this::match);
|
||||
return result;
|
||||
}
|
||||
@@ -75,25 +78,25 @@ class PatternMatcher {
|
||||
}
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private void matchSubbytes(Pattern pattern) {
|
||||
int startSearch = 0;
|
||||
final byte[] value = Hex.decode(pattern.getValue());
|
||||
KMPByteSearcher searcher = new KMPByteSearcher(value);
|
||||
InputStream is = new ByteArrayInputStream(contentBytes);
|
||||
|
||||
while (true) {
|
||||
int start = BytesUtils.indexOf(contentBytes, value, startSearch, contentBytes.length);
|
||||
int end = searcher.search(is) - 1;
|
||||
|
||||
if (start == -1) {
|
||||
if (end < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int end = start + value.length - 1;
|
||||
int start = end - value.length + 1;
|
||||
addIfPossible(FoundPattern.builder()
|
||||
.patternId(pattern.getId())
|
||||
.startPosition(start)
|
||||
.endPosition(end)
|
||||
.build());
|
||||
|
||||
startSearch = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
package ru.serega6531.packmate.utils;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * Searches an {@link InputStream} for a byte pattern using the Knuth-Morris-Pratt algorithm.
 *
 * <p>The searcher keeps a running offset across calls to {@link #search(InputStream)}, so
 * repeated calls on the same stream return positions relative to where the first call
 * started. Each call starts matching from scratch, so reported occurrences never overlap.
 * An instance is therefore meant to be used with a single stream and is not thread-safe.
 *
 * <p>Based on <a href="https://github.com/twitter/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/util/StreamSearcher.java">StreamSearcher</a>
 */
public class KMPByteSearcher {

    private byte[] pattern;
    // borders[i] is the length of the longest proper border of pattern[0..i), borders[0] == -1.
    private int[] borders;
    // Number of bytes consumed up to and including the last reported match.
    private int lastEnd = 0;

    /**
     * @param pattern bytes to look for; must not be {@code null}
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public KMPByteSearcher(byte[] pattern) {
        setPattern(pattern);
    }

    /**
     * Replaces the pattern and rebuilds the failure table. The running offset is kept,
     * so a search may continue on the same stream with a different pattern.
     *
     * @param pattern bytes to look for; must not be {@code null}
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public void setPattern(byte[] pattern) {
        // Copy so that later changes to the caller's array cannot desynchronise
        // the pattern from the precomputed border table.
        this.pattern = pattern.clone();
        this.borders = new int[this.pattern.length + 1];
        preProcess();
    }

    /**
     * Reads the stream byte by byte until the next occurrence of the pattern ends.
     *
     * @param stream stream to read from; left positioned just after the match, or at
     *               end of stream when nothing was found
     * @return the offset just past the match, counted in bytes consumed by this searcher
     *         since it was created, or {@code -1} if the stream ended without a match or
     *         the pattern is empty (in which case nothing is read)
     * @throws java.io.UncheckedIOException if reading from the stream fails
     */
    public int search(InputStream stream) {
        if (pattern.length == 0) {
            // Nothing to match; the loop below would index pattern[0].
            return -1;
        }

        int bytesRead = 0;
        int j = 0;

        try {
            int b;
            while ((b = stream.read()) != -1) {
                bytesRead++;

                // Fall back along the border chain until the byte extends a prefix (or j == -1).
                while (j >= 0 && (byte) b != pattern[j]) {
                    j = borders[j];
                }
                ++j;

                if (j == pattern.length) {
                    lastEnd += bytesRead;
                    return lastEnd;
                }
            }
        } catch (java.io.IOException e) {
            throw new java.io.UncheckedIOException(e);
        }

        return -1;
    }

    /** Builds the KMP border (failure) table for the current pattern. */
    private void preProcess() {
        int i = 0;
        int j = -1;
        borders[i] = j;
        while (i < pattern.length) {
            while (j >= 0 && pattern[i] != pattern[j]) {
                j = borders[j];
            }
            borders[++i] = ++j;
        }
    }

}
|
||||
85
src/test/java/ru/serega6531/packmate/PatternMatcherTest.java
Normal file
85
src/test/java/ru/serega6531/packmate/PatternMatcherTest.java
Normal file
@@ -0,0 +1,85 @@
|
||||
package ru.serega6531.packmate;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.util.Assert;
|
||||
import ru.serega6531.packmate.model.FoundPattern;
|
||||
import ru.serega6531.packmate.model.Pattern;
|
||||
import ru.serega6531.packmate.model.enums.PatternSearchType;
|
||||
import ru.serega6531.packmate.service.PatternMatcher;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class PatternMatcherTest {
|
||||
|
||||
@Test
|
||||
public void testRegex() {
|
||||
String content = "ahkfkyafceffek";
|
||||
|
||||
Set<FoundPattern> correctMatches = Set.of(
|
||||
FoundPattern.builder()
|
||||
.startPosition(6)
|
||||
.endPosition(8)
|
||||
.build(),
|
||||
FoundPattern.builder()
|
||||
.startPosition(9)
|
||||
.endPosition(11)
|
||||
.build());
|
||||
|
||||
final Pattern pattern = new Pattern();
|
||||
pattern.setValue("[a-f]{3}");
|
||||
pattern.setSearchType(PatternSearchType.REGEX);
|
||||
|
||||
final PatternMatcher matcher = new PatternMatcher(content.getBytes(), List.of(pattern));
|
||||
final Set<FoundPattern> matches = matcher.findMatches();
|
||||
|
||||
Assert.isTrue(matches.equals(correctMatches), "Incorrect search: " + matches.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubstring() {
|
||||
String content = "abaabbaaabaabbbbbbbaaabaaa";
|
||||
Set<FoundPattern> correctMatches = Set.of(
|
||||
FoundPattern.builder()
|
||||
.startPosition(12)
|
||||
.endPosition(14)
|
||||
.build(),
|
||||
FoundPattern.builder()
|
||||
.startPosition(15)
|
||||
.endPosition(17)
|
||||
.build());
|
||||
|
||||
final Pattern pattern = new Pattern();
|
||||
pattern.setValue("bbb");
|
||||
pattern.setSearchType(PatternSearchType.SUBSTRING);
|
||||
|
||||
final PatternMatcher matcher = new PatternMatcher(content.getBytes(), List.of(pattern));
|
||||
final Set<FoundPattern> matches = matcher.findMatches();
|
||||
|
||||
Assert.isTrue(matches.equals(correctMatches), "Incorrect search: " + matches.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubbytes() {
|
||||
byte[] content = new byte[]{0x11, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, 0x22};
|
||||
Set<FoundPattern> correctMatches = Set.of(
|
||||
FoundPattern.builder()
|
||||
.startPosition(1)
|
||||
.endPosition(2)
|
||||
.build(),
|
||||
FoundPattern.builder()
|
||||
.startPosition(3)
|
||||
.endPosition(4)
|
||||
.build());
|
||||
|
||||
final Pattern pattern = new Pattern();
|
||||
pattern.setValue("AAaa");
|
||||
pattern.setSearchType(PatternSearchType.SUBBYTES);
|
||||
|
||||
final PatternMatcher matcher = new PatternMatcher(content, List.of(pattern));
|
||||
final Set<FoundPattern> matches = matcher.findMatches();
|
||||
|
||||
Assert.isTrue(matches.equals(correctMatches), "Incorrect search: " + matches.toString());
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user