From 9eff4ecfe193537d34e5b78044672e92887942de Mon Sep 17 00:00:00 2001 From: serega6531 Date: Thu, 16 Apr 2020 02:08:30 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=B8=D1=81=D0=BA=20SUBBYTES=20?= =?UTF-8?q?=D0=BF=D0=B5=D1=80=D0=B5=D0=BF=D0=B8=D1=81=D0=B0=D0=BD=20=D0=BD?= =?UTF-8?q?=D0=B0=20=D0=B0=D0=BB=D0=B3=D0=BE=D1=80=D0=B8=D1=82=D0=BC=20?= =?UTF-8?q?=D0=9A=D0=BD=D1=83=D1=82=D0=B0-=D0=9C=D0=BE=D1=80=D1=80=D1=80?= =?UTF-8?q?=D0=B8=D1=81=D0=B0-=D0=9F=D1=80=D0=B0=D1=82=D1=82=D0=B0,=20?= =?UTF-8?q?=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D1=8B=20=D1=82?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D1=8B=20=D0=BF=D0=BE=D0=B8=D1=81=D0=BA=D0=B0?= =?UTF-8?q?=20=D0=BF=D0=B0=D1=82=D1=82=D0=B5=D1=80=D0=BD=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../packmate/model/FoundPattern.java | 1 + .../packmate/service/PatternMatcher.java | 23 ++--- .../packmate/utils/KMPByteSearcher.java | 62 ++++++++++++++ .../packmate/PatternMatcherTest.java | 85 +++++++++++++++++++ 4 files changed, 161 insertions(+), 10 deletions(-) create mode 100644 src/main/java/ru/serega6531/packmate/utils/KMPByteSearcher.java create mode 100644 src/test/java/ru/serega6531/packmate/PatternMatcherTest.java diff --git a/src/main/java/ru/serega6531/packmate/model/FoundPattern.java b/src/main/java/ru/serega6531/packmate/model/FoundPattern.java index f1721d1..1d33786 100644 --- a/src/main/java/ru/serega6531/packmate/model/FoundPattern.java +++ b/src/main/java/ru/serega6531/packmate/model/FoundPattern.java @@ -23,6 +23,7 @@ import javax.persistence.Id; @Builder @Getter @ToString +@EqualsAndHashCode public class FoundPattern { @Id diff --git a/src/main/java/ru/serega6531/packmate/service/PatternMatcher.java b/src/main/java/ru/serega6531/packmate/service/PatternMatcher.java index 288777c..dd01439 100644 --- a/src/main/java/ru/serega6531/packmate/service/PatternMatcher.java +++ b/src/main/java/ru/serega6531/packmate/service/PatternMatcher.java @@ -1,15 
+1,18 @@ package ru.serega6531.packmate.service; +import lombok.SneakyThrows; import org.apache.commons.lang3.StringUtils; import org.springframework.security.crypto.codec.Hex; import ru.serega6531.packmate.model.FoundPattern; import ru.serega6531.packmate.model.Pattern; -import ru.serega6531.packmate.utils.BytesUtils; +import ru.serega6531.packmate.utils.KMPByteSearcher; +import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.util.*; import java.util.regex.Matcher; -class PatternMatcher { +public class PatternMatcher { private static final Map compiledPatterns = new HashMap<>(); @@ -19,13 +22,13 @@ class PatternMatcher { private final Set result = new HashSet<>(); - PatternMatcher(byte[] contentBytes, List patterns) { + public PatternMatcher(byte[] contentBytes, List patterns) { this.contentBytes = contentBytes; this.content = new String(contentBytes); this.patterns = patterns; } - Set findMatches() { + public Set findMatches() { patterns.forEach(this::match); return result; } @@ -75,25 +78,25 @@ class PatternMatcher { } } + @SneakyThrows private void matchSubbytes(Pattern pattern) { - int startSearch = 0; final byte[] value = Hex.decode(pattern.getValue()); + KMPByteSearcher searcher = new KMPByteSearcher(value); + InputStream is = new ByteArrayInputStream(contentBytes); while (true) { - int start = BytesUtils.indexOf(contentBytes, value, startSearch, contentBytes.length); + int end = searcher.search(is) - 1; - if (start == -1) { + if (end < 0) { return; } - int end = start + value.length - 1; + int start = end - value.length + 1; addIfPossible(FoundPattern.builder() .patternId(pattern.getId()) .startPosition(start) .endPosition(end) .build()); - - startSearch = end + 1; } } diff --git a/src/main/java/ru/serega6531/packmate/utils/KMPByteSearcher.java b/src/main/java/ru/serega6531/packmate/utils/KMPByteSearcher.java new file mode 100644 index 0000000..361d5f3 --- /dev/null +++ b/src/main/java/ru/serega6531/packmate/utils/KMPByteSearcher.java 
@@ -0,0 +1,107 @@
+package ru.serega6531.packmate.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.util.Objects;
+
+/**
+ * Finds occurrences of a fixed byte pattern in an {@link InputStream} using the
+ * Knuth-Morris-Pratt algorithm, reading the stream one byte at a time.
+ * Based on StreamSearcher.
+ *
+ * The searcher remembers how many bytes it has consumed, so repeated calls to
+ * {@link #search(InputStream)} on the same stream return absolute, non-overlapping
+ * match positions. That position is reset when the pattern is replaced and when a
+ * search reaches the end of the stream, which makes one instance reusable for the
+ * next stream after a complete scan. Instances are not thread-safe.
+ */
+public class KMPByteSearcher {
+
+    private byte[] pattern;
+    private int[] borders;
+    private int lastEnd = 0;
+
+    /**
+     * @param pattern bytes to look for; must not be {@code null}
+     */
+    public KMPByteSearcher(byte[] pattern) {
+        setPattern(pattern);
+    }
+
+    /**
+     * Replaces the pattern, rebuilds the border table and restarts position counting.
+     *
+     * @param pattern bytes to look for; must not be {@code null}. An empty pattern is
+     *                accepted but never matches.
+     * @throws NullPointerException if {@code pattern} is {@code null}
+     */
+    public void setPattern(byte[] pattern) {
+        Objects.requireNonNull(pattern, "pattern");
+        // Copy so that later changes to the caller's array cannot desynchronize the border table.
+        this.pattern = pattern.clone();
+        this.borders = new int[this.pattern.length + 1];
+        this.lastEnd = 0;
+        preProcess();
+    }
+
+    /**
+     * Reads {@code stream} until the next occurrence of the pattern has been consumed.
+     *
+     * @param stream source of bytes; it is left positioned right after the match
+     * @return number of bytes consumed by this searcher up to and including the last byte
+     *         of the match (the match therefore ends at index {@code result - 1}), or
+     *         {@code -1} if the stream ended without a match or the pattern is empty
+     * @throws UncheckedIOException if reading from the stream fails
+     */
+    public int search(InputStream stream) {
+        if (pattern.length == 0) {
+            // Without this guard the comparison below would index past the empty array.
+            return -1;
+        }
+
+        int bytesRead = 0;
+        int b;
+        int j = 0; // number of pattern bytes matched so far
+
+        try {
+            while ((b = stream.read()) != -1) {
+                bytesRead++;
+
+                // On a mismatch fall back to the longest border that can still be extended.
+                while (j >= 0 && (byte) b != pattern[j]) {
+                    j = borders[j];
+                }
+                ++j;
+
+                if (j == pattern.length) {
+                    lastEnd += bytesRead;
+                    return lastEnd;
+                }
+            }
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+
+        // The stream is exhausted: forget the position so the next stream starts from zero.
+        lastEnd = 0;
+        return -1;
+    }
+
+    /**
+     * Fills {@code borders}: entry {@code i} is the length of the longest proper border
+     * of the first {@code i} pattern bytes, with {@code -1} as the sentinel for {@code i == 0}.
+     */
+    private void preProcess() {
+        int i = 0;
+        int j = -1;
+        borders[i] = j;
+        while (i < pattern.length) {
+            while (j >= 0 && pattern[i] != pattern[j]) {
+                j = borders[j];
+            }
+            borders[++i] = ++j;
+        }
+    }
+
+}
diff --git a/src/test/java/ru/serega6531/packmate/PatternMatcherTest.java b/src/test/java/ru/serega6531/packmate/PatternMatcherTest.java new file mode 100644 index 0000000..f550c0f --- /dev/null +++ b/src/test/java/ru/serega6531/packmate/PatternMatcherTest.java @@ -0,0 +1,85 @@ +package ru.serega6531.packmate; + +import org.junit.jupiter.api.Test; +import org.springframework.util.Assert; +import ru.serega6531.packmate.model.FoundPattern; +import ru.serega6531.packmate.model.Pattern; +import ru.serega6531.packmate.model.enums.PatternSearchType; +import ru.serega6531.packmate.service.PatternMatcher; + +import java.util.List; +import java.util.Set; + +public class PatternMatcherTest { + + @Test + public void testRegex() { + String content = "ahkfkyafceffek"; + + Set correctMatches = Set.of( + FoundPattern.builder() + .startPosition(6) + .endPosition(8) + .build(), + FoundPattern.builder() + .startPosition(9) + .endPosition(11) + .build()); + + final Pattern
pattern = new Pattern(); + pattern.setValue("[a-f]{3}"); + pattern.setSearchType(PatternSearchType.REGEX); + + final PatternMatcher matcher = new PatternMatcher(content.getBytes(), List.of(pattern)); + final Set matches = matcher.findMatches(); + + Assert.isTrue(matches.equals(correctMatches), "Incorrect search: " + matches.toString()); + } + + @Test + public void testSubstring() { + String content = "abaabbaaabaabbbbbbbaaabaaa"; + Set correctMatches = Set.of( + FoundPattern.builder() + .startPosition(12) + .endPosition(14) + .build(), + FoundPattern.builder() + .startPosition(15) + .endPosition(17) + .build()); + + final Pattern pattern = new Pattern(); + pattern.setValue("bbb"); + pattern.setSearchType(PatternSearchType.SUBSTRING); + + final PatternMatcher matcher = new PatternMatcher(content.getBytes(), List.of(pattern)); + final Set matches = matcher.findMatches(); + + Assert.isTrue(matches.equals(correctMatches), "Incorrect search: " + matches.toString()); + } + + @Test + public void testSubbytes() { + byte[] content = new byte[]{0x11, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, 0x22}; + Set correctMatches = Set.of( + FoundPattern.builder() + .startPosition(1) + .endPosition(2) + .build(), + FoundPattern.builder() + .startPosition(3) + .endPosition(4) + .build()); + + final Pattern pattern = new Pattern(); + pattern.setValue("AAaa"); + pattern.setSearchType(PatternSearchType.SUBBYTES); + + final PatternMatcher matcher = new PatternMatcher(content, List.of(pattern)); + final Set matches = matcher.findMatches(); + + Assert.isTrue(matches.equals(correctMatches), "Incorrect search: " + matches.toString()); + } + +}