Files
firegex-traffic-viewer/backend/binsrc/classes/regex_rules.cpp
2025-02-02 19:54:42 +01:00

162 lines
4.1 KiB
C++

#include <iostream>
#include <cstring>
#include <sstream>
#include "../utils.hpp"
#include <vector>
#include <hs.h>
using namespace std;
#ifndef REGEX_FILTER_HPP
#define REGEX_FILTER_HPP
/// Traffic direction a rule is bound to.
/// Kept as an unscoped enum: the enumerators are used unqualified in this file.
enum FilterDirection{
    CTOS = 0, // chosen when the encoded rule's direction flag is 'C' (presumably client-to-server)
    STOC = 1  // chosen when the encoded rule's direction flag is 'S' (presumably server-to-client)
};
/// A single rule after decoding: the pattern text plus the two flags that
/// were encoded alongside it. Plain aggregate; the member order is part of
/// its interface (it can be brace-initialized positionally).
struct decoded_regex {
    std::string     regex;              // pattern text obtained from the hex-encoded payload
    FilterDirection direction;          // which traffic direction the rule applies to
    bool            is_case_sensitive;  // false => the pattern is compiled caseless
};
/// Handle bundle for one compiled rule set.
/// Both pointers default to nullptr so that a ruleset which was never filled
/// can be safely tested with `hs_db != nullptr` before being freed.
struct regex_ruleset {
    hs_database_t* hs_db = nullptr;  // compiled Hyperscan database, or nullptr when nothing was compiled
    char** regexes = nullptr;        // NOTE(review): never assigned in the visible code — confirm whether still used
};
/**
 * Decode one encoded rule string into a decoded_regex.
 *
 * Expected layout of `regex`:
 *   [0]   '1' = case sensitive, '0' = case insensitive
 *   [1]   'C' = FilterDirection::CTOS, 'S' = FilterDirection::STOC
 *   [2..] the pattern, hex-encoded (decoded through unhexlify)
 * The total length must be even and at least 2.
 *
 * @param regex the encoded rule
 * @return the decoded rule
 * @throws runtime_error when the length, either flag, or the hex payload is
 *         invalid; a warning is also written to cerr in each case.
 */
decoded_regex decode_regex(string regex){
    const size_t arg_len = regex.size();
    if (arg_len < 2 || arg_len%2 != 0){
        cerr << "[warning] [decode_regex] invalid arg passed (" << regex << "), skipping..." << endl;
        throw runtime_error( "Invalid expression len (too small)" );
    }

    const char case_flag = regex[0];
    if (case_flag != '0' && case_flag != '1'){
        cerr << "[warning] [decode_regex] invalid is_case_sensitive (" << case_flag << ") in '" << regex << "', must be '1' or '0', skipping..." << endl;
        throw runtime_error( "Invalid is_case_sensitive" );
    }

    const char direction_flag = regex[1];
    if (direction_flag != 'C' && direction_flag != 'S'){
        cerr << "[warning] [decode_regex] invalid filter_direction (" << direction_flag << ") in '" << regex << "', must be 'C', 'S', skipping..." << endl;
        throw runtime_error( "Invalid filter_direction" );
    }

    // substr keeps the whole payload; building the string from c_str()+2
    // would silently stop at an embedded NUL byte even though the length
    // check above was performed on the full string.
    string hex = regex.substr(2);

    // Members are assigned by name (instead of the non-standard GNU
    // `field: value` initializer syntax) so this is plain standard C++.
    decoded_regex decoded;
    if (!unhexlify(hex, decoded.regex)){
        cerr << "[warning] [decode_regex] invalid hex regex value (" << hex << "), skipping..." << endl;
        throw runtime_error( "Invalid hex regex encoded value" );
    }
    decoded.direction = (direction_flag == 'C') ? CTOS : STOC;
    decoded.is_case_sensitive = (case_flag == '1');
    return decoded;
}
class RegexRules{
public:
regex_ruleset output_ruleset, input_ruleset;
private:
static inline u_int16_t glob_seq = 0;
u_int16_t version;
vector<pair<string, decoded_regex>> decoded_input_rules;
vector<pair<string, decoded_regex>> decoded_output_rules;
bool is_stream = true;
void free_dbs(){
if (output_ruleset.hs_db != nullptr){
hs_free_database(output_ruleset.hs_db);
}
if (input_ruleset.hs_db != nullptr){
hs_free_database(input_ruleset.hs_db);
}
}
void fill_ruleset(vector<pair<string, decoded_regex>> & decoded, regex_ruleset & ruleset){
size_t n_of_regex = decoded.size();
if (n_of_regex == 0){
return;
}
const char* regex_match_rules[n_of_regex];
unsigned int regex_array_ids[n_of_regex];
unsigned int regex_flags[n_of_regex];
for(int i = 0; i < n_of_regex; i++){
regex_match_rules[i] = decoded[i].second.regex.c_str();
regex_array_ids[i] = i;
regex_flags[i] = HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;
if (!decoded[i].second.is_case_sensitive){
regex_flags[i] |= HS_FLAG_CASELESS;
}
}
hs_database_t* rebuilt_db;
hs_compile_error_t *compile_err;
if (
hs_compile_multi(
regex_match_rules,
regex_flags,
regex_array_ids,
n_of_regex,
is_stream?HS_MODE_STREAM:HS_MODE_BLOCK,
nullptr,&rebuilt_db, &compile_err
) != HS_SUCCESS
) {
cerr << "[warning] [RegexRules.fill_ruleset] hs_db failed to compile: '" << compile_err->message << "' skipping..." << endl;
hs_free_compile_error(compile_err);
throw runtime_error( "Failed to compile hyperscan db" );
}
ruleset.hs_db = rebuilt_db;
}
public:
RegexRules(vector<string> raw_rules, bool is_stream){
this->is_stream = is_stream;
this->version = ++glob_seq; // 0 version is a invalid version (useful for some logics)
for(string ele : raw_rules){
try{
decoded_regex rule = decode_regex(ele);
if (rule.direction == FilterDirection::CTOS){
decoded_input_rules.push_back(make_pair(ele, rule));
}else{
decoded_output_rules.push_back(make_pair(ele, rule));
}
}catch(...){
throw current_exception();
}
}
fill_ruleset(decoded_input_rules, input_ruleset);
try{
fill_ruleset(decoded_output_rules, output_ruleset);
}catch(...){
free_dbs();
throw current_exception();
}
}
u_int16_t ver(){
return version;
}
RegexRules(bool is_stream){
vector<string> no_rules;
RegexRules(no_rules, is_stream);
}
bool stream_mode(){
return is_stream;
}
RegexRules(){
RegexRules(true);
}
~RegexRules(){
free_dbs();
}
};
#endif // REGEX_FILTER_HPP