Skip to content

Commit

Permalink
Merge pull request #164 from mhoffm-aiven/mhoffm-use-re2
Browse files Browse the repository at this point in the history
misc: use re2 if configured
  • Loading branch information
jshuping-aiven authored Feb 27, 2024
2 parents 7bd8947 + fd641c0 commit 3f80062
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 9 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ SHELL=/bin/bash
short_ver = 2.1.3
long_ver = $(shell git describe --long 2>/dev/null || echo $(short_ver)-0-unknown-g`git describe --always`)

USE_RE2=${USE_RE2}

all: py-egg

PYTHON ?= python3
Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ Using backrefs, the message can also be restructured into a new format.
}
]

Secret filters and searches can be made to use re2 as the regex engine by running journalpump with the environment variable ``USE_RE2`` set, e.g. ``USE_RE2=yes`` (any non-empty value enables it). Make sure that the PyPI package ``google-re2`` is installed, at version 1.1 or later.

``secret_filter_metrics`` ( default: ``false``)
Change this setting to true to emit metrics to the metrics host whenever a secret pattern is matched.
Expand Down
8 changes: 7 additions & 1 deletion journalpump/journalpump.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@
import fnmatch
import json
import logging
import re
import os
import select
import time
import uuid

# NOTE: make sure to use google-re2 >= 1.1 if this is enabled.
if os.environ.get("USE_RE2"):
import re2 as re
else:
import re # type: ignore[no-redef]

_5_MB = 5 * 1024 * 1024
CHUNK_SIZE = 5000

Expand Down
8 changes: 7 additions & 1 deletion journalpump/senders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
from typing import Dict, Optional

import logging
import os
import random
import re
import sys
import time

# NOTE: make sure to use google-re2 >= 1.1 if this is enabled.
if os.environ.get("USE_RE2"):
import re2 as re
else:
import re # type: ignore[no-redef]

KAFKA_COMPRESSED_MESSAGE_OVERHEAD = 30
MAX_KAFKA_MESSAGE_SIZE = 1024**2 # 1 MiB

Expand Down
6 changes: 1 addition & 5 deletions journalpump/senders/elasticsearch_opensearch_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import enum
import json
import re
import time


Expand Down Expand Up @@ -72,8 +71,6 @@ def create(*, sender_type: SenderType, config: Dict[str, Any]) -> "Config":
class _EsOsLogSenderBase(LogSender):
_DEFAULT_MAX_SENDER_INTERVAL = 10.0

_INDICIES_URL_REDACTION_REGEXP = r"(\w*?://[A-Za-z0-9\-._~%!$&'()*+,;=]*)(:)([A-Za-z0-9\-._~%!$&'()*+,;=]*)(@)"

_ONE_HOUR_LAST_INDEX_CHECK = 3600

_SUCCESS_HTTP_STATUSES = {HTTPStatus.OK, HTTPStatus.CREATED}
Expand Down Expand Up @@ -173,8 +170,7 @@ def send_messages(self, *, messages, cursor) -> bool:
try:
es_available = self._load_indices()
if not es_available:
redacted_url = re.sub(self._INDICIES_URL_REDACTION_REGEXP, r"\1\2[REDACTED]\4", self._indices_url)
self.log.warning("Waiting for connection to %s for %s", redacted_url, self.name)
self.log.warning("Waiting for connection for %s", self.name)
self._backoff()
return False
for msg in messages:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ google-auth
geoip2
https://github.com/systemd/python-systemd/zipball/master
typing-extensions
google-re2
8 changes: 7 additions & 1 deletion systest/test_rsyslog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@
import logging.handlers
import os
import random
import re
import socket
import string
import threading

# NOTE: make sure to use google-re2 >= 1.1 if this is enabled.
if os.environ.get("USE_RE2"):
import re2 as re
else:
import re # type: ignore[no-redef]


RSYSLOGD = "/usr/sbin/rsyslogd"

RSYSLOGD_TCP_CONF = """
Expand Down
8 changes: 7 additions & 1 deletion test/test_journalpump.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,16 @@

import botocore.session
import json
import os
import pytest
import re
import responses

# NOTE: make sure to use google-re2 >= 1.1 if this is enabled.
if os.environ.get("USE_RE2"):
import re2 as re
else:
import re # type: ignore[no-redef]


def test_journalpump_init(tmpdir): # pylint: disable=too-many-statements
# Logplex sender
Expand Down

0 comments on commit 3f80062

Please sign in to comment.