So, as you might be aware by now through the few posts I made on the forums, I was having serious performance issues with rsyslog. More precisely, it wasn't parsing the incoming messages fast enough (half my rules are regexp based). So a colleague of mine, Arnaud Cornet, dived into the code and found that regexps are compiled, executed, and destroyed each time the selector is called. He quickly came up with a quick and dirty fix, in the form of a short patch. I just added some cleanup code to his, so that compiled regexps are destroyed when the selector is, preventing a memory leak on sighup.
His code basically caches the result of a regex compilation, to ensure it is only compiled once.
The results were impressive. Here a CPU usage graph (red is before, blue is after, both are on a two hour timeframe) :

Ideally, the cleanest way to do this would be to compile the regexp when parsing the configuration file. This would remove the added test from the selector code, leading to even better performance. Here is our patch. Please keep in mind it's only a quick proof of concept.
- Code: Select all
diff --git a/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.c b/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.c
index 93d1e1e..bbb54b1 100644
--- a/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.c
+++ b/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.c
@@ -722,6 +722,36 @@ int rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz)
return ret;
}
+/* same as above, only not braindead */
+int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void **rc)
+{
+ int ret;
+
+ BEGINfunc
+
+ if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) {
+ regex_t **cache = rc;
+ if (*cache == NULL) {
+ *cache = calloc(sizeof(regex_t), 1);
+ regexp.regcomp(*cache, (char*) rsCStrGetSzStr(pCS1), 0);
+ }
+ ret = regexp.regexec(*cache, (char*) psz, 0, NULL, 0);
+ } else {
+ ret = 1; /* simulate "not found" */
+ }
+
+ ENDfunc
+ return ret;
+}
+
+/* free a cached compiled regex */
+void rsRegexDestruct(void **rc) {
+ regex_t **cache = rc;
+ regexp.regfree(*cache);
+ free(*cache);
+ *cache = NULL;
+}
+
/* compare a rsCStr object with a classical sz string. This function
* is almost identical to rsCStrZsStrCmp(), but it also takes an offset
diff --git a/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.h b/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.h
index c196644..33df467 100644
--- a/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.h
+++ b/rsyslogd/rsyslog-3.20.4/runtime/stringbuf.h
@@ -137,6 +137,8 @@ int rsCStrStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz);
int rsCStrCaseInsensitveStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz);
int rsCStrSzStrStartsWithCStr(cstr_t *pCS1, uchar *psz, size_t iLenSz);
int rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz);
+int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void **cache);
+void rsRegexDestruct(void **rc);
rsRetVal rsCStrConvertToNumber(cstr_t *pStr, number_t *pNumber);
rsRetVal rsCStrConvertToBool(cstr_t *pStr, number_t *pBool);
rsRetVal rsCStrAppendCStr(cstr_t *pThis, cstr_t *pstrAppend);
diff --git a/rsyslogd/rsyslog-3.20.4/tools/syslogd.c b/rsyslogd/rsyslog-3.20.4/tools/syslogd.c
index 2c66daa..e79224a 100644
--- a/rsyslogd/rsyslog-3.20.4/tools/syslogd.c
+++ b/rsyslogd/rsyslog-3.20.4/tools/syslogd.c
@@ -446,6 +446,8 @@ selectorDestruct(void *pVal)
rsCStrDestruct(&pThis->f_filterData.prop.pCSPropName);
if(pThis->f_filterData.prop.pCSCompValue != NULL)
rsCStrDestruct(&pThis->f_filterData.prop.pCSCompValue);
+ if(pThis->regex_cache != NULL)
+ rsRegexDestruct(&pThis->regex_cache);
} else if(pThis->f_filter_type == FILTER_EXPR) {
if(pThis->f_filterData.f_expr != NULL)
expr.Destruct(&pThis->f_filterData.f_expr);
@@ -1027,8 +1029,7 @@ static rsRetVal shouldProcessThisMessage(selector_t *f, msg_t *pMsg, int *bProce
bRet = 1; /* process message! */
break;
case FIOP_REGEX:
- if(rsCStrSzStrMatchRegex(f->f_filterData.prop.pCSCompValue,
- (unsigned char*) pszPropVal) == 0)
+ if(rsCStrSzStrMatchRegexCache(f->f_filterData.prop.pCSCompValue, (unsigned char*) pszPropVal, &f->regex_cache) == 0)
bRet = 1;
break;
default:
diff --git a/rsyslogd/rsyslog-3.20.4/tools/syslogd.h b/rsyslogd/rsyslog-3.20.4/tools/syslogd.h
index e866a16..92c44cc 100644
--- a/rsyslogd/rsyslog-3.20.4/tools/syslogd.h
+++ b/rsyslogd/rsyslog-3.20.4/tools/syslogd.h
@@ -79,6 +79,7 @@ struct filed {
} f_filterData;
linkedList_t llActList; /* list of configured actions */
+ regex_t *regex_cache;
};


