Optimize: special-case crm_ends_with: BigO( M+N+(M-N) -> M+N )
...for M, the length of the scanned string, and N, the length of
the searched string, specifically for the case this searched string
fulfills "extension" property, that is, it's initial character is
a singleton within the whole sequence. Also extend the specifics
of the affected functions in the docstring, and importantly,
apply the respective new function -- crm_ends_with_ext -- where
crm_ends_with was previously used but the match is actually
the "extension".
Benchmarking/testing program:
#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sysexits.h> /*#define UNITTESTS*/ /*#define IMPLNEW*/ /*#define USEGLIB*/ #ifdef USEGLIB #include <glib.h> #endif #ifdef IMPLNEW static inline const char * null2emptystr(const char *input); static inline const char * null2emptystr(const char *input) { return (input == NULL) ? "" : input; } #endif int crm_ends_with(const char *s, const char *match); int crm_ends_with(const char *s, const char *match) { if ((s == NULL) || (match == NULL)) { return 0; } else { #ifndef IMPLNEW size_t slen = strlen(s); size_t mlen = strlen(match); return ((slen >= mlen) && !strcmp(s + slen - mlen, match)); #else int extension = 1; /* <-- change to force extension-like treatment */ size_t slen, mlen; if (match[0] != '\0' && (extension || strchr(&match[1], match[0]) == NULL)) return !strcmp(null2emptystr(strrchr(s, match[0])), match); if ((mlen = strlen(match)) == 0) return 1; slen = strlen(s); return ((slen >= mlen) && !strcmp(s + slen - mlen, match)); #endif } } int main(int argc, char *argv[]) { int ret = 0; #ifdef UNITTESTS #include <assert.h> assert( crm_ends_with("abcd", "cd")); assert( crm_ends_with("abcdefghijklmnopqrstuvwxyz", "qrstuvwxyz")); assert(!crm_ends_with("abcdefghijklmnopqrstuvwxyz", "abcdefghij")); assert( crm_ends_with("abcdefghijklmnopqrstuvwxyz", "")); assert( crm_ends_with("abcdefghhh", "hhh")); assert( crm_ends_with("abcdefghah", "hah")); assert( crm_ends_with("abcdfg_stop_0", "_stop_0")); assert(!crm_ends_with("abcdfg_stop_10", "_stop_0")); assert(!crm_ends_with("", "qrstuvwxyz")); assert( crm_ends_with("", "")); assert( crm_ends_with("abcdefghijklmnopqrstuvwxyz_stop_0", "_stop_0")); #else const char *a, *b, *c; if (argc < 3) return EX_USAGE; a = argv[1]; b = argv[2]; for (unsigned i = 0; i < UINT_MAX; i++) { #ifndef USEGLIB ret ^= !crm_ends_with(a, b); #else ret ^= !g_str_has_suffix(a, b); #endif /* confuse gcc about the loop invariants for the test to be valid! */ if (argc > 3) { c = a; a = b; b = c; } /*if (!(i % ((UINT_MAX >> 10) + 1))) { printf("."); fflush(stdout); }*/ } #endif /*printf("%d\n", ret);*/ return ret; }
Some results from the benchmark (on i7-3520M), compiled with -O2;
NEW-AUTO denotes what's in the current incarnation the self-check
if match fulfills the property of "extension", NEW-USE-EXT
corresponds to using crm_ends_with_ext, and GLIB is a complementary
comparison against the performance of glib (2.44):
time ./test_crm_ends_with | ORIGINAL| NEW-AUTO| -USE-EXT| GLIB
-------------------------------------+---------+---------+---------+---------
wqefkqwejf.cgi .cgi |1m10.479s|1m39.996s|0m53.835s|1m18.011s
abcdefghijklmnopqrstuvwxyz_stop_10000| | | |
_stop_10000 |1m33.313s|2m3.573s | N/A |1m40.498s
abcd cd |1m5.651s |1m6.611s |0m47.913s|1m10.954s
abcdefghijklmnopqrstuvwxyz '' |0m55.822s|0m25.800s|0m26.633s|1m0.870s
This measument also justifies why the mentioned "extension" property
self-check is commented out in the code, mainly for posterity.