use regexes, avoid parse by letter (cur: 110k/sec)

author: (quasar) nebula <towerofnix@gmail.com> 2021-04-15 15:48:03 -0300
committer: (quasar) nebula <towerofnix@gmail.com> 2021-04-15 15:48:03 -0300
commit: e3baf8b7e3834b27a6ea6516e43d797570ff4c92 (patch)
tree: 3218cdcda2c13169981eec2ae1fda8fa1ed05641
parent: fbfe596c99924431b3e7105ab3c568bbf26f3fef (diff)
2 files changed, 64 insertions, 32 deletions
diff --git a/upd8-util.js b/upd8-util.js
index 30260f8..3293d68 100644
--- a/upd8-util.js
+++ b/upd8-util.js
@@ -427,3 +427,11 @@ module.exports.makeExtendedRegExp = (inputPatternStr, flags) => {
         .replace(/(^|[^\\])\s+/g, '$1');
     return new RegExp(cleanedPatternStr, flags);
 };
+
+
+// Stolen from here: https://stackoverflow.com/a/3561711
+//
+// There's a proposal for a native JS function like this, 8ut it's not even
+// past stage 1 yet: https://github.com/tc39/proposal-regex-escaping
+module.exports.escapeRegex = string =>
+    string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
diff --git a/upd8.js b/upd8.js
index b1eb896..72031b8 100755
--- a/upd8.js
+++ b/upd8.js
@@ -111,6 +111,7 @@ const {
     chunkByProperties,
     curry,
     decorateTime,
+    escapeRegex,
     filterEmptyLines,
     joinNoOxford,
     mapInPlace,
@@ -979,6 +980,15 @@ const replacerSpec = {
     const tagArgumentValue = '=';
     const tagLabel = '|';
 
+    const R_tagBeginning = escapeRegex(tagBeginning);
+    const R_tagEnding = escapeRegex(tagEnding);
+    const R_tagReplacerValue = escapeRegex(tagReplacerValue);
+    const R_tagArgument = escapeRegex(tagArgument);
+    const R_tagArgumentValue = escapeRegex(tagArgumentValue);
+    const R_tagLabel = escapeRegex(tagLabel);
+
+    const regexpCache = {};
+
     const makeError = (i, message) => ({i, type: 'error', data: {message}});
     const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`);
 
@@ -1009,45 +1019,62 @@ const replacerSpec = {
             }
         };
 
+        const literalsToMatch = stopAt ? stopAt.concat([R_tagBeginning]) : [R_tagBeginning];
+
+        // The 8ackslash stuff here is to only match an even (or zero) num8er
+        // of sequential 'slashes. Even amounts always cancel out! Odd amounts
+        // don't, which would mean the following literal is 8eing escaped and
+        // should 8e counted only as part of the current string/text.
+        //
+        // Inspired 8y this: https://stackoverflow.com/a/41470813
+        const regexpSource = `(?<!\\\\)(?:\\\\{2})*(${literalsToMatch.join('|')})`;
+
+        // There are 8asically only a few regular expressions we'll ever use,
+        // 8ut it's a pain to hard-code them all, so we dynamically gener8te
+        // and cache them for reuse instead.
+        let regexp;
+        if (regexpCache.hasOwnProperty(regexpSource)) {
+            regexp = regexpCache[regexpSource];
+        } else {
+            regexp = new RegExp(regexpSource);
+            regexpCache[regexpSource] = regexp;
+        }
+
         while (i < input.length) {
-            if (escapeNext) {
-                string += input[i];
-                i++;
-                continue;
-            }
+            const match = input.slice(i).match(regexp);
 
-            if (input[i] === '\\') {
-                escapeNext = true;
-                i++;
-                continue;
+            if (!match) {
+                break;
             }
 
-            if (stopAt) {
-                for (const literal of stopAt) {
-                    if (input.slice(i, i + literal.length) === literal) {
-                        pushTextNode();
-                        stopped = true;
-                        stop_iMatch = i;
-                        stop_iParse = i + literal.length;
-                        stop_literal = literal;
-                        return nodes;
-                    }
-                }
+            const closestMatch = match[0];
+            const closestMatchIndex = i + match.index;
+
+            iString = i;
+            string = input.slice(i, closestMatchIndex);
+            pushTextNode();
+
+            i = closestMatchIndex + closestMatch.length;
+
+            if (closestMatch !== tagBeginning) {
+                stopped = true;
+                stop_iMatch = closestMatchIndex;
+                stop_iParse = i;
+                stop_literal = closestMatch;
+                return nodes;
             }
 
-            if (input.slice(i, i + tagBeginning.length) === tagBeginning) {
+            if (closestMatch === tagBeginning) {
                 if (textOnly)
                     throw makeError(i, `Unexpected [[tag]] - expected only text here.`);
 
-                pushTextNode();
-                const iTag = i;
-                i += tagBeginning.length;
+                const iTag = closestMatchIndex;
 
                 let N;
 
                 // Replacer key (or value)
 
-                N = parseOneTextNode(input, i, [tagReplacerValue, tagArgument, tagLabel, tagEnding]);
+                N = parseOneTextNode(input, i, [R_tagReplacerValue, R_tagArgument, R_tagLabel, R_tagEnding]);
 
                 if (!stopped) throw endOfInput(i, `reading replacer key`);
 
@@ -1070,7 +1097,7 @@ const replacerSpec = {
                 let replacerSecond;
 
                 if (stop_literal === tagReplacerValue) {
-                    N = parseNodes(input, i, [tagArgument, tagLabel, tagEnding]);
+                    N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]);
 
                     if (!stopped) throw endOfInput(i, `reading replacer value`);
                     if (!N.length) throw makeError(i, `Expected content (replacer value).`);
@@ -1094,7 +1121,7 @@ const replacerSpec = {
                 const args = [];
 
                 while (stop_literal === tagArgument) {
-                    N = parseOneTextNode(input, i, [tagArgumentValue, tagArgument, tagLabel, tagEnding]);
+                    N = parseOneTextNode(input, i, [R_tagArgumentValue, R_tagArgument, R_tagLabel, R_tagEnding]);
 
                     if (!stopped) throw endOfInput(i, `reading argument key`);
 
@@ -1107,7 +1134,7 @@ const replacerSpec = {
                     const key = N;
                     i = stop_iParse;
 
-                    N = parseNodes(input, i, [tagArgument, tagLabel, tagEnding]);
+                    N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]);
 
                     if (!stopped) throw endOfInput(i, `reading argument value`);
                     if (!N.length) throw makeError(i, `Expected content (argument value).`);
@@ -1121,7 +1148,7 @@ const replacerSpec = {
                 let label;
 
                 if (stop_literal === tagLabel) {
-                    N = parseOneTextNode(input, i, [tagEnding]);
+                    N = parseOneTextNode(input, i, [R_tagEnding]);
 
                     if (!stopped) throw endOfInput(i, `reading label`);
                     if (!N) throw makeError(i, `Expected text (label).`);
@@ -1134,9 +1161,6 @@ const replacerSpec = {
 
                 continue;
             }
-
-            string += input[i];
-            i++;
         }
 
         pushTextNode();
author	(quasar) nebula <towerofnix@gmail.com>	2021-04-15 15:48:03 -0300
committer	(quasar) nebula <towerofnix@gmail.com>	2021-04-15 15:48:03 -0300
commit	e3baf8b7e3834b27a6ea6516e43d797570ff4c92 (patch)
tree	3218cdcda2c13169981eec2ae1fda8fa1ed05641
parent	fbfe596c99924431b3e7105ab3c568bbf26f3fef (diff)