From 385fec5f491871cd76f6fb604168e89e616c498e Mon Sep 17 00:00:00 2001 From: Tim Pavlik <4960530+fantavlik@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:01:44 -0800 Subject: [PATCH] Improve parsing of SPL2 modules for statement names to handle strings, fields, functions, comments. (#131) --- .../workflows/package-acceptance-test.yaml | 2 +- .gitignore | 4 +- out/notebooks/controller.ts | 2 +- out/notebooks/spl2/controller.ts | 2 +- out/notebooks/splunk.ts | 11 +-- out/notebooks/{utils.ts => utils/messages.ts} | 0 out/notebooks/utils/parsing.ts | 97 +++++++++++++++++++ package.json | 5 +- test/spl2.test.js | 79 +++++++++++++++ 9 files changed, 189 insertions(+), 13 deletions(-) rename out/notebooks/{utils.ts => utils/messages.ts} (100%) create mode 100644 out/notebooks/utils/parsing.ts create mode 100644 test/spl2.test.js diff --git a/.github/workflows/package-acceptance-test.yaml b/.github/workflows/package-acceptance-test.yaml index a685695..b4b72df 100644 --- a/.github/workflows/package-acceptance-test.yaml +++ b/.github/workflows/package-acceptance-test.yaml @@ -22,7 +22,7 @@ jobs: - run: node --version - run: npm install - run: npm list - - run: npm run compile + - run: npm run package - run: npm install -g @vscode/vsce - run: vsce package - uses: actions/upload-artifact@v3 diff --git a/.gitignore b/.gitignore index ed9224d..2315a9e 100755 --- a/.gitignore +++ b/.gitignore @@ -20,5 +20,5 @@ out/package.json .vscode-test **/out/notebooks/*.js **/out/notebooks/*.map -**/out/notebooks/spl2/*.js -**/out/notebooks/spl2/*.map \ No newline at end of file +**/out/notebooks/**/*.js +**/out/notebooks/**/*.map \ No newline at end of file diff --git a/out/notebooks/controller.ts b/out/notebooks/controller.ts index a068b82..6246d77 100644 --- a/out/notebooks/controller.ts +++ b/out/notebooks/controller.ts @@ -8,7 +8,7 @@ import { getSearchJobResults, wait, } from './splunk'; -import { splunkMessagesToOutputItems } from './utils'; +import { splunkMessagesToOutputItems } 
from './utils/messages'; export class SplunkController { public notebookType: string; diff --git a/out/notebooks/spl2/controller.ts b/out/notebooks/spl2/controller.ts index 9e70287..17b0259 100644 --- a/out/notebooks/spl2/controller.ts +++ b/out/notebooks/spl2/controller.ts @@ -5,7 +5,7 @@ import { getClient, } from '../splunk'; import { SplunkController } from '../controller'; -import { splunkMessagesToOutputItems } from '../utils'; +import { splunkMessagesToOutputItems } from '../utils/messages'; import { getAppSubNamespace } from './serializer'; export class Spl2Controller extends SplunkController { diff --git a/out/notebooks/splunk.ts b/out/notebooks/splunk.ts index f0054ea..3929584 100644 --- a/out/notebooks/splunk.ts +++ b/out/notebooks/splunk.ts @@ -1,7 +1,8 @@ import * as splunk from 'splunk-sdk'; import * as needle from 'needle'; // transitive dependency of splunk-sdk import * as vscode from 'vscode'; -import { SplunkMessage } from './utils'; +import { SplunkMessage } from './utils/messages'; +import { getModuleStatements } from './utils/parsing'; export function getClient() { const config = vscode.workspace.getConfiguration(); @@ -138,16 +139,14 @@ export function dispatchSpl2Module(service: any, spl2Module: string, app: string namespace = ''; app = app || 'search'; // default to search app // Get last statement assignment '$my_statement = ...' -> 'my_statement' - const statementMatches = [...spl2Module.matchAll(/^\s*\$([a-zA-Z0-9_]+)[\s]*=/gm)]; - if (!statementMatches - || statementMatches.length < 1 - || statementMatches[statementMatches.length - 1].length < 2) { + const statements = getModuleStatements(spl2Module); + if (!statements || (statements.length < 1)) { throw new Error( 'No statements found in SPL2. Please assign at least one statement name ' + 'using "$". 
/**
 * Matches a line that starts (after optional whitespace) with an assignment
 * such as `$statement_1 = ...` and captures the statement name without the `$`.
 */
const STATEMENT_ASSIGNMENT = /^\s*\$([a-zA-Z0-9_]+)[\s]*=/gm;

/** Lexical context the scanner is currently inside of. */
type ScanMode = 'code' | 'blockComment' | 'lineComment' | 'field' | 'string';

/**
 * Retrieves the names of all module-level search statements in an SPL2 module.
 *
 * The module text is first stripped of everything that could contain text that
 * merely looks like a statement assignment (comments, field literals, string
 * literals, and anything between braces); the remaining text is then scanned
 * for lines of the form `$name = ...`.
 *
 * @param spl2Module module contents
 * @returns statement names (without the leading `$`) in order of appearance;
 *          an empty array when no statement is found
 */
export function getModuleStatements(spl2Module: string): string[] {
    const cleaned = stripNonStatementText(spl2Module);
    const names: string[] = [];
    // matchAll works on a copy of the regex, so the shared /g pattern is not mutated.
    for (const match of cleaned.matchAll(STATEMENT_ASSIGNMENT)) {
        const name = match[1];
        if (name !== undefined) {
            names.push(name);
        }
    }
    return names;
}

/**
 * Removes block comments, line comments, 'field literals', "string literals"
 * and `{ .. }` blocks from the module so that only module-level code is left.
 * This eliminates function/lambda params like `$it -> { $p = 1 }` and
 * commented-out statements like `/* $out = from [{}] *\/`.
 *
 * Line structure matters to the caller (statement names are matched at the
 * start of a line), therefore:
 *  - a newline that terminates a line comment or a string is kept, and
 *  - a newline is inserted after each closed top-level `{ .. }` block so that
 *    whatever follows a function or dataset declaration starts a new line.
 *
 * Comments and literals are recognised at every brace depth, so a `{` or `}`
 * inside a string, field or comment never changes the brace level.
 */
function stripNonStatementText(spl2Module: string): string {
    let mode: ScanMode = 'code';
    let braceLevel = 0; // depth of { .. } nesting; output is written only at depth 0
    let cleaned = '';

    for (let indx = 0; indx < spl2Module.length; indx++) {
        const next = spl2Module.charAt(indx);
        const peeked = peek(spl2Module, indx + 1);
        const crlf = next === '\r' && peeked === '\n';
        const newLine = crlf || next === '\n';
        const topLevel = braceLevel === 0;

        switch (mode) {
            case 'blockComment':
                if (next === '*' && peeked === '/') {
                    mode = 'code'; // exit block comment
                    indx++; // move past */
                }
                break;

            case 'lineComment':
                if (newLine) {
                    mode = 'code'; // exit line comment
                    if (crlf) {
                        indx++; // move past \r\n
                    }
                    if (topLevel) {
                        cleaned += '\n'; // keep the line break so the next line stays a new line
                    }
                }
                break;

            case 'field':
                if (next === '\\') {
                    // A backslash escapes the following character, so \' does not
                    // close the literal while an escaped backslash before ' does.
                    indx++;
                } else if (next === '\'') {
                    mode = 'code'; // exit field literal
                }
                break;

            case 'string':
                if (newLine) {
                    // A string is treated as ending at the end of its line.
                    mode = 'code';
                    if (crlf) {
                        indx++; // move past \r\n
                    }
                    if (topLevel) {
                        cleaned += '\n';
                    }
                } else if (next === '\\') {
                    // Skip the escaped character, but never skip over a line
                    // break: the newline must still terminate the string.
                    if (peeked !== '' && peeked !== '\n' && peeked !== '\r') {
                        indx++;
                    }
                } else if (next === '"') {
                    mode = 'code'; // exit string literal
                }
                break;

            default:
                if (next === '/' && peeked === '/') {
                    mode = 'lineComment';
                    indx++; // move past //
                } else if (next === '/' && peeked === '*') {
                    mode = 'blockComment';
                    indx++; // move past /*
                } else if (next === '\'') {
                    mode = 'field';
                } else if (next === '"') {
                    mode = 'string';
                } else if (next === '{') {
                    braceLevel++;
                } else if (next === '}' && braceLevel > 0) {
                    braceLevel--;
                    if (braceLevel === 0) {
                        // insert newlines after blocks like function and dataset declarations
                        // to start new statements/declarations on new lines when possible
                        cleaned += '\n';
                    }
                } else if (topLevel) {
                    // plain module-level code (including an unbalanced `}`) is kept
                    cleaned += next;
                }
                break;
        }
    }

    return cleaned;
}

/**
 * Returns the character at index `i`, or an empty string when `i` is past the
 * end of `str`.
 */
function peek(str: string, i: number): string {
    return (str.length > i) ? str.charAt(i) : '';
}
getModuleStatements(module); + assert.equal(statements.length, 1); + assert.equal(statements[0], 'out2'); + }); + it('should handle complex comment, field, and function scenarios', () => { + const module = ` + $out1 = from [{s:1}] | eval ' + $fieldtemp1 = ' = value1 | eval ' \\' + $fieldtemp2 = ' = value2 | eval field1 = + " \\" $stringtemp1 = value3" + | eval foo = map([1,2], $it -> { + $lp1 = 1; + return $f; + }); + function func1() + dataset ds1 { + ' + $dsfield = ': "value" + } + function func2() { + $p1 = 1; + $p2 = $p1 + 1; + return $p2 + } $out2 = from [{s:2}] | where '$foo=bar'=2; + $out3 /* $f1 = 1; + $f2 = 2 + */ = from [{s:3}]; + $out4 = from [{' + $fieldval = ': "error"}];`; + const statements = getModuleStatements(module); + assert.equal(statements.length, 4); + assert.equal(statements[0], 'out1'); + assert.equal(statements[1], 'out2'); + assert.equal(statements[2], 'out3'); + assert.equal(statements[3], 'out4'); + }); + }); +}); \ No newline at end of file