Skip to content

Commit

Permalink
Add some Regex queries
Browse files Browse the repository at this point in the history
  • Loading branch information
Marcono1234 committed Feb 5, 2024
1 parent dc3389f commit b28673a
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Finds Regex patterns containing `(...)` which was most likely intended to be
* matched literally rather than treated as a group.
*
* For example in the pattern `Action ".*" failed (cancelled)` the part `(cancelled)`
* was most likely supposed to be matched literally, but it is actually interpreted as
* a group, and therefore `(` and `)` are not expected in the input. The `(` and `)`
* should be escaped with a `\` in this case.
*
* @id todo
* @kind problem
*/

import java
// Uses alias `re` to avoid conflicting declarations
import semmle.code.java.regex.RegexTreeView as re

/**
 * A `RegExpNormalChar` whose raw value does not contain a backslash.
 */
class LiteralRegExpChar extends re::RegExpNormalChar {
  LiteralRegExpChar() {
    // According to its documentation `RegExpNormalChar` also matches character classes;
    // those are filtered out here by rejecting any raw value which contains a backslash
    not exists(int backslashIndex | backslashIndex = this.getRawValue().indexOf("\\"))
  }
}

// Note: This does not match all Regex patterns, see
// https://github.com/github/codeql/blob/codeql-cli/v2.15.5/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll#L161-L162
from re::RegExpGroup group
where
  // A group directly below a quantifier (e.g. `(ab)+`) is most likely intentional; skip it
  not exists(re::RegExpQuantifier quantifier | quantifier = group.getParent()) and
  // Special group syntax starting with `(?` (non-capturing, lookahead, ...) also suggests
  // that the group is intentional
  not group.getRawValue().matches("(?%") and
  // The group must not have any child which is something other than a plain literal;
  // otherwise the captured group content might be used somewhere
  not exists(re::RegExpTerm child |
    child = group.getAChild() and
    not child instanceof LiteralRegExpChar
  )
  // TODO: Maybe check for parse errors to reduce false-positives, with `not group.getRegex().failedToParse(_)`?
select group, "Potential accidental group"
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* Finds Regex patterns with a character class which contains the same character multiple
* times. This is redundant and might indicate that the string was not supposed to represent
* a character class.
*
* For example in the pattern `[ERROR] some message.*` the part `[ERROR]` is actually a
* character class which matches any of these characters. The `[` and `]` should be escaped
* with a `\` in this case.
*
* Note that a `|` _inside a character class_ does not represent an 'either' and is instead
* matched literally. E.g. the pattern `[ab|cd|ef]` also matches the string `"|"`.
*
* This issue is also reported by IntelliJ as `RegExpDuplicateCharacterInClass`.
*
* @id todo
* @kind problem
*/

import java
// Uses alias `re` to avoid conflicting declarations
import semmle.code.java.regex.RegexTreeView as re

// Note: This does not match all Regex patterns, see
// https://github.com/github/codeql/blob/codeql-cli/v2.15.5/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll#L161-L162
from
  re::RegExpCharacterClass charClass, re::RegExpNormalChar charA, re::RegExpNormalChar charB,
  int posA, int posB, string rawText
where
  // Pick two children of the same character class together with their positions
  charClass.getChild(posA) = charA and
  charClass.getChild(posB) = charB and
  // Only consider pairs in ascending position order, so that each pair is reported once
  posB > posA and
  // Both children must have the same raw value
  charA.getRawValue() = rawText and
  charB.getRawValue() = rawText and
  // Two directly adjacent `&` are skipped to avoid false positives for `&&`, which does
  // not seem to be recognized by the CodeQL Regex library yet
  (rawText != "&" or posB != posA + 1)
  // TODO: Maybe check for parse errors to reduce false-positives, with `not charClass.getRegex().failedToParse(_)`?
select charClass, "Contains '" + rawText + "' twice $@ and $@", charA, "here", charB, "here"

0 comments on commit b28673a

Please sign in to comment.