-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dc3389f
commit b28673a
Showing
2 changed files
with
74 additions
and
0 deletions.
There are no files selected for viewing
36 changes: 36 additions & 0 deletions
36
codeql-custom-queries-java/queries/likely-bugs/regex-accidental-group.ql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/** | ||
* Finds Regex patterns containing `(...)` which was most likely not intended to be | ||
* treated as a group but instead to be matched literally. | ||
* | ||
* For example, in the pattern `Action ".*" failed (cancelled)` the part `(cancelled)` | ||
* was most likely supposed to be matched literally, but it is actually interpreted as | ||
* a group and therefore `(` and `)` are not expected in the input. The `(` and `)` | ||
* should be escaped with a `\` in this case. | ||
* | ||
* @id todo | ||
* @kind problem | ||
*/ | ||
|
||
import java | ||
// Uses alias `re` to avoid conflicting declarations | ||
import semmle.code.java.regex.RegexTreeView as re | ||
|
||
/** | ||
 * A `RegExpNormalChar` whose raw text contains no backslash, i.e. a term that is | ||
 * written in the pattern without any escape sequence. | ||
 */ | ||
class LiteralRegExpChar extends re::RegExpNormalChar { | ||
  LiteralRegExpChar() { | ||
    // The RegExpNormalChar documentation says it also covers character classes; | ||
    // exclude those here by rejecting any raw value in which a backslash occurs | ||
    not exists(int backslashPos | backslashPos = this.getRawValue().indexOf("\\")) | ||
  } | ||
} | ||
|
||
// Note: This does not match all Regex patterns, see | ||
// https://github.com/github/codeql/blob/codeql-cli/v2.15.5/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll#L161-L162 | ||
from re::RegExpGroup grp | ||
where | ||
  // A quantified group (e.g. `(ab)+`) is most likely intentional, so skip it | ||
  not grp.getParent() instanceof re::RegExpQuantifier and | ||
  // Special group syntax (non-capturing, lookahead, ...) also suggests the group is intentional | ||
  not grp.getRawValue().matches("(?%") and | ||
  // No child may be anything other than a plain literal; otherwise the captured | ||
  // group content might be used somewhere | ||
  not exists(re::RegExpTerm inner | | ||
    inner = grp.getAChild() and | ||
    not inner instanceof LiteralRegExpChar | ||
  ) | ||
// TODO: Maybe check for parse errors to reduce false-positives, with `not group.getRegex().failedToParse(_)`? | ||
select grp, "Potential accidental group" |
38 changes: 38 additions & 0 deletions
38
codeql-custom-queries-java/queries/likely-bugs/regex-class-duplicate-char.ql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/** | ||
* Finds Regex patterns with a character class which contains the same character multiple | ||
* times. This is redundant and might indicate that the string was not supposed to represent | ||
* a character class. | ||
* | ||
* For example in the pattern `[ERROR] some message.*` the part `[ERROR]` is actually a | ||
* character class which matches any of these characters. The `[` and `]` should be escaped | ||
* with a `\` in this case. | ||
* | ||
* Note that a `|` _inside a character class_ does not represent an 'either' and is instead | ||
* matched literally. E.g. the pattern `[ab|cd|ef]` also matches the string `"|"`. | ||
* | ||
* This issue is also reported by IntelliJ as `RegExpDuplicateCharacterInClass`. | ||
* | ||
* @id todo | ||
* @kind problem | ||
*/ | ||
|
||
import java | ||
// Uses alias `re` to avoid conflicting declarations | ||
import semmle.code.java.regex.RegexTreeView as re | ||
|
||
// Note: This does not match all Regex patterns, see | ||
// https://github.com/github/codeql/blob/codeql-cli/v2.15.5/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll#L161-L162 | ||
from | ||
  re::RegExpCharacterClass cls, int firstIdx, int secondIdx, re::RegExpNormalChar first, | ||
  re::RegExpNormalChar second, string duplicated | ||
where | ||
  // Only consider each pair once, in ascending child order, so it is not reported twice | ||
  firstIdx < secondIdx and | ||
  first = cls.getChild(firstIdx) and | ||
  second = cls.getChild(secondIdx) and | ||
  // Both children must have the same raw text | ||
  duplicated = first.getRawValue() and | ||
  duplicated = second.getRawValue() and | ||
  // Skip directly adjacent `&&`, which does not seem to be recognized by the CodeQL Regex | ||
  // library yet and would otherwise be a false positive | ||
  (duplicated != "&" or secondIdx != firstIdx + 1) | ||
// TODO: Maybe check for parse errors to reduce false-positives, with `not charClass.getRegex().failedToParse(_)`? | ||
select cls, "Contains '" + duplicated + "' twice $@ and $@", first, "here", second, "here" |