Skip to content

Commit

Permalink
Display positions in RuleCodeDuplicate errors (#735)
Browse files Browse the repository at this point in the history
Summary:
**Pre-submission checklist**
- [x] I've ran the linters locally and fixed lint errors related to the files I modified in this PR. You can install the linters by running `pip install -r requirements-dev.txt && pre-commit install`
- [x] `pre-commit run`

Display positions of where duplicate rule codes appear in
RuleCodeDuplicate taint configuration errors.

Previously rule codes were only checked for uniqueness within a single
taint.config file. This leads to unintended results when multiple rules
share the same code in multiple taint.config files under analysis.
The errors emitted also lacked positioning information which makes it
harder to find where duplicate rule codes occurred.

Adds ability to cross validate rule codes as now like source and sink
uniqueness checks, rule codes are checked after all taint.config files
are parsed. Also Since we now store positioning information for all
parsed taint.config nodes, adds position information of the previous
and current location when a rule code appears.

Since uniqueness is checked after parsing, modifies Rules module to
contain the taint config path and location to be used in error
messages - created a module in jsonParsing.ml for the same.

Other minor changes:
- Update tests to check for the new sort of RuleDuplicate error
- Update tests to conform to the updated rule records

Pull Request resolved: #735

Test Plan:
- Before the changes, ran pysa on `documentaiton/pysa_tutorial/exercise1/`:

<img width="1179" alt="Screenshot 2023-05-06 at 1 17 36 PM" src="https://user-images.githubusercontent.com/8947010/236805900-3d42af02-c06f-4663-8286-d432bc1a74a5.png">

- After the changes with default `taint.config`:

<img width="983" alt="Screenshot 2023-05-27 at 5 28 55 PM" src="https://github.com/facebook/pyre-check/assets/8947010/c86d4267-0151-4a1d-a6d6-e2472ae021c8">

- After the changes with the following `taint.config`:
```json
{
  "sources": [
    {
      "name": "CustomUserControlled",
      "comment": "use to annotate user input"
    }
  ],

  "sinks": [
    {
      "name": "CodeExecution",
      "comment": "use to annotate execution of python code"
    }
  ],

  "features": [],

  "rules": [
    {
      "name": "Possible RCE:",
      "code": 5001,
      "sources": [ "CustomUserControlled" ],
      "sinks": [ "CodeExecution" ],
      "message_format": "User specified data may reach a code execution sink"
    },
    {
      "name": "test-duplicate",
      "code": 5001,
      "sources": [ "CustomUserControlled" ],
      "sinks": [ "CodeExecution" ],
      "message_format": "duplicate"
    }
  ]
}
```

<img width="1039" alt="Screenshot 2023-05-31 at 2 06 36 PM" src="https://github.com/facebook/pyre-check/assets/8947010/e0125754-5859-481b-b372-7796ba9166e5">

- Ran tests with `make test`.

Fixes part of MLH-Fellowship#82

Footnotes:
- Pysa Github CI Action was failing before this PR

Reviewed By: tianhan0

Differential Revision: D46352827

Pulled By: arthaud

fbshipit-source-id: e6e3bc30939990a95cf8bea2071f930aa642d989
  • Loading branch information
abishekvashok authored and facebook-github-bot committed Jun 1, 2023
1 parent 4eb3bbe commit dbdb32b
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 34 deletions.
1 change: 1 addition & 0 deletions source/interprocedural_analyses/taint/rule.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type t = {
code: int;
name: string;
message_format: string; (* format *)
location: JsonParsing.JsonAst.LocationWithPath.t option; (* location where the rule was defined *)
}
[@@deriving compare, show]

Expand Down
1 change: 1 addition & 0 deletions source/interprocedural_analyses/taint/rule.mli
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ type t = {
code: int;
name: string;
message_format: string; (* format *)
location: JsonParsing.JsonAst.LocationWithPath.t option; (* location where the rule was defined *)
}
[@@deriving compare, show]

Expand Down
106 changes: 84 additions & 22 deletions source/interprocedural_analyses/taint/taintConfiguration.ml
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ module Heap = struct
name = "Possible shell injection.";
message_format =
"Possible remote code execution due to [{$sources}] data reaching [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "Test"; Sources.NamedSource "UserControlled"];
Expand All @@ -528,6 +529,7 @@ module Heap = struct
code = 5002;
name = "Test flow.";
message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "UserControlled"];
Expand All @@ -536,6 +538,7 @@ module Heap = struct
code = 5005;
name = "User controlled data to SQL execution.";
message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
location = None;
};
{
sources =
Expand All @@ -547,6 +550,7 @@ module Heap = struct
code = 5006;
name = "Restricted data being logged.";
message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "UserControlled"];
Expand All @@ -555,6 +559,7 @@ module Heap = struct
code = 5007;
name = "User data to XML Parser.";
message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "UserControlled"];
Expand All @@ -563,6 +568,7 @@ module Heap = struct
code = 5008;
name = "XSS";
message_format = "Possible XSS due to [{$sources}] data reaching [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "Demo"];
Expand All @@ -571,6 +577,7 @@ module Heap = struct
code = 5009;
name = "Demo flow.";
message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "UserControlled"];
Expand All @@ -579,6 +586,7 @@ module Heap = struct
code = 5010;
name = "User data to getattr.";
message_format = "Attacker may control at least one argument to getattr(,).";
location = None;
};
{
sources = [Sources.NamedSource "Test"];
Expand All @@ -588,6 +596,7 @@ module Heap = struct
name = "Flow with one transform.";
message_format =
"Data from [{$sources}] source(s) via [{$transforms}] may reach [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "Test"];
Expand All @@ -597,6 +606,7 @@ module Heap = struct
name = "Flow with two transforms.";
message_format =
"Data from [{$sources}] source(s) via [{$transforms}] may reach [{$sinks}] sink(s)";
location = None;
};
{
sources = [Sources.NamedSource "Demo"];
Expand All @@ -606,6 +616,7 @@ module Heap = struct
name = "Duplicate demo flow.";
message_format =
"Possible remote code execution due to [{$sources}] data reaching [{$sinks}] sink(s)";
location = None;
};
]
in
Expand Down Expand Up @@ -717,20 +728,23 @@ module Error = struct
labels: string list;
}
| InvalidMultiSink of string
| RuleCodeDuplicate of int
| RuleCodeDuplicate of {
code: int;
previous_location: JsonAst.LocationWithPath.t option;
}
| OptionDuplicate of string
| SourceDuplicate of string
| SinkDuplicate of string
| TransformDuplicate of string
| FeatureDuplicate of string
[@@deriving equal]
[@@deriving equal, compare]

type t = {
kind: kind;
path: PyrePath.t option;
location: JsonAst.Location.t option;
}
[@@deriving equal]
[@@deriving equal, compare]

let create_with_location ~path ~kind ~location =
{ kind; path = Some path; location = Some location }
Expand Down Expand Up @@ -780,8 +794,17 @@ module Error = struct
sink
(String.concat labels ~sep:", ")
| InvalidMultiSink sink -> Format.fprintf formatter "`%s` is not a multi sink" sink
| RuleCodeDuplicate code ->
Format.fprintf formatter "Multiple rules share the same code `%d`" code
| RuleCodeDuplicate { code; previous_location = None } ->
Format.fprintf formatter "Multuple rules share the same code `%d`" code
| RuleCodeDuplicate { code; previous_location = Some previous_location } ->
Format.fprintf
formatter
"Multiple rules share the same code `%d`, previous rule was at `%a:%a`"
code
PyrePath.pp
previous_location.path
JsonAst.Location.pp_start
previous_location.location
| OptionDuplicate name ->
Format.fprintf formatter "Multiple values were passed in for option `%s`" name
| SourceDuplicate name -> Format.fprintf formatter "Duplicate entry for source: `%s`" name
Expand Down Expand Up @@ -876,9 +899,12 @@ let from_json_list source_json_list =
json_exception_to_error ~path ~section:key (fun () ->
JsonAst.Json.Util.member_exn key value |> JsonAst.Json.Util.to_string_exn |> Result.return)
in
let json_integer_member ~path key value =
let json_integer_member_with_location ~path key value =
json_exception_to_error ~path ~section:key (fun () ->
JsonAst.Json.Util.member_exn key value |> JsonAst.Json.Util.to_int_exn |> Result.return)
let node = JsonAst.Json.Util.member_exn key value in
let value = JsonAst.Json.Util.to_int_exn node in
let location = JsonAst.Json.Util.to_location_exn node in
Result.return (value, location))
in
let array_member ~path ?section name json =
let node = JsonAst.Json.Util.member name json in
Expand Down Expand Up @@ -976,14 +1002,6 @@ let from_json_list source_json_list =
|> Result.combine_errors
|> Result.map_error ~f:List.concat
in
let seen_rules = Int.Hash_set.create () in
let validate_code_uniqueness ~path code =
if Hash_set.mem seen_rules code then
Result.Error [Error.create ~path ~kind:(Error.RuleCodeDuplicate code)]
else (
Hash_set.add seen_rules code;
Result.Ok ())
in
let parse_source_reference ~path ~allowed_sources source =
AnnotationParser.parse_source ~allowed:allowed_sources source
|> Result.map_error ~f:(fun _ -> Error.create ~path ~kind:(Error.UnsupportedSource source))
Expand All @@ -1003,16 +1021,17 @@ let from_json_list source_json_list =
name: string;
message_format: string;
code: int;
location: JsonAst.LocationWithPath.t;
}

let parse ~path json =
json_string_member ~path "name" json
>>= fun name ->
json_string_member ~path "message_format" json
>>= fun message_format ->
json_integer_member ~path "code" json
>>= fun code ->
validate_code_uniqueness ~path code >>= fun () -> Result.Ok { name; message_format; code }
json_integer_member_with_location ~path "code" json
>>| fun (code, location) ->
{ name; message_format; code; location = JsonAst.LocationWithPath.create ~path ~location }
end
in
let parse_rules ~allowed_sources ~allowed_sinks ~allowed_transforms (path, json) =
Expand Down Expand Up @@ -1046,8 +1065,8 @@ let from_json_list source_json_list =
|> Result.combine_errors
>>= fun transforms ->
RuleCommonAttributes.parse ~path json
>>= fun { name; message_format; code } ->
Result.Ok { Rule.sources; sinks; transforms; name; code; message_format }
>>| fun { name; message_format; code; location } ->
{ Rule.sources; sinks; transforms; name; code; message_format; location = Some location }
in
array_member ~path "rules" json
>>= fun rules ->
Expand Down Expand Up @@ -1125,7 +1144,7 @@ let from_json_list source_json_list =
~partial_sink
~partial_sink_converter
~partial_sink_labels
~rule_common_attributues:{ RuleCommonAttributes.name; message_format; code }
~rule_common_attributues:{ RuleCommonAttributes.name; message_format; code; location }
~combined_source_rule_sources:
{ CombinedSourceRuleSources.main_sources; secondary_sources; main_label; secondary_label }
~path
Expand Down Expand Up @@ -1172,6 +1191,7 @@ let from_json_list source_json_list =
name;
code;
message_format;
location = Some location;
}
in
let secondary_rule =
Expand All @@ -1182,6 +1202,7 @@ let from_json_list source_json_list =
name;
code;
message_format;
location = Some location;
}
in
let partial_sink_converter =
Expand Down Expand Up @@ -1502,7 +1523,7 @@ let from_json_list source_json_list =


(** Perform additional checks on the taint configuration. *)
let validate ({ Heap.sources; sinks; transforms; features; _ } as configuration) =
let validate ({ Heap.sources; sinks; transforms; features; rules; _ } as configuration) =
let ensure_list_unique ~get_name ~get_error elements =
let seen = String.Hash_set.create () in
let ensure_unique element =
Expand All @@ -1517,6 +1538,39 @@ let validate ({ Heap.sources; sinks; transforms; features; _ } as configuration)
|> Result.combine_errors_unit
|> Result.map_error ~f:List.concat
in
let ensure_list_unique_with_location ~get_key ~get_error ~get_location elements =
let table = Hashtbl.create (module String) in
let ensure_unique element =
let key = get_key element in
let location = get_location element in
match Hashtbl.find table key with
| None ->
let () = Hashtbl.add_exn table ~key ~data:location in
Result.Ok ()
| Some previous_location
when Option.equal JsonAst.LocationWithPath.equal previous_location location ->
(* Some generated rules can have the same code, compare the base definitions *)
Result.Ok ()
| Some previous_location ->
let error =
match location with
| Some location ->
Error.create_with_location
~path:location.JsonAst.LocationWithPath.path
~location:location.JsonAst.LocationWithPath.location
~kind:(get_error element previous_location)
| None ->
{ Error.path = None; location = None; kind = get_error element previous_location }
in
Result.Error [error]
in
List.map elements ~f:ensure_unique
|> Result.combine_errors_unit
|> Result.map_error ~f:List.concat
(* There are some elements that are generated but go through this loop, for example
combined_source_rule, let's just emit a single error in that case. *)
|> Result.map_error ~f:(List.dedup_and_sort ~compare:Error.compare)
in
Result.combine_errors_unit
[
ensure_list_unique
Expand All @@ -1535,6 +1589,12 @@ let validate ({ Heap.sources; sinks; transforms; features; _ } as configuration)
~get_name:Fn.id
~get_error:(fun name -> Error.FeatureDuplicate name)
features;
ensure_list_unique_with_location
~get_key:(fun { Rule.code; _ } -> string_of_int code)
~get_location:(fun { Rule.location; _ } -> location)
~get_error:(fun { Rule.code; _ } previous_location ->
Error.RuleCodeDuplicate { code; previous_location })
rules;
]
|> Result.map_error ~f:List.concat
|> Result.map ~f:(fun () -> configuration)
Expand All @@ -1560,6 +1620,7 @@ let obscure_flows_configuration configuration =
code = 9001;
name = "Obscure flow.";
message_format = "Data from [{$sources}] source(s) may reach an obscure model";
location = None;
};
]
in
Expand Down Expand Up @@ -1590,6 +1651,7 @@ let missing_type_flows_configuration configuration =
code = 9002;
name = "Unknown callee flow.";
message_format = "Data from [{$sources}] source(s) may flow to an unknown callee";
location = None;
};
]
in
Expand Down
5 changes: 4 additions & 1 deletion source/interprocedural_analyses/taint/taintConfiguration.mli
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,10 @@ module Error : sig
labels: string list;
}
| InvalidMultiSink of string
| RuleCodeDuplicate of int
| RuleCodeDuplicate of {
code: int;
previous_location: JsonParsing.JsonAst.LocationWithPath.t option;
}
| OptionDuplicate of string
| SourceDuplicate of string
| SinkDuplicate of string
Expand Down
Loading

0 comments on commit dbdb32b

Please sign in to comment.