Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable filtering of request query strings using ActiveSupport::ParameterFilter. #111

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,14 @@ user identifier proc can be defined in `config/initializers/dfe_analytics.rb`:
DfE::Analytics.config.user_identifier = proc { |user| user&.uid }
```

#### Filtering PII from web request query string data

Query strings may be filtered using the same `Rails.application.config.filter_parameters`
configuration you'd normally use to prevent PII leakage to logs and error handling services.

To enable this option, ensure the attributes to filter are specified in `Rails.application.config.filter_parameters`.
Then enable filtering with `DfE::Analytics.config.filter_web_request_events = true`.

### 6. Import existing data

To load the current contents of your database into BigQuery, run
Expand Down
13 changes: 13 additions & 0 deletions lib/dfe/analytics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
require 'dfe/analytics/middleware/request_identity'
require 'dfe/analytics/middleware/send_cached_page_request_event'
require 'dfe/analytics/railtie'
require 'active_support/parameter_filter'

module DfE
module Analytics
Expand Down Expand Up @@ -63,6 +64,8 @@ def self.config
pseudonymise_web_request_user_id
entity_table_checks_enabled
rack_page_cached
filter_web_request_events
web_request_event_filter
]

@config ||= Struct.new(*configurables).new
Expand All @@ -86,6 +89,8 @@ def self.configure
config.pseudonymise_web_request_user_id ||= false
config.entity_table_checks_enabled ||= false
config.rack_page_cached ||= proc { |_rack_env| false }
config.filter_web_request_events ||= false
config.web_request_event_filter ||= ActiveSupport::ParameterFilter.new(Rails.application.config.filter_parameters)
end

def self.initialize!
Expand Down Expand Up @@ -244,5 +249,13 @@ def self.rack_page_cached?(rack_env)
def self.entity_table_checks_enabled?
config.entity_table_checks_enabled
end

def self.filter_web_request_events?
config.filter_web_request_events
end

def self.web_request_event_filter
config.web_request_event_filter
end
end
end
5 changes: 4 additions & 1 deletion lib/dfe/analytics/event.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# frozen_string_literal: true

require 'active_support/values/time_zone'
require 'dfe/analytics/query_string_utilities'

module DfE
module Analytics
class Event
include QueryStringUtilities

EVENT_TYPES = %w[
web_request create_entity update_entity delete_entity import_entity initialise_analytics entity_table_check
].freeze
Expand Down Expand Up @@ -37,7 +40,7 @@ def with_request_details(rack_request)
request_user_agent: ensure_utf8(rack_request.user_agent),
request_method: rack_request.method,
request_path: ensure_utf8(rack_request.path),
request_query: hash_to_kv_pairs(Rack::Utils.parse_query(rack_request.query_string)),
request_query: hash_to_kv_pairs(query_string_to_hash(rack_request.query_string)),
request_referer: ensure_utf8(rack_request.referer),
anonymised_user_agent_and_ip: anonymised_user_agent_and_ip(rack_request)
)
Expand Down
18 changes: 18 additions & 0 deletions lib/dfe/analytics/query_string_utilities.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module DfE
module Analytics
# Utility methods for query string processing
module QueryStringUtilities
def query_string_to_hash(query_string)
hash = Rack::Utils.parse_query(query_string)
hash = filter_query_string(hash) if DfE::Analytics.filter_web_request_events?
hash
end

def filter_query_string(hash)
DfE::Analytics.web_request_event_filter.filter(hash)
end
end
end
end
27 changes: 27 additions & 0 deletions spec/dfe/analytics/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,33 @@ def as_json
end
end

describe 'with_request_details using parameter filter' do
subject do
request = fake_request(
query_string: 'some_param=123&some_other_param=456'
)

described_class.new
.with_request_details(request)
.as_json['request_query']
end

before do
allow(DfE::Analytics).to receive(:web_request_event_filter).and_return(
ActiveSupport::ParameterFilter.new([:some_param])
)
allow(DfE::Analytics).to receive(:filter_web_request_events?).and_return(true)
end

it 'filters specified parameter values' do
expect(subject).to include({ 'key' => 'some_param', 'value' => ['[FILTERED]'] })
end

it 'does not filter unspecified parameter values' do
expect(subject).to include({ 'key' => 'some_other_param', 'value' => ['456'] })
end
end

def fake_request(overrides = {})
attrs = {
uuid: '123',
Expand Down
28 changes: 28 additions & 0 deletions spec/dfe/analytics/requests_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,32 @@ def index
end).to have_been_made
end
end

context 'with request filtering' do
before do
allow(DfE::Analytics).to receive(:web_request_event_filter).and_return(
ActiveSupport::ParameterFilter.new([:array_param])
)
allow(DfE::Analytics).to receive(:filter_web_request_events?).and_return(true)
end

it 'filters out sensitive params' do
request = stub_analytics_event_submission
DfE::Analytics::Testing.webmock! do
perform_enqueued_jobs do
get('/example/path',
params: { page: '1', per_page: '25', array_param: %w[1 2] },
headers: { 'HTTP_USER_AGENT' => 'Test agent' })
end
end

expect(request.with do |req|
body = JSON.parse(req.body)
payload = body['rows'].first['json']
expect(payload['request_query']).to include(
{ 'key' => 'array_param[]', 'value' => ['[FILTERED]'] }
)
end).to have_been_made
end
end
end
Loading