Skip to content

Commit

Permalink
Rewrite persisted ops loader (#114)
Browse files Browse the repository at this point in the history
Our initial strategy for loading persisted operations was flawed, mainly
because it used the local filesystem as a state store. This required the
filesystem to be consistent immediately after loading from a remote,
since we tried to read from local file storage right after.

This design was chosen in order to decouple local state from remote
loading strategies, but this can be achieved in other ways.

This PR rewrites the loading behavior to use just the internal state in
memory, and a loading strategy to append to it. This removes the
dependency on the local file system for persisting this state, and
simplifies the interactions for loading persisted operations.

Now, a single `loader` can be specified. The currently supported types
are `local`, meaning files are read from the local file system; `gcp`,
meaning files are downloaded from a GCP bucket and loaded straight into
the internal state; and `noop`, meaning no files are loaded.

It also updates a number of metrics as well as the accompanying
documentation.

---------

Co-authored-by: ldebruijn <[email protected]>
  • Loading branch information
ldebruijn and ldebruijn authored Aug 12, 2024
1 parent 6882262 commit 5ac2de8
Show file tree
Hide file tree
Showing 15 changed files with 483 additions and 374 deletions.
12 changes: 8 additions & 4 deletions cmd/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ type Product {
`,
cfgOverrides: func(cfg *config.Config) *config.Config {
cfg.PersistedOperations.Enabled = true
cfg.PersistedOperations.Store = "./"
cfg.PersistedOperations.Loader.Type = "local"
cfg.PersistedOperations.Loader.Location = "./"
return cfg
},
mockResponse: map[string]interface{}{
Expand Down Expand Up @@ -169,7 +170,8 @@ type Product {
cfgOverrides: func(cfg *config.Config) *config.Config {
cfg.ObfuscateUpstreamErrors = false
cfg.PersistedOperations.Enabled = true
cfg.PersistedOperations.Store = "./"
cfg.PersistedOperations.Loader.Type = "local"
cfg.PersistedOperations.Loader.Location = "./"
cfg.PersistedOperations.RejectOnFailure = false
return cfg
},
Expand Down Expand Up @@ -369,7 +371,8 @@ type Product {
`,
cfgOverrides: func(cfg *config.Config) *config.Config {
cfg.PersistedOperations.Enabled = true
cfg.PersistedOperations.Store = "./"
cfg.PersistedOperations.Loader.Type = "local"
cfg.PersistedOperations.Loader.Location = "./"
cfg.PersistedOperations.RejectOnFailure = false
cfg.ObfuscateUpstreamErrors = false
return cfg
Expand Down Expand Up @@ -434,7 +437,8 @@ type Product {
`,
cfgOverrides: func(cfg *config.Config) *config.Config {
cfg.PersistedOperations.Enabled = true
cfg.PersistedOperations.Store = "./"
cfg.PersistedOperations.Loader.Type = "local"
cfg.PersistedOperations.Loader.Location = "./"
cfg.PersistedOperations.RejectOnFailure = false
return cfg
},
Expand Down
9 changes: 4 additions & 5 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"context"
"errors"
"fmt"
"github.com/ldebruijn/graphql-protect/internal/app/config"
"github.com/ldebruijn/graphql-protect/internal/app/otel"
Expand Down Expand Up @@ -41,13 +40,13 @@ func httpServer(log *slog.Logger, cfg *config.Config, shutdown chan os.Signal) e
return nil
}

remoteLoader, err := persistedoperations.RemoteLoaderFromConfig(cfg.PersistedOperations, log)
if err != nil && !errors.Is(err, persistedoperations.ErrNoRemoteLoaderSpecified) {
log.Error("Error initializing remote loader", "err", err)
loader, err := persistedoperations.NewLoaderFromConfig(cfg.PersistedOperations, log)
if err != nil {
log.Error("Error initializing persisted operations loader", "err", err)
return err
}

po, err := persistedoperations.NewPersistedOperations(log, cfg.PersistedOperations, persistedoperations.NewLocalDirLoader(cfg.PersistedOperations, log), remoteLoader)
po, err := persistedoperations.NewPersistedOperations(log, cfg.PersistedOperations, loader)
if err != nil {
log.Error("Error initializing Persisted Operations", "err", err)
return nil
Expand Down
6 changes: 3 additions & 3 deletions cmd/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ import (
var ErrValidationErrorsFound = errors.New("errors found during validation")

func validate(log *slog.Logger, cfg *config.Config, _ chan os.Signal) error {
// We need a store defined to have files to validate
if cfg.PersistedOperations.Store == "" {
loader, err := persistedoperations.NewLoaderFromConfig(cfg.PersistedOperations, log)
if err != nil {
err := fmt.Errorf("store must be defined to have files to validate")
log.Error("Error running validations", "err", err)
return err
}

// Load the persisted operations from the local dir into memory
persistedOperations, err := persistedoperations.NewPersistedOperations(log, cfg.PersistedOperations, persistedoperations.NewLocalDirLoader(cfg.PersistedOperations, log), nil)
persistedOperations, err := persistedoperations.NewPersistedOperations(log, cfg.PersistedOperations, loader)
if err != nil {
log.Error("Error initializing Persisted Operations", "err", err)
return nil
Expand Down
27 changes: 14 additions & 13 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,23 @@ obfuscate_upstream_errors: true


persisted_operations:
# Enable or disable the feature, enabled by default
# Enable or disable the feature, disabled by default
enabled: false
# Fail unknown operations, disable this feature to allow unknown operations to reach your GraphQL API
reject_on_failure: true
# Store is the location on local disk where graphql-protect can find the persisted operations, it loads any `*.json` files on disk
store: "./store"
reload:
enabled: true
# The interval in which the local store dir is read and refreshes the internal state
interval: 5m
# The timeout for the remote operation
timeout: 10s
remote:
# Load persisted operations from a GCP Cloud Storage bucket.
# Will look at all the objects in the bucket and try to load any object with a `.json` extension
gcp_bucket: ""
# Loader decides how persisted operations are loaded, see loader chapter for more details
loader:
# Type of loader to use
type: local
# Location to load persisted operations from
location: ./store
# Whether to reload persisted operations periodically
reload:
enabled: true
# The interval in which the persisted operations are refreshed
interval: 5m0s
# The timeout for the refreshing operation
timeout: 10s

block_field_suggestions:
enabled: true
Expand Down
64 changes: 42 additions & 22 deletions docs/protections/persisted_operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,40 @@ You can configure `graphql-protect` to enable Persisted Operations.
# ...

persisted_operations:
# Enable or disable the feature, enabled by default
# Enable or disable the feature, disabled by default
enabled: false
# Fail unknown operations, disable this feature to allow unknown operations to reach your GraphQL API
reject_on_failure: true
# Store is the location on local disk where graphql-protect can find the persisted operations, it loads any `*.json` files on disk
store: "./store"
reload:
enabled: true
# The interval in which the local store dir is read and refreshes the internal state
interval: 5m
# The timeout for the remote operation
timeout: 10s
remote:
# Load persisted operations from a GCP Cloud Storage bucket.
# Will look at all the objects in the bucket and try to load any object with a `.json` extension
gcp_bucket: ""
# Loader decides how persisted operations are loaded, see loader chapter for more details
loader:
# Type of loader to use
type: local
# Location to load persisted operations from
location: ./store
# Whether to reload persisted operations periodically
reload:
enabled: true
# The interval in which the persisted operations are refreshed
interval: 5m0s
# The timeout for the refreshing operation
timeout: 10s

# ...
```

## How it works

`graphql-protect` looks at the `store` location on local disk to find any `*.json` files it can parse for persisted operations.
`graphql-protect` looks at the location specified for the `loader` to find any `*.json` files it can parse for persisted operations.
These loaders can be specified to look at local directories, or remote locations like GCP buckets.
`graphql-protect` will load the persisted operations from the location and update its internal state with any new operations.

It can be configured to look at this directory and reload based on the files on local disk.
## Loader

Additionally, it can be configured to fetch operations from a remote location onto the local disk.
Currently we have support for the following loaders, specified by the `type` field in the loader configuration:

* `local` - load persisted operations from the local file system. This is the default strategy. If need be, this allows you to download files from an unsupported remote location to local storage and have `graphql-protect` pick up on them.
* `gcp` - load persisted operations from a GCP bucket
* `noop` - no persisted operations are loaded. This is the strategy applied when an unknown type is supplied.

## Parsing Structure

Expand Down Expand Up @@ -85,7 +92,7 @@ In order to utilize this feature you need to generate the persisted operations t
This rule produces metrics to help you gain insights into the behavior of the rule.

```
graphql_protect_persisted_operations_results{state, result}
graphql_protect_persisted_operations_result_count{state, result}
```

| `state` | Description |
Expand All @@ -101,13 +108,26 @@ graphql_protect_persisted_operations_results{state, result}
| `rejected` | The rule rejected the request |

```
graphql_protect_persisted_operations_reload{system}
graphql_protect_persisted_operations_load_result_count{type, result}
```


| `system` | Description |
|--------|-------------------------------------------------------------------------------------------------------|
| `local` | The rule reloaded its state from local storage |
| `remote` | The rule reloaded the remote state onto local disk. This does not refresh the local state on its own. |
| `type` | Description |
|---------|-------------------------------|
| `local` | Loaded using the local loader |
| `gcp` | Loaded using the gcp loader |
| `noop` | Loaded using the noop loader |


| `result` | Description |
|-----------|---------------------------|
| `success` | loading was successful |
| `failure` | loading produced an error |

No metrics are produced when the rule is disabled.

```
graphql_protect_persisted_operations_unique_hashes_in_memory_count{}
```

No metrics are produced when the rule is disabled.
74 changes: 33 additions & 41 deletions internal/app/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,19 @@ func TestNewConfig(t *testing.T) {
},
PersistedOperations: persistedoperations.Config{
Enabled: false,
Store: "./store",
Reload: struct {
Enabled bool `conf:"default:true" yaml:"enabled"`
Interval time.Duration `conf:"default:5m" yaml:"interval"`
Timeout time.Duration `conf:"default:10s" yaml:"timeout"`
}(struct {
Enabled bool
Interval time.Duration
Timeout time.Duration
}{
Enabled: true,
Interval: 5 * time.Minute,
Timeout: 10 * time.Second,
}),
Remote: struct {
GcpBucket string `yaml:"gcp_bucket"`
}(struct{ GcpBucket string }{GcpBucket: ""}),
Loader: persistedoperations.LoaderConfig{
Type: "local",
Location: "./store",
Reload: struct {
Enabled bool `conf:"default:true" yaml:"enabled"`
Interval time.Duration `conf:"default:5m" yaml:"interval"`
Timeout time.Duration `conf:"default:10s" yaml:"timeout"`
}{
Enabled: true,
Interval: 5 * time.Minute,
Timeout: 10 * time.Second,
},
},
RejectOnFailure: true,
},
BlockFieldSuggestions: block_field_suggestions.Config{
Expand Down Expand Up @@ -161,13 +157,13 @@ obfuscate_upstream_errors: false
persisted_operations:
enabled: true
reject_on_failure: false
store: "store"
reload:
enabled: true
interval: 1s
timeout: 1s
remote:
gcp_bucket: "gcp_bucket"
loader:
type: gcp
location: some-bucket
reload:
enabled: true
interval: 1s
timeout: 1s
max_aliases:
enabled: false
Expand Down Expand Up @@ -251,23 +247,19 @@ log:
},
PersistedOperations: persistedoperations.Config{
Enabled: true,
Store: "store",
Reload: struct {
Enabled bool `conf:"default:true" yaml:"enabled"`
Interval time.Duration `conf:"default:5m" yaml:"interval"`
Timeout time.Duration `conf:"default:10s" yaml:"timeout"`
}(struct {
Enabled bool
Interval time.Duration
Timeout time.Duration
}{
Enabled: true,
Interval: 1 * time.Second,
Timeout: 1 * time.Second,
}),
Remote: struct {
GcpBucket string `yaml:"gcp_bucket"`
}(struct{ GcpBucket string }{GcpBucket: "gcp_bucket"}),
Loader: persistedoperations.LoaderConfig{
Type: "gcp",
Location: "some-bucket",
Reload: struct {
Enabled bool `conf:"default:true" yaml:"enabled"`
Interval time.Duration `conf:"default:5m" yaml:"interval"`
Timeout time.Duration `conf:"default:10s" yaml:"timeout"`
}{
Enabled: true,
Interval: 1 * time.Second,
Timeout: 1 * time.Second,
},
},
RejectOnFailure: false,
},
BlockFieldSuggestions: block_field_suggestions.Config{
Expand Down
Loading

0 comments on commit 5ac2de8

Please sign in to comment.