Skip to content

Commit

Permalink
Read resource state after each Create and Update operation to avoid s…
Browse files Browse the repository at this point in the history
…purious diffs (#3042)

The code change in this PR is fairly small but it does change the
fundamental flow of the requests that the provider makes to Azure API,
it's worth a careful consideration. The goal is to minimize the diffs
between `pulumi up` and `pulumi refresh`.

### Current situation

We use PUT operations for both resource creation and updates. The
provider's Create() and Update() methods call the PUT endpoint
associated with a resources and awaits for HTTP response, that may come
immediately of after the long-running operation (LRO) succeeds. Still,
logically, it's a response from the PUT request, as it's defined in Open
API specs and implemented in the actual API. We calculate resource
outputs from that response.

When we refresh (or import) resources, the provider's Read() method
calls the GET (sometimes it's POST) endpoint of the same resource. It
then calculates resource outputs and inputs from it. Open API
(potentially) defines the type of GET response separately from PUT
response.

Our implicit assumption here is that actually PUT response and GET
response have the same shape and the same data for the same resource.
This assumption is mostly okay, but not perfect. As #2245 and #2798
report, and `RequireEmptyPreviewAfterRefresh=false` tests demonstrate,
the actual payloads of PUT vs. GET may differ. In particular, I noticed
the following scenario:

1. Some resource properties have default values that are assigned by
Azure.
2. A user does not set any value for such property.
3. PUT response still has no default value.
4. GET response does return the default value.
5. Refresh finds the PUT vs GET discrepancy and updates state.
6. The next Update thinks those values were removed from the code, and
suggests updating them.
7. Even though it's a no-op on Azure, it's a diff that the users sees
and get confused.

### Proposed change

This PR amends the flow of Create() and Update() by an extra GET call
after successful PUT operation. The response of that extra GET is what
is used to calculate resource outputs. This guarantees that the shapes
and data of Create/Update outputs and Read outputs are going to be the
same. It resolves the problematic flow above, as proved by test changes.

The only downside that I can see is that we do an extra GET request for
each resource change, which may slow things down. However, we observe
that GET endpoints are much faster than PUTs, especially for LRO-based
resources. I assert that the correctness win has exceedingly high values
compared to this minor slowdown.

Resolve #2798
Resolve #2245
  • Loading branch information
mikhailshilkov authored Feb 9, 2024
1 parent d71ffb8 commit 151c80b
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 50 deletions.
14 changes: 4 additions & 10 deletions examples/appservice/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,8 @@ new web.WebAppMetadata("meta", {
properties: {
CURRENT_STACK: "dotnetcore",
},
// These properties are set by resources below, and thus should be ignored here for clean updates.
}, { ignoreChanges: [
"properties.CloneUri",
"properties.RepoUrl",
"properties.ScmOperationId",
"properties.ScmOperationTime",
"properties.ScmUri",
"properties.subscription",
]});
// These properties conflict with other resources in this stack, and thus should be ignored here for clean updates.
}, { ignoreChanges: [ "properties.*" ]});

new web.WebAppApplicationSettings("settings", {
resourceGroupName: resourceGroup.name,
Expand All @@ -137,7 +130,8 @@ new web.WebAppApplicationSettings("settings", {
"ApplicationInsights:InstrumentationKey": appInsights.instrumentationKey?.apply(v => v!),
"test": "Test"
},
});
// These properties conflict with other resources in this stack, and thus should be ignored here for clean updates.
}, { ignoreChanges: [ "properties.*" ]});

new web.WebAppConnectionStrings("conns", {
resourceGroupName: resourceGroup.name,
Expand Down
1 change: 0 additions & 1 deletion examples/examples_dotnet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ func TestAccSql(t *testing.T) {
Dir: filepath.Join(getCwd(t), "cs-sql"),
})

test.RequireEmptyPreviewAfterRefresh = false
integration.ProgramTest(t, &test)
}

Expand Down
2 changes: 0 additions & 2 deletions examples/examples_nodejs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ func TestAccAppServiceTs(t *testing.T) {
Dir: filepath.Join(getCwd(t), "appservice"),
})

test.RequireEmptyPreviewAfterRefresh = false
integration.ProgramTest(t, &test)
}

Expand All @@ -43,7 +42,6 @@ func TestAccSimpleTs(t *testing.T) {
Dir: filepath.Join(getCwd(t), "simple"),
})

test.RequireEmptyPreviewAfterRefresh = false
integration.ProgramTest(t, &test)
}

Expand Down
5 changes: 1 addition & 4 deletions examples/py-loadbalancer-subresource/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@
"public_ip_address": {
"id": public_ip.id,
},
}],
# Can be removed after https://github.com/pulumi/pulumi-azure-native/issues/3049 is fixed.
opts=ResourceOptions(ignore_changes=["backendAddressPools"])
)
}])

# Create the BackendAddressPool as a separate resource rather than a property of the load balancer.
backend_address_pool1 = network.LoadBalancerBackendAddressPool(
Expand Down
4 changes: 1 addition & 3 deletions examples/py-nsg/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
# Create a new NetworkSecurityGroup
nsg = network.NetworkSecurityGroup('network_security_group',
resource_group_name=resource_group.name,
location=resource_group.location,
# Can be removed after https://github.com/pulumi/pulumi-azure-native/issues/3049 is fixed.
opts=ResourceOptions(ignore_changes=["securityRules"]))
location=resource_group.location)

# Create a default SecurityRule
rule = network.SecurityRule('security_rule',
Expand Down
3 changes: 1 addition & 2 deletions examples/vnet-subnets-resolution/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ const externalVnet = new network.VirtualNetwork("external", {
tags: {
"step": "1",
},
// Can be removed after https://github.com/pulumi/pulumi-azure-native/issues/3049 is fixed.
}, { ignoreChanges: ["subnets"] });
});

const externalSubnet = new network.Subnet("default", {
resourceGroupName: resourceGroup.name,
Expand Down
3 changes: 1 addition & 2 deletions examples/vnet-subnets-resolution/step2/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ const externalVnet = new network.VirtualNetwork("external", {
// Updating tags causes a resource update that should NOT try to delete subnets.
"step": "2",
},
// Can be removed after https://github.com/pulumi/pulumi-azure-native/issues/3049 is fixed.
}, { ignoreChanges: ["subnets"] });
});

const externalSubnet = new network.Subnet("default", {
resourceGroupName: resourceGroup.name,
Expand Down
98 changes: 72 additions & 26 deletions provider/pkg/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,23 @@ const (
type azureNativeProvider struct {
rpc.UnimplementedResourceProviderServer

azureClient azure.AzureClient
host *provider.HostClient
name string
version string
subscriptionID string
environment azureEnv.Environment
// client autorest.Client
resourceMap *resources.PartialAzureAPIMetadata
config map[string]string
schemaBytes []byte
metadataBytes []byte
fullPkgSpec *schema.PackageSpec
fullResourceMap *resources.AzureAPIMetadata
converter *convert.SdkShapeConverter
customResources map[string]*customresources.CustomResource
rgLocationMap map[string]string
lookupType resources.TypeLookupFunc
azureClient azure.AzureClient
host *provider.HostClient
name string
version string
subscriptionID string
environment azureEnv.Environment
resourceMap *resources.PartialAzureAPIMetadata
config map[string]string
schemaBytes []byte
metadataBytes []byte
fullPkgSpec *schema.PackageSpec
fullResourceMap *resources.AzureAPIMetadata
converter *convert.SdkShapeConverter
customResources map[string]*customresources.CustomResource
rgLocationMap map[string]string
lookupType resources.TypeLookupFunc
skipReadOnUpdate bool
}

func makeProvider(host *provider.HostClient, name, version string, schemaBytes []byte,
Expand Down Expand Up @@ -231,6 +231,8 @@ func (k *azureNativeProvider) Configure(ctx context.Context,
return nil, fmt.Errorf("initializing custom resources: %w", err)
}

k.skipReadOnUpdate = k.getConfig("skipReadOnUpdate", "ARM_SKIP_READ_ON_UPDATE") == "true"

return &rpc.ConfigureResponse{
SupportsPreview: true,
}, nil
Expand Down Expand Up @@ -933,6 +935,23 @@ func (k *azureNativeProvider) Create(ctx context.Context, req *rpc.CreateRequest
id = azureId
}

// Read the state of the resource from its Read endpoint. While we already have resource state from
// the PUT reponse, it may not match precisely the shape of Read reponses that we also use for Refresh
// operations. That discrepancy may cause noisy diffs that are hard for users to reconcile.
if !k.skipReadOnUpdate {
readResponse, err := k.azureRead(ctx, res.ReadMethod, id+res.ReadPath, res.APIVersion)
if err != nil {
// Since this read operation was introduced in a minor version of the provider, we choose to ignore
// any errors here to avoid user-facing regressions. If no warnings are reported, we should be able
// to promote this situation to an error.
k.host.Log(ctx, diag.Warning, urn, `Failed to read resource after Create. Please report this issue.
Verbose logs contain more information, see https://www.pulumi.com/docs/support/troubleshooting/#verbose-logging.`)
logging.V(9).Infof("failed read resource %q after creation: %s", id, err.Error())
} else if readResponse != nil {
response = readResponse
}
}

// Map the raw response to the shape of outputs that the SDKs expect.
outputs = k.converter.ResponseBodyToSdkOutputs(res.Response, response)
}
Expand Down Expand Up @@ -1058,15 +1077,8 @@ func (k *azureNativeProvider) Read(ctx context.Context, req *rpc.ReadRequest) (*
err = k.azureClient.Head(ctx, url, res.APIVersion)
response = oldState.Mappable()
outputs = k.converter.ResponseBodyToSdkOutputs(res.Response, response)
case res.ReadMethod == "POST":
bodyParams := map[string]interface{}{}
queryParams := map[string]interface{}{
"api-version": res.APIVersion,
}
response, err = k.azureClient.Post(ctx, url, bodyParams, queryParams)
outputs = k.converter.ResponseBodyToSdkOutputs(res.Response, response)
default:
response, err = k.azureClient.Get(ctx, url, res.APIVersion)
response, err = k.azureRead(ctx, res.ReadMethod, url, res.APIVersion)
outputs = k.converter.ResponseBodyToSdkOutputs(res.Response, response)
}
if err != nil {
Expand Down Expand Up @@ -1177,7 +1189,8 @@ func (k *azureNativeProvider) resetUnsetSubResourceProperties(ctx context.Contex
result, ok := copy.(map[string]interface{})
if !ok {
// This should never happen.
k.host.Log(ctx, diag.Warning, urn, "Failed to remove unset sub-resource properties. Please report this issue. See verbose logs for more information.")
k.host.Log(ctx, diag.Warning, urn, `Failed to remove unset sub-resource properties. Please report this issue.
Verbose logs contain more information, see https://www.pulumi.com/docs/support/troubleshooting/#verbose-logging.`)
logging.V(9).Infof("failed to remove unset sub-resource properties: failed to cast copy value, expected map[string]interface{}, found %v", copy)
return sdkResponse
}
Expand Down Expand Up @@ -1290,6 +1303,23 @@ func (k *azureNativeProvider) Update(ctx context.Context, req *rpc.UpdateRequest
return nil, azure.AzureError(err)
}

// Read the state of the resource from its Read endpoint. While we already have resource state from
// the PUT reponse, it may not match precisely the shape of Read reponses that we also use for Refresh
// operations. That discrepancy may cause noisy diffs that are hard for users to reconcile.
if !k.skipReadOnUpdate {
readResponse, err := k.azureRead(ctx, res.ReadMethod, id+res.ReadPath, res.APIVersion)
if err != nil {
// Since this read operation was introduced in a minor version of the provider, we choose to ignore
// any errors here to avoid user-facing regressions. If no warnings are reported, we should be able
// to promote this situation to an error.
k.host.Log(ctx, diag.Warning, urn, `Failed to read resource after Update. Please report this issue.
Verbose logs contain more information, see https://www.pulumi.com/docs/support/troubleshooting/#verbose-logging.`)
logging.V(9).Infof("failed read resource %q after creation: %s", id, err.Error())
} else if readResponse != nil {
response = readResponse
}
}

// Map the raw response to the shape of outputs that the SDKs expect.
outputs = k.converter.ResponseBodyToSdkOutputs(res.Response, response)
}
Expand Down Expand Up @@ -1525,6 +1555,22 @@ func (k *azureNativeProvider) findUnsetPropertiesToMaintain(res *resources.Azure
return missingProperties
}

func (k *azureNativeProvider) azureRead(ctx context.Context, readMethod, url, apiVersion string) (map[string]interface{}, error) {
switch readMethod {
case "HEAD":
err := k.azureClient.Head(ctx, url, apiVersion)
return nil, err
case "POST":
bodyParams := map[string]interface{}{}
queryParams := map[string]interface{}{
"api-version": apiVersion,
}
return k.azureClient.Post(ctx, url, bodyParams, queryParams)
default:
return k.azureClient.Get(ctx, url, apiVersion)
}
}

func (k *azureNativeProvider) prepareAzureRESTInputs(path string, parameters []resources.AzureAPIParameter, requiredContainers gen.RequiredContainers, methodInputs,
clientInputs map[string]interface{}) (string, map[string]interface{}, map[string]interface{}, error) {
// Schema-driven mapping of inputs into Autorest id/body/query
Expand Down

0 comments on commit 151c80b

Please sign in to comment.