Skip to content

Commit

Permalink
feat: implement vmss refresh functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
kristina-solovyova committed Jan 12, 2024
1 parent deda7bf commit 3198ead
Show file tree
Hide file tree
Showing 21 changed files with 629 additions and 257 deletions.
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,6 @@ proxy_url = VALUE
| [azurerm_lb_rule.backend_lb_rule](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/lb_rule) | resource |
| [azurerm_lb_rule.ui_lb_rule](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/lb_rule) | resource |
| [azurerm_linux_function_app.function_app](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/linux_function_app) | resource |
| [azurerm_linux_virtual_machine_scale_set.vmss](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/linux_virtual_machine_scale_set) | resource |
| [azurerm_log_analytics_workspace.la_workspace](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/log_analytics_workspace) | resource |
| [azurerm_logic_app_standard.logic_app_standard](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/logic_app_standard) | resource |
| [azurerm_monitor_diagnostic_setting.function_diagnostic_setting](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_diagnostic_setting) | resource |
Expand All @@ -338,7 +337,10 @@ proxy_url = VALUE
| [azurerm_role_assignment.logic_app_standard_reader](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.logic_app_standard_reader_secret](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.logic_app_standard_reader_smb_data](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.managed_identity_operator](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.network_contributor](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.obs_storage_blob_data_contributor](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.reader](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.storage_account_contributor](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.storage_blob_data_contributor](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.storage_blob_data_reader](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
Expand All @@ -347,6 +349,8 @@ proxy_url = VALUE
| [azurerm_storage_account.deployment_sa](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_account) | resource |
| [azurerm_storage_account.logicapp](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_account) | resource |
| [azurerm_storage_blob.state](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_blob) | resource |
| [azurerm_storage_blob.vmss_config](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_blob) | resource |
| [azurerm_storage_blob.vmss_state](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_blob) | resource |
| [azurerm_storage_container.deployment](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_container) | resource |
| [azurerm_storage_share_directory.share_directory_scale_down](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_share_directory) | resource |
| [azurerm_storage_share_directory.share_directory_scale_up](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_share_directory) | resource |
Expand All @@ -356,6 +360,7 @@ proxy_url = VALUE
| [azurerm_subnet.dns_resolver_subnet](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/subnet) | resource |
| [azurerm_subnet.logicapp_subnet_delegation](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/subnet) | resource |
| [azurerm_subnet.subnet_delegation](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/subnet) | resource |
| [azurerm_user_assigned_identity.vmss](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/user_assigned_identity) | resource |
| [local_file.connections_workflow_file](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
| [local_file.private_key](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
| [local_file.public_key](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
Expand Down Expand Up @@ -397,13 +402,13 @@ proxy_url = VALUE
| <a name="input_deployment_storage_account_access_key"></a> [deployment\_storage\_account\_access\_key](#input\_deployment\_storage\_account\_access\_key) | The access key of the existing Blob object store container. | `string` | `""` | no |
| <a name="input_deployment_storage_account_name"></a> [deployment\_storage\_account\_name](#input\_deployment\_storage\_account\_name) | Name of exising deployment storage account | `string` | `""` | no |
| <a name="input_function_access_restriction_enabled"></a> [function\_access\_restriction\_enabled](#input\_function\_access\_restriction\_enabled) | Allow public access, Access restrictions apply to inbound access to internal vent | `bool` | `false` | no |
| <a name="input_function_app_dist"></a> [function\_app\_dist](#input\_function\_app\_dist) | Function app code dist | `string` | `"release"` | no |
| <a name="input_function_app_dist"></a> [function\_app\_dist](#input\_function\_app\_dist) | Function app code dist | `string` | `"dev"` | no |
| <a name="input_function_app_log_level"></a> [function\_app\_log\_level](#input\_function\_app\_log\_level) | Log level for function app (from -1 to 5). See https://github.com/rs/zerolog#leveled-logging | `number` | `1` | no |
| <a name="input_function_app_storage_account_container_prefix"></a> [function\_app\_storage\_account\_container\_prefix](#input\_function\_app\_storage\_account\_container\_prefix) | Weka storage account container name prefix | `string` | `"weka-tf-functions-deployment-"` | no |
| <a name="input_function_app_storage_account_prefix"></a> [function\_app\_storage\_account\_prefix](#input\_function\_app\_storage\_account\_prefix) | Weka storage account name prefix | `string` | `"weka"` | no |
| <a name="input_function_app_subnet_delegation_cidr"></a> [function\_app\_subnet\_delegation\_cidr](#input\_function\_app\_subnet\_delegation\_cidr) | Subnet delegation enables you to designate a specific subnet for an Azure PaaS service. | `string` | `"10.0.1.0/25"` | no |
| <a name="input_function_app_subnet_delegation_id"></a> [function\_app\_subnet\_delegation\_id](#input\_function\_app\_subnet\_delegation\_id) | Required to specify if subnet\_name were used to specify pre-defined subnets for weka. Function subnet delegation requires an additional subnet, and in the case of pre-defined networking this one also should be pre-defined | `string` | `""` | no |
| <a name="input_function_app_version"></a> [function\_app\_version](#input\_function\_app\_version) | Function app code version (hash) | `string` | `"24709e83a002b423746db0ddf4b45512"` | no |
| <a name="input_function_app_version"></a> [function\_app\_version](#input\_function\_app\_version) | Function app code version (hash) | `string` | `"60ec8c21cc5fd2b21d698fa9bb9e69ac"` | no |
| <a name="input_get_weka_io_token"></a> [get\_weka\_io\_token](#input\_get\_weka\_io\_token) | The token to download the Weka release from get.weka.io. | `string` | `""` | no |
| <a name="input_hotspare"></a> [hotspare](#input\_hotspare) | Number of hotspares to set on weka cluster. Refer to https://docs.weka.io/overview/ssd-capacity-management#hot-spare | `number` | `1` | no |
| <a name="input_install_cluster_dpdk"></a> [install\_cluster\_dpdk](#input\_install\_cluster\_dpdk) | Install weka cluster with DPDK | `bool` | `true` | no |
Expand Down
10 changes: 7 additions & 3 deletions blob.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,14 @@ resource "azurerm_storage_blob" "vmss_state" {

source_content = jsonencode({
vmss_created = false
vmss_id = ""
upgrade_needed = false
vmss_version = 0
refresh_status = 0
current_config = {}
})

lifecycle {
ignore_changes = all
}
}

resource "azurerm_storage_blob" "vmss_config" {
Expand All @@ -73,7 +78,6 @@ resource "azurerm_storage_blob" "vmss_config" {
upgrade_mode = "Manual"
health_probe_id = azurerm_lb_probe.backend_lb_probe.id
admin_username = var.vm_username
instances = var.cluster_size
computer_name_prefix = "${var.prefix}-${var.cluster_name}-backend"
custom_data = base64encode(local.custom_data_script)
disable_password_authentication = true
Expand Down
77 changes: 72 additions & 5 deletions function-app/code/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"

Expand Down Expand Up @@ -46,6 +47,34 @@ for d in json.load(sys.stdin)['disks']:
print(d['devPath'])
`

func WriteResponse(w http.ResponseWriter, resData map[string]any, statusCode *int) {
outputs := make(map[string]any)

outputs["res"] = resData
invokeResponse := InvokeResponse{Outputs: outputs, Logs: nil, ReturnValue: nil}

responseJson, _ := json.Marshal(invokeResponse)

w.Header().Set("Content-Type", "application/json")
w.Write(responseJson)
}

func WriteErrorResponse(w http.ResponseWriter, err error) {
resData := make(map[string]any)
resData["body"] = err.Error()

badReqStatus := http.StatusBadRequest
WriteResponse(w, resData, &badReqStatus)
}

func WriteSuccessResponse(w http.ResponseWriter, data any) {
resData := make(map[string]any)
resData["body"] = data

successStatus := http.StatusOK
WriteResponse(w, resData, &successStatus)
}

func leaseContainerAcquire(ctx context.Context, storageAccountName, containerName string, leaseIdIn *string) (leaseIdOut *string, err error) {
logger := logging.LoggerFromCtx(ctx)

Expand Down Expand Up @@ -746,6 +775,18 @@ func GetScaleSetInfo(ctx context.Context, subscriptionId, resourceGroupName, vmS
return &scaleSetInfo, err
}

func GetScaleSetSize(ctx context.Context, subscriptionId, resourceGroupName, vmScaleSetName string) (size int, err error) {
logger := logging.LoggerFromCtx(ctx)

scaleSet, err := getScaleSet(ctx, subscriptionId, resourceGroupName, vmScaleSetName)
if err != nil {
logger.Error().Err(err).Send()
return
}
size = int(*scaleSet.SKU.Capacity)
return
}

// Gets a list of all VMs in a scale set
// see https://learn.microsoft.com/en-us/rest/api/compute/virtual-machine-scale-set-vms/list
func GetScaleSetInstances(ctx context.Context, subscriptionId, resourceGroupName, vmScaleSetName string, expand *string) (vms []*armcompute.VirtualMachineScaleSetVM, err error) {
Expand Down Expand Up @@ -918,8 +959,12 @@ func GetWekaClusterPassword(ctx context.Context, keyVaultUri string) (password s
return GetKeyVaultValue(ctx, keyVaultUri, "weka-password")
}

func GetVmScaleSetName(prefix, clusterName string) string {
return fmt.Sprintf("%s-%s-vmss", prefix, clusterName)
func GetVmScaleSetName(prefix, clusterName string, version uint16) string {
versionStr := ""
if version > 0 {
versionStr = fmt.Sprintf("-v%d", version)
}
return fmt.Sprintf("%s-%s-vmss%s", prefix, clusterName, versionStr)
}

func GetScaleSetInstanceIds(ctx context.Context, subscriptionId, resourceGroupName, vmScaleSetName string) (instanceIds []string, err error) {
Expand Down Expand Up @@ -1131,7 +1176,29 @@ func AssignVmssContributorRoleToFunctionApp(ctx context.Context, subscriptionId,
return nil
}

func CreateVmss(ctx context.Context, subscriptionId, resourceGroupName, vmScaleSetName string, config VMSSConfig) (id *string, err error) {
func DeleteVmss(ctx context.Context, subscriptionId, resourceGroupName, vmScaleSetName string) (err error) {
logger := logging.LoggerFromCtx(ctx)
logger.Info().Msgf("Deleting vmss %s", vmScaleSetName)

credential, err := azidentity.NewDefaultAzureCredential(nil)
if err != nil {
logger.Error().Err(err).Send()
return
}
client, err := armcompute.NewVirtualMachineScaleSetsClient(subscriptionId, credential, nil)
if err != nil {
logger.Error().Err(err).Send()
return
}

_, err = client.BeginDelete(ctx, resourceGroupName, vmScaleSetName, nil)
if err != nil {
logger.Error().Err(err).Send()
}
return
}

func CreateOrUpdateVmss(ctx context.Context, subscriptionId, resourceGroupName, vmScaleSetName string, config VMSSConfig, vmssSize int) (id *string, err error) {
logger := logging.LoggerFromCtx(ctx)

credential, err := azidentity.NewDefaultAzureCredential(nil)
Expand All @@ -1146,7 +1213,7 @@ func CreateVmss(ctx context.Context, subscriptionId, resourceGroupName, vmScaleS
return
}

size := int64(config.Instances)
size := int64(vmssSize)
forceDeletion := false
sshKeyPath := fmt.Sprintf("/home/%s/.ssh/authorized_keys", config.AdminUsername)

Expand Down Expand Up @@ -1303,7 +1370,7 @@ func CreateVmss(ctx context.Context, subscriptionId, resourceGroupName, vmScaleS
}
resp, err := poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{Frequency: time.Second})
if err != nil {
err = fmt.Errorf("cannot create vmss: %v", err)
err = fmt.Errorf("cannot create/update vmss: %v", err)
return
}
id = resp.VirtualMachineScaleSet.ID
Expand Down
47 changes: 42 additions & 5 deletions function-app/code/common/vmss_config.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package common

import "fmt"
import (
"fmt"
"strings"

"github.com/google/go-cmp/cmp"
)

type Identity struct {
IdentityIDs []string `json:"identity_ids"`
Expand Down Expand Up @@ -62,7 +67,6 @@ type VMSSConfig struct {
Zones []string `json:"zones"`
ResourceGroupName string `json:"resource_group_name"`
SKU string `json:"sku"`
Instances int `json:"instances"`
SourceImageID string `json:"source_image_id"`
Tags map[string]*string `json:"tags"`

Expand All @@ -87,10 +91,43 @@ type VMSSConfig struct {
SecondaryNICs SecondaryNICs `json:"secondary_nics"`
}

func VmssConfigsDiff(old, new *VMSSConfig) string {
return cmp.Diff(new, old) // arguments order: (want, got)
}

type RefreshStatus uint8

const (
RefreshNone RefreshStatus = iota
RefreshInProgress RefreshStatus = iota
RefreshNeeded RefreshStatus = iota
)

func (s *RefreshStatus) String() string {
return []string{"none", "in_progress", "needed"}[*s]
}

type VMSSState struct {
VmssCreated bool `json:"vmss_created"`
VmssId string `json:"vmss_id"`
UpgradeNeeded bool `json:"upgrade_needed"`
VmssCreated bool `json:"vmss_created"`
VmssVersion uint16 `json:"vmss_version"`
RefreshStatus RefreshStatus `json:"refresh_status"`
CurrentConfig *VMSSConfig `json:"current_config,omitempty"`
}

func GetRefreshVmssName(outdatedVmssName string, currentVmssVersion uint16) string {
versionStr := fmt.Sprintf("-v%d", currentVmssVersion)
newVersionStr := fmt.Sprintf("-v%d", currentVmssVersion+1)

vmssNameBase := strings.TrimSuffix(outdatedVmssName, versionStr)
return fmt.Sprintf("%s%s", vmssNameBase, newVersionStr)
}

type VMSSStateVerbose struct {
VmssCreated bool `json:"vmss_created"`
VmssName string `json:"vmss_name"`
RefreshStatus string `json:"refresh_status"`
RefreshVmssName string `json:"refresh_vmss_name"`
CurrentConfig *VMSSConfig `json:"current_config,omitempty"`
}

func ToEnumStrValue[T interface{ ~string }](val string, possibleEnumValues []T) (*T, error) {
Expand Down
26 changes: 16 additions & 10 deletions function-app/code/functions/clusterize/clusterize.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type ClusterizationParams struct {
Location string
Prefix string
KeyVaultUri string
VmssVersion uint16

StateContainerName string
StateStorageName string
Expand Down Expand Up @@ -96,7 +97,7 @@ func HandleLastClusterVm(ctx context.Context, state protocol.ClusterState, p Clu
logger := logging.LoggerFromCtx(ctx)
logger.Info().Msg("This is the last instance in the cluster, creating obs and clusterization script")

vmScaleSetName := common.GetVmScaleSetName(p.Prefix, p.Cluster.ClusterName)
vmScaleSetName := common.GetVmScaleSetName(p.Prefix, p.Cluster.ClusterName, p.VmssVersion)

if p.Cluster.SetObs {
if p.Obs.AccessKey == "" {
Expand Down Expand Up @@ -177,7 +178,7 @@ func Clusterize(ctx context.Context, p ClusterizationParams) (clusterizeScript s

instanceName := strings.Split(p.VmName, ":")[0]
instanceId := common.GetScaleSetVmIndex(instanceName)
vmScaleSetName := common.GetVmScaleSetName(p.Prefix, p.Cluster.ClusterName)
vmScaleSetName := common.GetVmScaleSetName(p.Prefix, p.Cluster.ClusterName, p.VmssVersion)
vmName := p.VmName

ip, err := common.GetPublicIp(ctx, p.SubscriptionId, p.ResourceGroupName, vmScaleSetName, p.Prefix, p.Cluster.ClusterName, instanceId)
Expand Down Expand Up @@ -226,6 +227,7 @@ func Clusterize(ctx context.Context, p ClusterizationParams) (clusterizeScript s
func Handler(w http.ResponseWriter, r *http.Request) {
stateContainerName := os.Getenv("STATE_CONTAINER_NAME")
stateStorageName := os.Getenv("STATE_STORAGE_NAME")
vmssStateStorageName := os.Getenv("VMSS_STATE_STORAGE_NAME")
hostsNum, _ := strconv.Atoi(os.Getenv("HOSTS_NUM"))
clusterName := os.Getenv("CLUSTER_NAME")
subscriptionId := os.Getenv("SUBSCRIPTION_ID")
Expand Down Expand Up @@ -255,7 +257,6 @@ func Handler(w http.ResponseWriter, r *http.Request) {
addFrontend = true
}

outputs := make(map[string]interface{})
resData := make(map[string]interface{})
var invokeRequest common.InvokeRequest

Expand Down Expand Up @@ -283,12 +284,21 @@ func Handler(w http.ResponseWriter, r *http.Request) {
return
}

vmssState, err := common.ReadVmssState(ctx, vmssStateStorageName, stateContainerName)
if err != nil {
err = fmt.Errorf("cannot read vmss state to read get vmss version: %v", err)
logger.Error().Err(err).Send()
common.WriteErrorResponse(w, err)
return
}

params := ClusterizationParams{
SubscriptionId: subscriptionId,
ResourceGroupName: resourceGroupName,
Location: location,
Prefix: prefix,
KeyVaultUri: keyVaultUri,
VmssVersion: vmssState.VmssVersion,
StateContainerName: stateContainerName,
StateStorageName: stateStorageName,
VmName: data.Vm,
Expand Down Expand Up @@ -317,19 +327,15 @@ func Handler(w http.ResponseWriter, r *http.Request) {
FunctionAppName: functionAppName,
}

status := http.StatusOK
if data.Vm == "" {
msg := "Cluster name wasn't supplied"
logger.Error().Msgf(msg)
resData["body"] = msg
status = http.StatusBadRequest
} else {
clusterizeScript := Clusterize(ctx, params)
resData["body"] = clusterizeScript
}
outputs["res"] = resData
invokeResponse := common.InvokeResponse{Outputs: outputs, Logs: nil, ReturnValue: nil}

responseJson, _ := json.Marshal(invokeResponse)

w.Header().Set("Content-Type", "application/json")
w.Write(responseJson)
common.WriteResponse(w, resData, &status)
}
Loading

0 comments on commit 3198ead

Please sign in to comment.