Thursday, May 30, 2024

Canary Deployment on GKE with Terraform and Cloud Deploy

 Here's a detailed guide to setting up a Canary deployment on Google Cloud using Terraform and Cloud Deploy. This example demonstrates deploying a sample application image to a Google Kubernetes Engine (GKE) cluster, utilizing Canary deployment strategies to gradually roll out updates




Setting Up Terraform Configuration Files

provider.tf

terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "5.18.0"
    }
  }
}

provider "google" {
  project = var.project_id
  region  = var.region
  zone    = var.zone
}

provider "google-beta" {
  project = var.project_id
  region  = var.region
  zone    = var.zone
}


variable.tf

variable "project_id" {
  default = ""
}

variable "region" {
  default = ""
}

variable "zone" {
  default = ""
}

variable "sec_region" {
  default = ""
}

variable "sec_zone" {
  default = ""
}

terraform.tfvars

project_id = <PROYECT-ID>
region     = "us-central1"
zone       = "us-central1-a"
sec_region = "us-west1"
sec_zone   = "us-west1-a"

Service Account Configuration

serviceaccount.tf

resource "google_service_account" "gke_service_account" {
  project      = var.project_id
  account_id   = "gke-service-account-id"
  display_name = "Service Account for Canary-deploy"
}

output "service_account_email" {
  value = google_service_account.gke_service_account.email
}


Network Configuration

network.tf

resource "google_compute_network" "nw1-vpc" {
  name                    = "nw1-vpc"
  auto_create_subnetworks = false
  mtu                     = 1460
}

resource "google_compute_subnetwork" "nw1-subnet1" {
  name                     = "nw1-vpc-sub1-${var.region}"
  network                  = google_compute_network.nw1-vpc.id
  ip_cidr_range            = "10.10.1.0/24"
  region                   = var.region
  private_ip_google_access = true
}

resource "google_compute_subnetwork" "nw1-subnet2" {
  name                     = "nw2-vpc-sub3-euro-west2"
  network                  = google_compute_network.nw1-vpc.id
  ip_cidr_range            = "10.10.2.0/24"
  region                   = "europe-west2"
  private_ip_google_access = true
}

resource "google_compute_firewall" "nw1-ssh-icmp-allow" {
  name    = "nw1-vpc-ssh-allow"
  network = google_compute_network.nw1-vpc.id
  allow {
    protocol = "icmp"
  }
  allow {
    protocol = "tcp"
    ports    = ["22"]
  }
  source_ranges = ["39.33.11.48/32"]
  target_tags   = ["nw1-vpc-ssh-allow"]
  priority      = 1000
}

resource "google_compute_firewall" "nw1-internal-allow" {
  name    = "nw1-vpc-internal-allow"
  network = google_compute_network.nw1-vpc.id
  allow {
    protocol = "icmp"
  }
  allow {
    protocol = "udp"
    ports    = ["0-65535"]
  }
  allow {
    protocol = "tcp"
    ports    = ["0-65535"]
  }
  source_ranges = ["10.10.0.0/16"]
  priority      = 1100
}

resource "google_compute_firewall" "nw1-iap-allow" {
  name    = "nw1-vpc-iap-allow"
  network = google_compute_network.nw1-vpc.id
  allow {
    protocol = "icmp"
  }
  allow {
    protocol = "tcp"
    ports    = ["0-65535"]
  }
  source_ranges = ["35.235.240.0/20"]
  priority      = 1200
}

resource "google_compute_address" "natpip" {
  name   = "ipv4-address"
  region = "europe-west2"
}

resource "google_compute_router" "router1" {
  name    = "nat-router1"
  region  = "europe-west2"
  network = google_compute_network.nw1-vpc.id
  bgp {
    asn = 64514
  }
}

resource "google_compute_router_nat" "nat1" {
  name                               = "natgw1"
  router                             = google_compute_router.router1.name
  region                             = "europe-west2"
  nat_ip_allocate_option             = "MANUAL_ONLY"
  nat_ips                            = [google_compute_address.natpip.self_link]
  source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
  min_ports_per_vm                   = 256
  max_ports_per_vm                   = 512
  log_config {
    enable = true
    filter = "ERRORS_ONLY"
  }
}

resource "google_compute_global_address" "private_ip_address" {
  name          = google_compute_network.nw1-vpc.name
  purpose       = "VPC_PEERING"
  address_type  = "INTERNAL"
  prefix_length = 16
  network       = google_compute_network.nw1-vpc.name
}

resource "google_service_networking_connection" "private_vpc_connection" {
  network                 = google_compute_network.nw1-vpc.id
  service                 = "servicenetworking.googleapis.com"
  reserved_peering_ranges = [google_compute_global_address.private_ip_address.name]
}


IAM Configuration

iam.tf

resource "google_project_iam_member" "member-role" {
  depends_on = [google_service_account.gke_service_account]

  for_each = toset([
    "roles/iam.serviceAccountTokenCreator",
    "roles/clouddeploy.jobRunner",
    "roles/container.developer",
  ])
  role    = each.key
  project = var.project_id
  member  = "serviceAccount:${google_service_account.gke_service_account.email}"
}


GKE Cluster Configuration

main.tf

resource "google_container_cluster" "primary" {
  depends_on = [google_service_account.gke_service_account]

  name     = "canary-quickstart-cluster"
  location = var.region

  deletion_protection      = false
  remove_default_node_pool = true
  initial_node_count       = 1
  enable_shielded_nodes    = true

  node_config {
    service_account = google_service_account.gke_service_account.email
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]
    labels = {
      foo = "bar"
    }
    tags = ["foo", "bar"]
  }

  network    = google_compute_network.nw1-vpc.self_link
  subnetwork = google_compute_subnetwork.nw1-subnet1.self_link
}

resource "google_container_node_pool" "primary_preemptible_nodes" {
  depends_on = [
    google_container_cluster.primary,
    google_service_account.gke_service_account
  ]

  name       = "my-node-pool"
  location   = var.region
  cluster    = google_container_cluster.primary.name
  node_count = 1

  node_config {
    preemptible  = true
    machine_type = "e2-medium"
    service_account = google_service_account.gke_service_account.email
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]
  }
}


Running Terraform Commands

Initialize Terraform:

terraform init

Validate the configuration:


terraform validate

Apply the configuration:


terraform apply -auto-approve


Deploying the Application

Skaffold Configuration

Create a skaffold.yaml file:

apiVersion: skaffold/v4beta7
kind: Config
manifests:
  rawYaml:
  - kubernetes.yaml
deploy:
  kubectl: {}

Kubernetes Manifest


Create a kubernetes.yaml file:

apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-deployment
  labels:
    app: my-app
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: my-app
  template:
    metadata:
      labels:
        app: my-app
    spec:
      containers:
      - name: nginx
        image: my-app-image
---
apiVersion: v1
kind: Service
metadata:
  name: my-service
  namespace: default
spec:
  selector:
    app: my-app
  ports:
    - protocol: TCP
      port: 80

Cloud Deploy Pipeline

Create a clouddeploy.yaml file:

apiVersion: deploy.cloud.google.com/v1
kind: DeliveryPipeline
metadata:
  name: my-canary-demo-app-1
description: main application pipeline
serialPipeline:
  stages:
  - targetId: prod
    profiles: []
    strategy:
      canary:
        runtimeConfig:
          kubernetes:
            serviceNetworking:
              service: "my-service"
              deployment: "my-deployment"
        canaryDeployment:
          percentages: [50]
          verify: false
---
apiVersion: deploy.cloud.google.com/v1
kind: Target
metadata:
  name: prod
description: prod GKE cluster
gke:
  cluster: projects/PROJECT_ID/locations/us-central1/clusters/canary-quickstart-cluster

Registering and Creating Releases

Register the pipeline:

gcloud deploy apply --file=clouddeploy.yaml --region=us-central1 --project=PROJECT_ID







Create the first release (skips canary phase):

gcloud deploy releases create test-release-001 \
  --project=PROJECT_ID \
  --region=us-central1 \
  --delivery-pipeline=my-canary-demo-app-1 \
  --images=my-app-image=gcr.io/google-containers/nginx@sha256:f49a843c290594dcf4d193535d1f4ba8af7d56cea2cf79d1e9554f077f1e7aaa










After the first release, the canary phase was skipped, and the rollout is waiting to start the "stable" phase, which deploys the application to 100%:

  • In the pipeline visualization, click Advance to stable.
  • When prompted, click Advance to confirm.

After a few minutes, the rollout is now in the "stable" phase, and the application is deployed to 100%.










Create the second release (executes canary deployment):

Because the first release skipped the canary phase skipped the canary phase, we'll now create another release, which this time does execute a canary deployment.

gcloud deploy releases create test-release-002 \
  --project=PROJECT_ID \
  --region=us-central1 \
  --delivery-pipeline=my-canary-demo-app-1 \
  --images=my-app-image=gcr.io/google-containers/nginx@sha256:f49a843c290594dcf4d193535d1f4ba8af7d56cea2cf79d1e9554f077f1e7aaa








When the first rollout phase finishes, the rollout is now in the canary phase:










This means that the application is now deployed to 50%. For serviceNetworking-based GKE, it's deployed to half of your pods. For Gateway API-based GKE and Cloud Run traffic is allocated to 50%.

Click Advance Rollout, then click Advance when prompted.





















Monitoring Deployment

View and manage the deployment in the Google Cloud Console under the Cloud Deploy Delivery pipelines page. Follow the instructions to advance the rollout phases as necessary.

This comprehensive guide helps you set up a canary deployment pipeline on GKE using Terraform and Google Cloud Deploy, allowing you to roll out updates gradually and safely.



Source Code:

Here on GitHub.

References:

https://cloud.google.com/deploy/docs/deploy-app-canary#gke_4

https://cloud.google.com/config-connector/docs/how-to/install-upgrade-uninstall














No comments:

Post a Comment

Creating REST APIs with OpenAPI, Spring Boot 3.3.3, Java 21, and Jakarta

 Introduction In today's software landscape, designing robust and scalable REST APIs is a crucial aspect of application development. Wit...