Difference between revisions of "Terraform resource: aws cloudwatch metric alarm"

From wikieduonline
Jump to navigation Jump to search
 
(37 intermediate revisions by 4 users not shown)
Line 1: Line 1:
 
* https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm
 
* https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm
 +
* <code>aws_cloudwatch_metric_alarm</code>
  
<pre>
+
* <code>actions_enabled    = "true" (optional)</code>
resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization_too_high" {
+
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-highCPUUtilization")
+
== Official example ==
   comparison_operator = "GreaterThanThreshold"
+
resource "aws_cloudwatch_metric_alarm" "nlb_healthyhosts" {
 +
  alarm_name          = "alarmname"
 +
  comparison_operator = "[[LessThanThreshold]]"
 +
  evaluation_periods  = "1"
 +
  metric_name        = "HealthyHostCount"
 +
  namespace          = "AWS/NetworkELB"
 +
  period              = "60"
 +
  statistic          = "Average"
 +
  threshold          = var.logstash_servers_count
 +
  alarm_description  = "Number of healthy nodes in Target Group"
 +
  [[actions_enabled]]    = "true"
 +
  alarm_actions      = [aws_sns_topic.sns.arn]
 +
  ok_actions          = [aws_sns_topic.sns.arn]
 +
  dimensions = {
 +
    TargetGroup  = aws_lb_target_group.lb-tg.arn_suffix
 +
    LoadBalancer = aws_lb.lb.arn_suffix
 +
  }
 +
}
 +
 
 +
== Examples ==
 +
 
 +
[[LB]]
 +
resource "aws_cloudwatch_metric_alarm" "lb_targets_not_available" {
 +
  [[alarm_name]]          = format("%s-%s", lower("${var.rds_name}"), "db-highCPUUtilization")
 +
  [[comparison_operator]] = "LessThanOrEqualToThreshold"
 +
  evaluation_periods  = "2"
 +
  [[metric_name]]        = "[[CPUUtilization]]"
 +
  [[namespace]]          = "[[AWS/NetworkELB]]"
 +
  period              = "30"
 +
  [[statistic]]          = "[[SampleCount]]"
 +
  threshold          = "0"
 +
  alarm_description  = "No healthy target group available to LB listener"
 +
 +
  dimensions = {
 +
    HealthyHostCount = [[aws_lb]].your-load-balancer-name.id
 +
  }
 +
}
 +
 
 +
== [[AWS/RDS]] ==
 +
[[CPU]]
 +
resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization_too_high" {
 +
   [[alarm_name]]         = format("%s-%s", lower("${var.rds_name}"), "db-highCPUUtilization")
 +
   [[comparison_operator]] = "GreaterThanThreshold"
 
   evaluation_periods  = "5"
 
   evaluation_periods  = "5"
   metric_name        = "CPUUtilization"
+
   [[metric_name]]         = "[[CPUUtilization]]"
   namespace          = "AWS/RDS"
+
   namespace          = "[[AWS/RDS]]"
 
   period              = "120"
 
   period              = "120"
 
   statistic          = "Average"
 
   statistic          = "Average"
 
   threshold          = "80"
 
   threshold          = "80"
 
   alarm_description  = "Average database CPU utilization is too high."
 
   alarm_description  = "Average database CPU utilization is too high."
 
+
 
   dimensions = {
 
   dimensions = {
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
  
resource "aws_cloudwatch_metric_alarm" "db_disk_queue_depth_too_high" {
+
[[DiskQueueDepth]]
 +
 +
resource "aws_cloudwatch_metric_alarm" "db_disk_queue_depth_too_high" {
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-highDiskQueueDepth")
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-highDiskQueueDepth")
 
   comparison_operator = "GreaterThanThreshold"
 
   comparison_operator = "GreaterThanThreshold"
Line 28: Line 73:
 
   threshold          = "10"
 
   threshold          = "10"
 
   alarm_description  = "Average database disk queue depth is too high, performance may be negatively impacted."
 
   alarm_description  = "Average database disk queue depth is too high, performance may be negatively impacted."
 
+
 
   dimensions = {
 
   dimensions = {
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
  
resource "aws_cloudwatch_metric_alarm" "db_disk_free_storage_space_too_low" {
+
[[FreeStorageSpace]]
 +
 
 +
resource "aws_cloudwatch_metric_alarm" "db_disk_free_storage_space_too_low" {
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeStorageSpace")
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeStorageSpace")
   comparison_operator = "LessThanThreshold"
+
   comparison_operator = "[[LessThanThreshold]]"
 
   evaluation_periods  = "2"
 
   evaluation_periods  = "2"
 
   metric_name        = "FreeStorageSpace"
 
   metric_name        = "FreeStorageSpace"
Line 48: Line 95:
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
  
resource "aws_cloudwatch_metric_alarm" "db_memory_freeable_too_low" {
+
[[FreeableMemory]]
 +
resource "aws_cloudwatch_metric_alarm" "db_memory_freeable_too_low" {
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeableMemory")
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeableMemory")
 
   comparison_operator = "LessThanThreshold"
 
   comparison_operator = "LessThanThreshold"
Line 64: Line 112:
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
</pre>
+
 
 +
== ChatGPT examples for [[ALB]] (01/2023) ==
 +
resource "aws_cloudwatch_metric_alarm" "elb_5xx_error_rate" {
 +
  alarm_name                = "elb_5xx_error_rate"
 +
  comparison_operator      = "GreaterThanThreshold"
 +
  evaluation_periods        = "5"
 +
  [[metric_name]]              = "[[HTTPCode_Backend_5XX]]"
 +
  [[namespace]]                = "[[AWS/ApplicationELB]]"
 +
  period                    = "60"
 +
  statistic                = "SampleCount"
 +
  threshold                = "1"
 +
  alarm_description        = "This alarm will notify when the 5XX error rate is greater than 1% for 5 consecutive minutes"
 +
  [[alarm_actions]]            = [aws_sns_topic.example.arn]
 +
  dimensions = {
 +
    LoadBalancer = aws_elbv2_load_balancer.example.name
 +
  }
 +
}
  
 +
resource "aws_cloudwatch_metric_alarm" "elb_response_time" {
 +
  alarm_name                = "elb_response_time"
 +
  comparison_operator      = "GreaterThanThreshold"
 +
  evaluation_periods        = "5"
 +
  metric_name              = "[[TargetResponseTime]]"
 +
  namespace                = "AWS/ApplicationELB"
 +
  period                    = "60"
 +
  statistic                = "[[Average]]"
 +
  threshold                = "2"
 +
  alarm_description        = "This alarm will notify when the average response time is greater than 2 seconds for 5 consecutive data points"
 +
  alarm_actions            = [aws_sns_topic.example.arn]
 +
  dimensions = {
 +
    LoadBalancer = aws_elbv2_load_balancer.example.name
 +
  }
 +
}
  
 +
resource "aws_cloudwatch_metric_alarm" "elb_response_time" {
 +
  alarm_name                = "elb_response_time"
 +
  comparison_operator      = "GreaterThanThreshold"
 +
  evaluation_periods        = "5"
 +
  metric_name              = "[[TargetResponseTime]]"
 +
  namespace                = "AWS/ApplicationELB"
 +
  period                    = "60"
 +
  statistic                = "[[Maximum]]"
 +
  threshold                = "1"
 +
  alarm_description        = "This alarm will notify when the maximum response time is greater than 1 second for 5 consecutive data points"
 +
  alarm_actions            = [aws_sns_topic.example.arn]
 +
  dimensions = {
 +
    LoadBalancer = aws_elbv2_load_balancer.example.name
 +
  }
 +
}
  
 +
resource "aws_cloudwatch_metric_alarm" "elb_response_time_90th_percentile" {
 +
  alarm_name                = "elb_response_time_90th_percentile"
 +
  comparison_operator      = "GreaterThanThreshold"
 +
  evaluation_periods        = "5"
 +
  metric_name              = "TargetResponseTime"
 +
  namespace                = "AWS/ApplicationELB"
 +
  period                    = "60"
 +
  statistic                = "p90"
 +
  threshold                = "1"
 +
  alarm_description        = "This alarm will notify when the 90th percentile of the response time is greater than 1 second for 5 consecutive data points"
 +
  [[alarm_actions]]            = [aws_sns_topic.example.arn]
 +
  dimensions = {
 +
    LoadBalancer = aws_elbv2_load_balancer.example.name
 +
  }
 +
}
  
 +
resource "[[aws_sns_topic]]" "example" {
 +
  name = "example-topic"
 +
}
  
 
== Related ==
 
== Related ==
 
* <code>[[aws cloudwatch metric alarm]]</code>
 
* <code>[[aws cloudwatch metric alarm]]</code>
 
* <code>[[alarm_actions]]</code>
 
* <code>[[alarm_actions]]</code>
 +
* <code>[[aws_sns_topic]]</code>
  
 
== See also ==
 
== See also ==

Latest revision as of 09:12, 20 August 2024

  • actions_enabled = "true" (optional)

Official example[edit]

# Healthy-host alarm for a Network Load Balancer target group.
# Enters ALARM when the Average of HealthyHostCount over one 60-second period
# is below var.logstash_servers_count. The same SNS topic is notified on both
# the ALARM and the OK transition.
resource "aws_cloudwatch_metric_alarm" "nlb_healthyhosts" {
  # Identity
  alarm_name        = "alarmname"
  alarm_description = "Number of healthy nodes in Target Group"
  actions_enabled   = "true"

  # Which metric to watch
  namespace   = "AWS/NetworkELB"
  metric_name = "HealthyHostCount"
  dimensions = {
    LoadBalancer = aws_lb.lb.arn_suffix
    TargetGroup  = aws_lb_target_group.lb-tg.arn_suffix
  }

  # How the metric is evaluated
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = var.logstash_servers_count

  # Who gets notified
  alarm_actions = [aws_sns_topic.sns.arn]
  ok_actions    = [aws_sns_topic.sns.arn]
}

Examples[edit]

LB

# Alarm when a Network Load Balancer target group has no healthy targets.
#
# Fixes over the previous revision of this example:
#   * alarm_name no longer reuses the RDS "db-highCPUUtilization" name.
#   * metric_name is HealthyHostCount; CPUUtilization is not published in the
#     AWS/NetworkELB namespace.
#   * HealthyHostCount is a metric, not a dimension. The metric is keyed by the
#     LoadBalancer and TargetGroup dimensions, both given as ARN suffixes.
#   * statistic is Minimum, so the alarm looks at the number of healthy hosts;
#     SampleCount would count datapoints instead.
#   * period is 60 seconds; a 30-second period is only meant for
#     high-resolution custom metrics.
resource "aws_cloudwatch_metric_alarm" "lb_targets_not_available" {
  alarm_name          = format("%s-%s", lower(aws_lb.your-load-balancer-name.name), "lb-noHealthyTargets")
  alarm_description   = "No healthy target group available to LB listener"
  comparison_operator = "LessThanOrEqualToThreshold"
  evaluation_periods  = "2"
  metric_name         = "HealthyHostCount"
  namespace           = "AWS/NetworkELB"
  period              = "60"
  statistic           = "Minimum"
  threshold           = "0"

  dimensions = {
    LoadBalancer = aws_lb.your-load-balancer-name.arn_suffix
    # Replace with the target group attached to the load balancer above.
    TargetGroup = aws_lb_target_group.your-target-group-name.arn_suffix
  }
}

AWS/RDS[edit]

CPU

# Alarm when the Average CPUUtilization of the RDS instance is above 80
# for 5 consecutive 120-second periods.
resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization_too_high" {
  # lower(var.rds_name) is equivalent to lower("${var.rds_name}") without the
  # redundant interpolation-only template.
  alarm_name          = format("%s-%s", lower(var.rds_name), "db-highCPUUtilization")
  alarm_description   = "Average database CPU utilization is too high."
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "CPUUtilization"
  namespace           = "AWS/RDS"
  period              = "120"
  statistic           = "Average"
  threshold           = "80"

  dimensions = {
    # Use `identifier`, not `id`: since AWS provider v5 `id` holds the DBI
    # resource ID, which does not match the DBInstanceIdentifier dimension.
    DBInstanceIdentifier = aws_db_instance.rds_instance.identifier
  }
}

DiskQueueDepth

# Alarm when the Average DiskQueueDepth of the RDS instance is above 10
# for 2 consecutive 120-second periods.
resource "aws_cloudwatch_metric_alarm" "db_disk_queue_depth_too_high" {
  # lower(var.rds_name) is equivalent to lower("${var.rds_name}") without the
  # redundant interpolation-only template.
  alarm_name          = format("%s-%s", lower(var.rds_name), "db-highDiskQueueDepth")
  alarm_description   = "Average database disk queue depth is too high, performance may be negatively impacted."
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "DiskQueueDepth"
  namespace           = "AWS/RDS"
  period              = "120"
  statistic           = "Average"
  threshold           = "10"

  dimensions = {
    # Use `identifier`, not `id`: since AWS provider v5 `id` holds the DBI
    # resource ID, which does not match the DBInstanceIdentifier dimension.
    DBInstanceIdentifier = aws_db_instance.rds_instance.identifier
  }
}

FreeStorageSpace

# Alarm when the Average FreeStorageSpace of the RDS instance is below the
# threshold for 2 consecutive 120-second periods.
resource "aws_cloudwatch_metric_alarm" "db_disk_free_storage_space_too_low" {
  # lower(var.rds_name) is equivalent to lower("${var.rds_name}") without the
  # redundant interpolation-only template.
  alarm_name          = format("%s-%s", lower(var.rds_name), "db-lowFreeStorageSpace")
  alarm_description   = "Average database free storage space is too low and may fill up soon."
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "FreeStorageSpace"
  namespace           = "AWS/RDS"
  period              = "120"
  statistic           = "Average"

  # FreeStorageSpace is reported in bytes, so the previous threshold of "10"
  # would only fire with fewer than 10 bytes free. 10 GiB is an example value;
  # tune it to the allocated storage of the instance.
  threshold = 10 * 1024 * 1024 * 1024

  dimensions = {
    # Use `identifier`, not `id`: since AWS provider v5 `id` holds the DBI
    # resource ID, which does not match the DBInstanceIdentifier dimension.
    DBInstanceIdentifier = aws_db_instance.rds_instance.identifier
  }
}

FreeableMemory

# Alarm when the Average FreeableMemory of the RDS instance is below the
# threshold for 2 consecutive 120-second periods.
resource "aws_cloudwatch_metric_alarm" "db_memory_freeable_too_low" {
  # lower(var.rds_name) is equivalent to lower("${var.rds_name}") without the
  # redundant interpolation-only template.
  alarm_name          = format("%s-%s", lower(var.rds_name), "db-lowFreeableMemory")
  alarm_description   = "Average database freeable memory is too low, performance may be negatively impacted."
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "FreeableMemory"
  namespace           = "AWS/RDS"
  period              = "120"
  statistic           = "Average"

  # FreeableMemory is reported in bytes, so the previous threshold of "10"
  # would only fire with fewer than 10 bytes free. 256 MiB is an example
  # value; tune it to the memory of the instance class.
  threshold = 256 * 1024 * 1024

  dimensions = {
    # Use `identifier`, not `id`: since AWS provider v5 `id` holds the DBI
    # resource ID, which does not match the DBInstanceIdentifier dimension.
    DBInstanceIdentifier = aws_db_instance.rds_instance.identifier
  }
}

ChatGPT examples for ALB (01/2023)[edit]

# Alarm when the targets behind an Application Load Balancer return more than
# one HTTP 5XX response per minute for 5 consecutive minutes.
#
# Fixes over the previous revision of this example:
#   * HTTPCode_Backend_5XX is a Classic ELB (AWS/ELB) metric; the
#     AWS/ApplicationELB equivalent is HTTPCode_Target_5XX_Count.
#   * statistic is Sum, so the threshold applies to the number of 5XX
#     responses in the period.
#   * The description no longer claims a percentage: this alarm compares a
#     count, not an error rate.
#   * The load balancer resource type is aws_lb, and the LoadBalancer dimension
#     takes its ARN suffix rather than its name.
resource "aws_cloudwatch_metric_alarm" "elb_5xx_error_rate" {
  alarm_name          = "elb_5xx_error_rate"
  alarm_description   = "This alarm will notify when targets return more than 1 HTTP 5XX response per minute for 5 consecutive minutes"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "HTTPCode_Target_5XX_Count"
  namespace           = "AWS/ApplicationELB"
  period              = "60"
  statistic           = "Sum"
  threshold           = "1"
  alarm_actions       = [aws_sns_topic.example.arn]

  dimensions = {
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# Alarm when the Average TargetResponseTime of an Application Load Balancer is
# above 2 seconds for 5 consecutive 60-second periods.
resource "aws_cloudwatch_metric_alarm" "elb_response_time" {
  alarm_name          = "elb_response_time"
  alarm_description   = "This alarm will notify when the average response time is greater than 2 seconds for 5 consecutive data points"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "TargetResponseTime"
  namespace           = "AWS/ApplicationELB"
  period              = "60"
  statistic           = "Average"
  threshold           = "2"
  alarm_actions       = [aws_sns_topic.example.arn]

  dimensions = {
    # The load balancer resource type is aws_lb (aws_elbv2_load_balancer is not
    # a provider resource type), and the LoadBalancer dimension takes the ARN
    # suffix rather than the name.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# Alarm when the Maximum TargetResponseTime of an Application Load Balancer is
# above 1 second for 5 consecutive 60-second periods.
#
# The resource label and alarm_name carry a "_max" suffix: the previous
# revision reused "elb_response_time", which is already taken by the
# Average-based alarm. Terraform rejects two resources with the same type and
# label, and CloudWatch alarm names must be unique per account and region.
resource "aws_cloudwatch_metric_alarm" "elb_response_time_max" {
  alarm_name          = "elb_response_time_max"
  alarm_description   = "This alarm will notify when the maximum response time is greater than 1 second for 5 consecutive data points"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "TargetResponseTime"
  namespace           = "AWS/ApplicationELB"
  period              = "60"
  statistic           = "Maximum"
  threshold           = "1"
  alarm_actions       = [aws_sns_topic.example.arn]

  dimensions = {
    # The load balancer resource type is aws_lb (aws_elbv2_load_balancer is not
    # a provider resource type), and the LoadBalancer dimension takes the ARN
    # suffix rather than the name.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# Alarm when the 90th percentile of TargetResponseTime of an Application Load
# Balancer is above 1 second for 5 consecutive 60-second periods.
resource "aws_cloudwatch_metric_alarm" "elb_response_time_90th_percentile" {
  alarm_name          = "elb_response_time_90th_percentile"
  alarm_description   = "This alarm will notify when the 90th percentile of the response time is greater than 1 second for 5 consecutive data points"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "TargetResponseTime"
  namespace           = "AWS/ApplicationELB"
  period              = "60"

  # Percentiles go in extended_statistic. `statistic` only accepts
  # SampleCount, Average, Sum, Minimum and Maximum, so "p90" there is rejected.
  extended_statistic = "p90"

  threshold     = "1"
  alarm_actions = [aws_sns_topic.example.arn]

  dimensions = {
    # The load balancer resource type is aws_lb (aws_elbv2_load_balancer is not
    # a provider resource type), and the LoadBalancer dimension takes the ARN
    # suffix rather than the name.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# SNS topic referenced as aws_sns_topic.example.arn by the alarm_actions of the
# ALB alarm examples on this page.
resource "aws_sns_topic" "example" {
 name = "example-topic"
}

Related[edit]

See also[edit]

Advertising: