Difference between revisions of "Terraform resource: aws cloudwatch metric alarm"

From wikieduonline
Jump to navigation Jump to search
 
(20 intermediate revisions by 2 users not shown)
Line 1: Line 1:
 
* https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm
 
* https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm
  aws_cloudwatch_metric_alarm
+
* <code>aws_cloudwatch_metric_alarm</code>
 +
 
 +
* <code>actions_enabled    = "true" (optional)</code>
 +
 
 +
== Official example ==
 +
  resource "aws_cloudwatch_metric_alarm" "nlb_healthyhosts" {
 +
  alarm_name          = "alarmname"
 +
  comparison_operator = "[[LessThanThreshold]]"
 +
  evaluation_periods  = "1"
 +
  metric_name        = "HealthyHostCount"
 +
  namespace          = "AWS/NetworkELB"
 +
  period              = "60"
 +
  statistic          = "Average"
 +
  threshold          = var.logstash_servers_count
 +
  alarm_description  = "Number of healthy nodes in Target Group"
 +
  [[actions_enabled]]    = "true"
 +
  alarm_actions      = [aws_sns_topic.sns.arn]
 +
  ok_actions          = [aws_sns_topic.sns.arn]
 +
  dimensions = {
 +
    TargetGroup  = aws_lb_target_group.lb-tg.arn_suffix
 +
    LoadBalancer = aws_lb.lb.arn_suffix
 +
  }
 +
}
  
 
== Examples ==
 
== Examples ==
  
 +
[[LB]]
 +
resource "aws_cloudwatch_metric_alarm" "lb_targets_not_available" {
 +
  [[alarm_name]]          = format("%s-%s", lower("${var.rds_name}"), "db-highCPUUtilization")
 +
  [[comparison_operator]] = "LessThanOrEqualToThreshold"
 +
  evaluation_periods  = "2"
 +
  [[metric_name]]        = "[[CPUUtilization]]"
 +
  [[namespace]]          = "[[AWS/NetworkELB]]"
 +
  period              = "30"
 +
  [[statistic]]          = "[[SampleCount]]"
 +
  threshold          = "0"
 +
  alarm_description  = "No healthy target group available to LB listener"
 +
 +
  dimensions = {
 +
    HealthyHostCount = [[aws_lb]].your-load-balancer-name.id
 +
  }
 +
}
 +
 +
== [[AWS/RDS]] ==
 
[[CPU]]
 
[[CPU]]
 
  resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization_too_high" {
 
  resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization_too_high" {
Line 21: Line 61:
 
  }
 
  }
  
<pre>
+
[[DiskQueueDepth]]
resource "aws_cloudwatch_metric_alarm" "db_disk_queue_depth_too_high" {
+
 +
resource "aws_cloudwatch_metric_alarm" "db_disk_queue_depth_too_high" {
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-highDiskQueueDepth")
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-highDiskQueueDepth")
 
   comparison_operator = "GreaterThanThreshold"
 
   comparison_operator = "GreaterThanThreshold"
Line 32: Line 73:
 
   threshold          = "10"
 
   threshold          = "10"
 
   alarm_description  = "Average database disk queue depth is too high, performance may be negatively impacted."
 
   alarm_description  = "Average database disk queue depth is too high, performance may be negatively impacted."
 
+
 
   dimensions = {
 
   dimensions = {
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
  
resource "aws_cloudwatch_metric_alarm" "db_disk_free_storage_space_too_low" {
+
[[FreeStorageSpace]]
 +
 
 +
resource "aws_cloudwatch_metric_alarm" "db_disk_free_storage_space_too_low" {
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeStorageSpace")
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeStorageSpace")
 
   comparison_operator = "[[LessThanThreshold]]"
 
   comparison_operator = "[[LessThanThreshold]]"
Line 52: Line 95:
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
  
resource "aws_cloudwatch_metric_alarm" "db_memory_freeable_too_low" {
+
[[FreeableMemory]]
 +
resource "aws_cloudwatch_metric_alarm" "db_memory_freeable_too_low" {
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeableMemory")
 
   alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeableMemory")
 
   comparison_operator = "LessThanThreshold"
 
   comparison_operator = "LessThanThreshold"
Line 68: Line 112:
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
     DBInstanceIdentifier = aws_db_instance.rds_instance.id
 
   }
 
   }
}
+
}
</pre>
 
  
 
== ChatGPT examples for [[ALB]] (01/2023) ==
 
== ChatGPT examples for [[ALB]] (01/2023) ==
Line 77: Line 120:
 
   evaluation_periods        = "5"
 
   evaluation_periods        = "5"
 
   [[metric_name]]              = "[[HTTPCode_Backend_5XX]]"
 
   [[metric_name]]              = "[[HTTPCode_Backend_5XX]]"
   [[namespace]]                = "AWS/ApplicationELB"
+
   [[namespace]]                = "[[AWS/ApplicationELB]]"
 
   period                    = "60"
 
   period                    = "60"
 
   statistic                = "SampleCount"
 
   statistic                = "SampleCount"
Line 144: Line 187:
 
* <code>[[alarm_actions]]</code>
 
* <code>[[alarm_actions]]</code>
 
* <code>[[aws_sns_topic]]</code>
 
* <code>[[aws_sns_topic]]</code>
* [[Comparison operator]]
 
  
 
== See also ==
 
== See also ==

Latest revision as of 09:12, 20 August 2024

  • actions_enabled = "true" (optional; defaults to true, set to false to keep the alarm but suppress its actions)

Official example[edit]

# Alarm on the NLB target group: fires when the 60-second average of
# HealthyHostCount is below var.logstash_servers_count for one period.
resource "aws_cloudwatch_metric_alarm" "nlb_healthyhosts" {
  alarm_name        = "alarmname"
  alarm_description = "Number of healthy nodes in Target Group"
  actions_enabled   = "true"

  # Metric under observation.
  namespace   = "AWS/NetworkELB"
  metric_name = "HealthyHostCount"
  dimensions = {
    LoadBalancer = aws_lb.lb.arn_suffix
    TargetGroup  = aws_lb_target_group.lb-tg.arn_suffix
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = var.logstash_servers_count

  # The same SNS topic is notified on ALARM and on return to OK.
  alarm_actions = [aws_sns_topic.sns.arn]
  ok_actions    = [aws_sns_topic.sns.arn]
}

Examples[edit]

LB

# Alarm when the NLB target group has no healthy targets.
#
# HealthyHostCount is the metric being watched; TargetGroup and LoadBalancer
# are the dimensions that select which target group behind which load
# balancer. Replace the placeholder resource names with your own.
resource "aws_cloudwatch_metric_alarm" "lb_targets_not_available" {
  # Alarm names must be unique per account and region, so derive this one
  # from the load balancer rather than reusing the RDS CPU alarm name.
  alarm_name          = format("%s-%s", lower(aws_lb.your-load-balancer-name.name), "lb-noHealthyTargets")
  comparison_operator = "LessThanOrEqualToThreshold"
  evaluation_periods  = "2"
  metric_name         = "HealthyHostCount"
  namespace           = "AWS/NetworkELB"
  # AWS service metrics are standard resolution; sub-minute periods (10/30)
  # only work for high-resolution custom metrics.
  period = "60"
  # Minimum reports the lowest healthy-host count seen in the period.
  # SampleCount would count data points, not hosts.
  statistic         = "Minimum"
  threshold         = "0"
  alarm_description = "No healthy target group available to LB listener"

  dimensions = {
    TargetGroup  = aws_lb_target_group.your-target-group-name.arn_suffix
    LoadBalancer = aws_lb.your-load-balancer-name.arn_suffix
  }
}

AWS/RDS[edit]

CPU

# RDS CPU alarm: fires when the 120-second average CPUUtilization stays
# above 80 for five consecutive periods.
resource "aws_cloudwatch_metric_alarm" "db_cpu_utilization_too_high" {
  alarm_name        = format("%s-%s", lower("${var.rds_name}"), "db-highCPUUtilization")
  alarm_description = "Average database CPU utilization is too high."

  # Metric under observation, scoped to a single DB instance.
  namespace   = "AWS/RDS"
  metric_name = "CPUUtilization"
  dimensions = {
    DBInstanceIdentifier = aws_db_instance.rds_instance.id
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "120"
  evaluation_periods  = "5"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "80"
}

DiskQueueDepth

# RDS disk queue alarm: fires when the 120-second average DiskQueueDepth
# exceeds 10 for two consecutive periods.
resource "aws_cloudwatch_metric_alarm" "db_disk_queue_depth_too_high" {
  alarm_name        = format("%s-%s", lower("${var.rds_name}"), "db-highDiskQueueDepth")
  alarm_description = "Average database disk queue depth is too high, performance may be negatively impacted."

  # Metric under observation, scoped to a single DB instance.
  namespace   = "AWS/RDS"
  metric_name = "DiskQueueDepth"
  dimensions = {
    DBInstanceIdentifier = aws_db_instance.rds_instance.id
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "120"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = "10"
}

FreeStorageSpace

# RDS free-storage alarm: fires when the 120-second average FreeStorageSpace
# stays below the threshold for two consecutive periods.
resource "aws_cloudwatch_metric_alarm" "db_disk_free_storage_space_too_low" {
  alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeStorageSpace")
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "FreeStorageSpace"
  namespace           = "AWS/RDS"
  period              = "120"
  statistic           = "Average"
  # FreeStorageSpace is reported in bytes. 10737418240 bytes = 10 GiB;
  # a threshold of 10 would mean 10 bytes and would never give early
  # warning. Tune to the instance's allocated storage.
  threshold         = "10737418240"
  alarm_description = "Average database free storage space is too low and may fill up soon."
  dimensions = {
    DBInstanceIdentifier = aws_db_instance.rds_instance.id
  }
}

FreeableMemory

# RDS freeable-memory alarm: fires when the 120-second average FreeableMemory
# stays below the threshold for two consecutive periods.
resource "aws_cloudwatch_metric_alarm" "db_memory_freeable_too_low" {
  alarm_name          = format("%s-%s", lower("${var.rds_name}"), "db-lowFreeableMemory")
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = "2"
  metric_name         = "FreeableMemory"
  namespace           = "AWS/RDS"
  period              = "120"
  statistic           = "Average"
  # FreeableMemory is reported in bytes. 268435456 bytes = 256 MiB;
  # a threshold of 10 would mean 10 bytes. Tune to the instance class.
  threshold         = "268435456"
  alarm_description = "Average database freeable memory is too low, performance may be negatively impacted."
  dimensions = {
    DBInstanceIdentifier = aws_db_instance.rds_instance.id
  }
}

ChatGPT examples for ALB (01/2023)[edit]

# ALB target 5XX alarm: fires when targets return more than one 5XX response
# per minute for five consecutive minutes.
resource "aws_cloudwatch_metric_alarm" "elb_5xx_error_rate" {
  alarm_name          = "elb_5xx_error_rate"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  # HTTPCode_Backend_5XX belongs to Classic ELB (AWS/ELB). In
  # AWS/ApplicationELB the target-generated 5XX metric is
  # HTTPCode_Target_5XX_Count.
  metric_name = "HTTPCode_Target_5XX_Count"
  namespace   = "AWS/ApplicationELB"
  period      = "60"
  # Sum gives the number of 5XX responses in the period; SampleCount would
  # not reflect the error volume.
  statistic = "Sum"
  threshold = "1"
  # This compares an absolute count, not a percentage of requests.
  alarm_description = "This alarm will notify when targets return more than 1 5XX response per minute for 5 consecutive minutes"
  alarm_actions     = [aws_sns_topic.example.arn]
  dimensions = {
    # The LoadBalancer dimension takes the ARN suffix (app/<name>/<id>),
    # exposed by the aws_lb resource; aws_elbv2_load_balancer is not a
    # Terraform resource type.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# ALB latency alarm: fires when the 60-second average TargetResponseTime
# exceeds 2 seconds for five consecutive data points.
resource "aws_cloudwatch_metric_alarm" "elb_response_time" {
  alarm_name          = "elb_response_time"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "TargetResponseTime"
  namespace           = "AWS/ApplicationELB"
  period              = "60"
  statistic           = "Average"
  threshold           = "2"
  alarm_description   = "This alarm will notify when the average response time is greater than 2 seconds for 5 consecutive data points"
  alarm_actions       = [aws_sns_topic.example.arn]
  dimensions = {
    # The LoadBalancer dimension takes the ARN suffix (app/<name>/<id>),
    # exposed by the aws_lb resource; aws_elbv2_load_balancer is not a
    # Terraform resource type.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# ALB latency alarm: fires when the 60-second maximum TargetResponseTime
# exceeds 1 second for five consecutive data points.
#
# The resource label and alarm_name carry a "_max" suffix because Terraform
# rejects two resources with the same type and name, and CloudWatch alarm
# names must be unique, so this cannot share "elb_response_time" with the
# average-based alarm.
resource "aws_cloudwatch_metric_alarm" "elb_response_time_max" {
  alarm_name          = "elb_response_time_max"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "TargetResponseTime"
  namespace           = "AWS/ApplicationELB"
  period              = "60"
  statistic           = "Maximum"
  threshold           = "1"
  alarm_description   = "This alarm will notify when the maximum response time is greater than 1 second for 5 consecutive data points"
  alarm_actions       = [aws_sns_topic.example.arn]
  dimensions = {
    # The LoadBalancer dimension takes the ARN suffix (app/<name>/<id>),
    # exposed by the aws_lb resource; aws_elbv2_load_balancer is not a
    # Terraform resource type.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# ALB latency alarm: fires when the 90th percentile of TargetResponseTime
# over 60 seconds exceeds 1 second for five consecutive data points.
resource "aws_cloudwatch_metric_alarm" "elb_response_time_90th_percentile" {
  alarm_name          = "elb_response_time_90th_percentile"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "5"
  metric_name         = "TargetResponseTime"
  namespace           = "AWS/ApplicationELB"
  period              = "60"
  # Percentiles go in extended_statistic; the statistic argument only
  # accepts SampleCount, Average, Sum, Minimum and Maximum.
  extended_statistic = "p90"
  threshold          = "1"
  alarm_description  = "This alarm will notify when the 90th percentile of the response time is greater than 1 second for 5 consecutive data points"
  alarm_actions      = [aws_sns_topic.example.arn]
  dimensions = {
    # The LoadBalancer dimension takes the ARN suffix (app/<name>/<id>),
    # exposed by the aws_lb resource; aws_elbv2_load_balancer is not a
    # Terraform resource type.
    LoadBalancer = aws_lb.example.arn_suffix
  }
}
# SNS topic referenced as aws_sns_topic.example.arn by the alarm_actions
# of the ALB alarms in this section.
resource "aws_sns_topic" "example" {
  name = "example-topic"
}

Related[edit]

See also[edit]

Advertising: