1
0

initial commit

This commit is contained in:
xpk
2026-02-13 15:44:24 +08:00
parent 66be8224f4
commit 09ce4c881a
570 changed files with 61807 additions and 0 deletions
@@ -0,0 +1,27 @@
# Monitoring module
This module deploys the default CloudWatch metric alarms for EKS pods (CPU utilization, memory utilization and container restarts).
## Notes
Terraform lifecycle ignores tags to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
Unlike other monitoring modules, which discover resource details automatically, this module needs the EKS pod names to be supplied explicitly,
because the AWS CLI does not provide pod information.
## Example
```terraform
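data "aws_eks_clusters" "eks-clusters" {}
module "eks-monitoring" {
  for_each        = data.aws_eks_clusters.eks-clusters.names
  source          = "../../modules/ManagementGovernance/Monitoring.EKS"
  cw-alarm-prefix = local.cw-alarm-prefix
  cluster-name    = each.value
  eks-namespace   = "default"
  pod-names       = ["depl-nginx", "depl-alpine"]
  actions-enabled = var.actions-enabled

  # One entry per alarm: alarm1 = pod CPU, alarm2 = pod memory, alarm3 = container restarts.
  # "action" is the ARN notified on both ALARM and OK transitions
  # (local.sns-topic-arn is a placeholder for your own SNS topic ARN).
  settings = {
    alarm1 = {
      metric              = "pod_cpu_utilization"
      comparison_operator = "GreaterThanThreshold"
      evaluation_periods  = 3
      period              = 300
      statistic           = "Average"
      threshold           = 85
      action              = local.sns-topic-arn
    }
    alarm2 = {
      metric    = "pod_memory_utilization"
      period    = 300
      statistic = "Average"
      threshold = 85
      action    = local.sns-topic-arn
    }
    alarm3 = {
      metric    = "pod_number_of_container_restarts"
      period    = 300
      statistic = "Maximum"
      threshold = 5
      action    = local.sns-topic-arn
    }
  }
}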
```
@@ -0,0 +1,69 @@
// The following alarms require Container Insights (they read metrics from the
// "ContainerInsights" CloudWatch namespace).

// Pod CPU alarm: one CloudWatch alarm is created per entry in var.pod-names.
// Metric name, comparison operator, evaluation periods, period, statistic,
// threshold and notification target are all read from var.settings.alarm1.
resource "aws_cloudwatch_metric_alarm" "eks-pod_cpu_utilization" {
  for_each = toset(var.pod-names)

  // each.value is the pod name itself (a plain string, because for_each
  // iterates a set of strings), so it cannot be indexed. The name is built from
  // the prefix, cluster, pod and metric, which keeps it unique per pod.
  alarm_name        = "${var.cw-alarm-prefix}:EKS:${var.cluster-name}:${each.value}:${var.settings.alarm1.metric}"
  alarm_description = "EKS:${var.settings.alarm1.metric}"

  namespace           = "ContainerInsights"
  metric_name         = var.settings.alarm1.metric
  statistic           = var.settings.alarm1.statistic
  period              = var.settings.alarm1.period
  comparison_operator = var.settings.alarm1.comparison_operator
  evaluation_periods  = var.settings.alarm1.evaluation_periods
  threshold           = var.settings.alarm1.threshold

  // The same target is notified when the alarm fires and when it recovers;
  // nothing is sent for INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.alarm1.action]
  ok_actions                = [var.settings.alarm1.action]
  insufficient_data_actions = []

  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }
}
// Pod memory alarm: one CloudWatch alarm is created per entry in var.pod-names.
// Metric name, period, statistic, threshold and notification target are read
// from var.settings.alarm2.
resource "aws_cloudwatch_metric_alarm" "eks-pod_memory_utilization" {
  for_each = toset(var.pod-names)

  // each.value is the pod name itself (a plain string, because for_each
  // iterates a set of strings), so it cannot be indexed. The name is built from
  // the prefix, cluster, pod and metric, which keeps it unique per pod.
  alarm_name        = "${var.cw-alarm-prefix}:EKS:${var.cluster-name}:${each.value}:${var.settings.alarm2.metric}"
  alarm_description = "EKS:${var.settings.alarm2.metric}"

  namespace   = "ContainerInsights"
  metric_name = var.settings.alarm2.metric
  statistic   = var.settings.alarm2.statistic
  period      = var.settings.alarm2.period
  threshold   = var.settings.alarm2.threshold

  // Optional overrides, mirroring the settings the CPU alarm reads from
  // alarm1. When absent, the previous fixed values are used.
  comparison_operator = try(var.settings.alarm2.comparison_operator, "GreaterThanThreshold")
  evaluation_periods  = try(var.settings.alarm2.evaluation_periods, 3)

  // The same target is notified when the alarm fires and when it recovers;
  // nothing is sent for INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.alarm2.action]
  ok_actions                = [var.settings.alarm2.action]
  insufficient_data_actions = []

  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }
}
// Container-restart alarm: one CloudWatch alarm is created per entry in
// var.pod-names. Metric name, period, statistic, threshold and notification
// target are read from var.settings.alarm3.
resource "aws_cloudwatch_metric_alarm" "eks-pod_number_of_container_restarts" {
  for_each = toset(var.pod-names)

  // each.value is the pod name itself (a plain string, because for_each
  // iterates a set of strings), so it cannot be indexed. The name is built from
  // the prefix, cluster, pod and metric, which keeps it unique per pod.
  alarm_name        = "${var.cw-alarm-prefix}:EKS:${var.cluster-name}:${each.value}:${var.settings.alarm3.metric}"
  alarm_description = "EKS:${var.settings.alarm3.metric}"

  namespace   = "ContainerInsights"
  metric_name = var.settings.alarm3.metric
  statistic   = var.settings.alarm3.statistic
  period      = var.settings.alarm3.period
  threshold   = var.settings.alarm3.threshold

  // Optional overrides, mirroring the settings the CPU alarm reads from
  // alarm1. When absent, the previous fixed values are used.
  comparison_operator = try(var.settings.alarm3.comparison_operator, "GreaterThanThreshold")
  evaluation_periods  = try(var.settings.alarm3.evaluation_periods, 3)

  // The same target is notified when the alarm fires and when it recovers;
  // nothing is sent for INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.alarm3.action]
  ok_actions                = [var.settings.alarm3.action]
  insufficient_data_actions = []

  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }
}
@@ -0,0 +1,9 @@
// Version requirements for this module.
terraform {
  // Only a minimum is declared: as a reusable module this should not lock
  // callers to the 1.3.x patch series; the root module can pin an upper bound.
  required_version = ">= 1.3.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 4.36.1"
    }
  }
}
@@ -0,0 +1,8 @@
// Input variables for the EKS pod monitoring module.

variable "cw-alarm-prefix" {
  description = "Prefix included in the name of every CloudWatch alarm created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Passed to actions_enabled on every alarm; controls whether alarm/OK actions are executed."
  type        = bool
}

variable "cluster-name" {
  description = "EKS cluster name; used in alarm names and as the ClusterName metric dimension."
  type        = string
}

variable "eks-namespace" {
  description = "Kubernetes namespace of the monitored pods; used as the Namespace metric dimension."
  type        = string
}

variable "pod-names" {
  description = "Pod names to monitor; one alarm of each kind is created per name (PodName metric dimension)."
  type        = list(string)
}

variable "settings" {
  description = "Per-alarm settings keyed alarm1 (pod CPU), alarm2 (pod memory) and alarm3 (container restarts). Each entry supplies at least metric, period, statistic, threshold and action; alarm1 additionally supplies comparison_operator and evaluation_periods."
  // Left untyped so callers can pass any object/map shape carrying the attributes above.
  type = any
}