initial commit
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
<!-- This readme file is generated with terraform-docs -->
|
||||
This module installs Cloudwatch agent via SSM State Manager.
|
||||
It creates an association that installs the agent on all instances once a day.
|
||||
|
||||
Then a default cloudwatch agent config is generated using amazon-cloudwatch-agent-config-wizard,
|
||||
saved on /opt/aws/amazon-cloudwatch-agent/bin/config.json, supplemented with additional collections,
|
||||
and uploaded on SSM parameter store as ```AmazonCloudWatch-linux```.
|
||||
|
||||
Note that for cloudwatch agent to fully function, the instance needs an instance profile with the
|
||||
following managed policies attached:
|
||||
|
||||
* CloudWatchAgentServerPolicy
|
||||
* AmazonSSMManagedInstanceCore
|
||||
|
||||
## Requirements
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| terraform | >= 1.3.0 |
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Providers
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Modules
|
||||
|
||||
No modules.
|
||||
|
||||
## Resources
|
||||
|
||||
| Name | Type |
|
||||
|------|------|
|
||||
| [aws_ssm_association.ConfigCwAgent](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_association) | resource |
|
||||
| [aws_ssm_association.InstallCwAgent](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_association) | resource |
|
||||
| [aws_ssm_parameter.CwAgentConfigLinux](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_parameter) | resource |
|
||||
|
||||
## Inputs
|
||||
|
||||
No inputs.
|
||||
|
||||
## Outputs
|
||||
|
||||
No outputs.
|
||||
|
||||
---
|
||||
## Authorship
|
||||
This module was developed by UPDATE_THIS.
|
||||
@@ -0,0 +1,135 @@
|
||||
# State Manager association that runs the AWS-ConfigureAWSPackage document on a
# schedule to install the AmazonCloudWatchAgent package.
resource "aws_ssm_association" "InstallCwAgent" {
  association_name    = "CwAgentInstall"
  name                = "AWS-ConfigureAWSPackage"
  schedule_expression = "cron(0 00 01 ? * * *)"
  max_concurrency     = 10

  # Wildcard target: the association is not restricted to specific instance IDs.
  targets {
    values = ["*"]
    key    = "InstanceIds"
  }

  # Inputs passed to the AWS-ConfigureAWSPackage document.
  parameters = {
    action              = "Install"
    additionalArguments = "{}"
    installationType    = "Uninstall and reinstall"
    name                = "AmazonCloudWatchAgent"
  }
}
|
||||
|
||||
# State Manager association that runs AmazonCloudWatch-ManageAgent on a schedule
# to (re)configure the CloudWatch agent from the config stored in SSM Parameter
# Store and restart it.
resource "aws_ssm_association" "ConfigCwAgent" {
  name                = "AmazonCloudWatch-ManageAgent"
  association_name    = "CwAgentConfiguration"
  schedule_expression = "cron(0 00 02 ? * * *)"
  max_concurrency     = 10

  parameters = {
    action = "configure"
    # Reference the parameter resource instead of repeating its name, so Terraform
    # creates the parameter before this association and the two cannot drift apart.
    optionalConfigurationLocation = aws_ssm_parameter.CwAgentConfigLinux.name
    optionalConfigurationSource   = "ssm"
    mode                          = "ec2"
    optionalRestart               = "yes"
  }

  # Wildcard target: the association is not restricted to specific instance IDs.
  targets {
    key    = "InstanceIds"
    values = ["*"]
  }
}
|
||||
|
||||
# Stores the JSON agent configuration built in local.CwAgentLinuxConfig as a
# plain String parameter named AmazonCloudWatch-linux.
resource "aws_ssm_parameter" "CwAgentConfigLinux" {
  type        = "String"
  name        = "AmazonCloudWatch-linux"
  value       = local.CwAgentLinuxConfig
  description = "Cloudwatch agent Standard config for Linux"
}
|
||||
|
||||
locals {
  # CloudWatch agent configuration, written as a native HCL object and serialized
  # to JSON. "$${...}" is the HCL escape that yields a literal "${...}" in the
  # resulting document.
  CwAgentLinuxConfig = jsonencode({
    agent = {
      metrics_collection_interval = 60
      run_as_user                 = "root"
    }

    metrics = {
      aggregation_dimensions = [["InstanceId"]]

      append_dimensions = {
        AutoScalingGroupName = "$${aws:AutoScalingGroupName}"
        ImageId              = "$${aws:ImageId}"
        InstanceId           = "$${aws:InstanceId}"
        InstanceType         = "$${aws:InstanceType}"
      }

      metrics_collected = {
        cpu = {
          measurement = [
            "cpu_usage_idle",
            "cpu_usage_iowait",
            "cpu_usage_user",
            "cpu_usage_system",
          ]
          metrics_collection_interval = 60
          resources                   = ["*"]
          totalcpu                    = false
        }

        disk = {
          measurement                 = ["used_percent", "inodes_free"]
          metrics_collection_interval = 60
          resources                   = ["*"]
          ignore_file_system_types    = ["devtmpfs", "overlay", "sysfs", "tmpfs"]
        }

        diskio = {
          measurement                 = ["io_time"]
          metrics_collection_interval = 60
          resources                   = ["*"]
        }

        mem = {
          measurement                 = ["mem_used_percent"]
          metrics_collection_interval = 60
        }

        statsd = {
          metrics_aggregation_interval = 60
          metrics_collection_interval  = 10
          service_address              = ":8125"
        }

        swap = {
          measurement                 = ["swap_used_percent"]
          metrics_collection_interval = 60
        }

        net = {
          measurement                 = ["net_err_in", "net_err_out"]
          metrics_collection_interval = 60
        }

        processes = {
          measurement                 = ["processes_total"]
          metrics_collection_interval = 60
        }
      }
    }
  })
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Version constraints for this module: Terraform 1.3.0 or newer and the
# hashicorp/aws provider 5.0 or newer.
terraform {
  required_providers {
    aws = {
      version = ">= 5.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.3.0"
}
|
||||
@@ -0,0 +1,61 @@
|
||||
<!-- This readme file is generated with terraform-docs -->
|
||||
|
||||
This module configures a CloudWatch Logs subscription and streams the logs to an S3 bucket via Kinesis Firehose.
|
||||
|
||||
## Requirements
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| terraform | ~> 1.3.0 |
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Providers
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| aws | >= 5.0 |
|
||||
| random | n/a |
|
||||
|
||||
## Modules
|
||||
|
||||
No modules.
|
||||
|
||||
## Resources
|
||||
|
||||
| Name | Type |
|
||||
|------|------|
|
||||
| [aws_cloudwatch_log_group.firehose-log](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
|
||||
| [aws_cloudwatch_log_subscription_filter.cwl-sub-filter](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_subscription_filter) | resource |
|
||||
| [aws_iam_policy.cwlog-role-policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
|
||||
| [aws_iam_policy.firehose-role-policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
|
||||
| [aws_iam_role.cwlog-stream-role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
|
||||
| [aws_iam_role.firehose-stream-iam-role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
|
||||
| [aws_iam_role_policy_attachment.cwlog-role-policy-attachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
|
||||
| [aws_iam_role_policy_attachment.firehose-role-policy-attachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
|
||||
| [aws_kinesis_firehose_delivery_stream.cwl-s3-firehose-stream](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kinesis_firehose_delivery_stream) | resource |
|
||||
| [random_id.rid](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/id) | resource |
|
||||
| [aws_caller_identity.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|------|---------|:--------:|
|
||||
| cwl-region | AWS region where Cloudwatch LogGroup resides. Needed for setting up cwlog-stream-role | `string` | n/a | yes |
|
||||
| dest-bucket-arn | Destination S3 bucket ARN | `string` | n/a | yes |
|
||||
| dest-bucket-kmskey-arn | KMS key ARN for destination bucket | `string` | n/a | yes |
|
||||
| dest-bucket-prefix | S3 object prefix for this stream. It must not start with a / and should end with a /. For example, r53-log/acme.local/ | `string` | n/a | yes |
|
||||
| enable-firehose-errorlog | Enable firehose errorlog | `bool` | `false` | no |
|
||||
| firehose-kmskey-arn | KMS Key arn for Firehose | `string` | n/a | yes |
|
||||
| source-cwlgroup-name | Name of source CloudwatchLog group | `string` | n/a | yes |
|
||||
| stream-name | Name of Kinesis Data Firehose delivery stream | `string` | n/a | yes |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| cloudwatchstream-iam-role-arn | n/a |
|
||||
| firehose-iam-role-arn | n/a |
|
||||
|
||||
---
|
||||
## Authorship
|
||||
This module was developed by Rackspace.
|
||||
@@ -0,0 +1,162 @@
|
||||
# Firehose delivery stream that writes GZIP-compressed records to the destination
# S3 bucket, with the stream itself encrypted by a customer-managed KMS key.
resource "aws_kinesis_firehose_delivery_stream" "cwl-s3-firehose-stream" {
  destination = "extended_s3"
  name        = var.stream-name

  server_side_encryption {
    key_arn  = var.firehose-kmskey-arn
    key_type = "CUSTOMER_MANAGED_CMK"
    enabled  = true
  }

  extended_s3_configuration {
    bucket_arn         = var.dest-bucket-arn
    kms_key_arn        = var.dest-bucket-kmskey-arn
    role_arn           = aws_iam_role.firehose-stream-iam-role.arn
    compression_format = "GZIP"

    # A leading "/" on the caller-supplied prefix is stripped.
    prefix              = trimprefix(var.dest-bucket-prefix, "/")
    error_output_prefix = "FirehoseErrors/"

    cloudwatch_logging_options {
      enabled         = var.enable-firehose-errorlog
      log_stream_name = "DestinationDelivery"
      # The log group only exists when error logging is enabled (count = 0 or 1);
      # one() yields its name, or null when the group was not created.
      log_group_name = one(aws_cloudwatch_log_group.firehose-log[*].name)
    }
  }
}
|
||||
|
||||
# Optional log group for Firehose error logs; created only when
# var.enable-firehose-errorlog is true.
resource "aws_cloudwatch_log_group" "firehose-log" {
  count = var.enable-firehose-errorlog ? 1 : 0

  retention_in_days = 365
  name              = "/aws/kinesisfirehose/${var.stream-name}"
}
|
||||
|
||||
# Subscribes the source log group to the Firehose delivery stream. The empty
# filter pattern forwards every log event.
resource "aws_cloudwatch_log_subscription_filter" "cwl-sub-filter" {
  log_group_name  = var.source-cwlgroup-name
  name            = "stream-to-s3"
  role_arn        = aws_iam_role.cwlog-stream-role.arn
  filter_pattern  = ""
  destination_arn = aws_kinesis_firehose_delivery_stream.cwl-s3-firehose-stream.arn

  # The filter only references the role, not the policy attachment. Without this
  # explicit dependency Terraform may create the filter before the role has been
  # granted firehose:PutRecord.
  depends_on = [aws_iam_role_policy_attachment.cwlog-role-policy-attachment]
}
|
||||
|
||||
# Random 4-byte identifier; its decimal form is appended to the IAM role and
# policy names in this module.
resource "random_id" "rid" {
  byte_length = 4
}
|
||||
|
||||
# IAM role that the Firehose service (firehose.amazonaws.com) is allowed to assume.
resource "aws_iam_role" "firehose-stream-iam-role" {
  description = "Kinesis Firehose IAM role for streaming logs from CloudwatchLog to S3"
  name        = "firehose-stream-role-${var.stream-name}-${random_id.rid.dec}"

  # Trust policy.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "FirehoseStreaming"
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "firehose.amazonaws.com" }
      },
    ]
  })
}
|
||||
|
||||
# Attaches the Firehose permissions policy to the Firehose role.
resource "aws_iam_role_policy_attachment" "firehose-role-policy-attachment" {
  policy_arn = aws_iam_policy.firehose-role-policy.arn
  role       = aws_iam_role.firehose-stream-iam-role.name
}
|
||||
|
||||
# Permissions for the Firehose role: write to the destination bucket, use the
# bucket's KMS key, and write error logs to the stream's CloudWatch log group.
resource "aws_iam_policy" "firehose-role-policy" {
  name        = "kinesis-firehose-log-stream-${var.stream-name}-${random_id.rid.dec}"
  description = "Policy for Kinesis Firehose streaming logs to s3"
  policy = jsonencode(
    {
      "Version" : "2012-10-17",
      "Statement" : [
        {
          "Effect" : "Allow",
          "Action" : [
            "s3:AbortMultipartUpload",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
            "s3:PutObject"
          ],
          "Resource" : [
            var.dest-bucket-arn,
            "${var.dest-bucket-arn}/*"
          ]
        },
        {
          "Effect" : "Allow",
          "Action" : [
            "kms:Decrypt",
            "kms:GenerateDataKey"
          ],
          "Resource" : [
            var.dest-bucket-kmskey-arn
          ]
        },
        {
          "Effect" : "Allow",
          "Action" : [
            "logs:PutLogEvents",
            "logs:PutLogEventsBatch",
            "logs:CreateLogStream"
          ],
          "Resource" : [
            # Log group / log stream ARNs continue with ":" after the group name
            # (log-group:NAME:log-stream:STREAM), so the wildcard must follow a
            # colon. The previous "/*" suffix matched neither the group nor its
            # streams.
            "arn:aws:logs:*:*:log-group:/aws/kinesisfirehose/${var.stream-name}:*"
          ]
        }
      ]
    }
  )
}
|
||||
|
||||
|
||||
# IAM role that the regional CloudWatch Logs service principal is allowed to assume.
resource "aws_iam_role" "cwlog-stream-role" {
  description = "CloudwatchLog role for streaming to firehose"
  name        = "cloudwatchlog-stream-role-${var.stream-name}-${random_id.rid.dec}"

  # Trust policy; the principal is built from var.cwl-region.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "CloudwatchLogStreaming"
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "logs.${var.cwl-region}.amazonaws.com" }
      },
    ]
  })
}
|
||||
|
||||
|
||||
# Attaches the CloudWatch Logs streaming policy to the CloudWatch Logs role.
resource "aws_iam_role_policy_attachment" "cwlog-role-policy-attachment" {
  policy_arn = aws_iam_policy.cwlog-role-policy.arn
  role       = aws_iam_role.cwlog-stream-role.name
}
|
||||
|
||||
# Permissions for the CloudWatch Logs role: put records into this module's
# Firehose delivery stream.
resource "aws_iam_policy" "cwlog-role-policy" {
  name        = "cloudwatchlog-stream-${var.stream-name}-${random_id.rid.dec}"
  description = "Policy for CloudWatch Logs streaming to Kinesis Firehose"
  policy = jsonencode(
    {
      "Version" : "2012-10-17",
      "Statement" : [
        {
          "Effect" : "Allow",
          "Action" : ["firehose:PutRecord"],
          "Resource" : [
            # Use the ARN reported by the delivery stream itself rather than a
            # hand-assembled string, which hard-coded the "aws" partition and
            # silently pointed at a non-existent stream if var.cwl-region did not
            # match the region the stream was created in.
            aws_kinesis_firehose_delivery_stream.cwl-s3-firehose-stream.arn
          ]
        }
      ]
    }
  )
}
|
||||
|
||||
# Looks up the identity (including the account ID) of the credentials Terraform runs with.
data "aws_caller_identity" "this" {}
|
||||
@@ -0,0 +1,7 @@
|
||||
output "firehose-iam-role-arn" {
  description = "ARN of the IAM role assumed by Kinesis Firehose (aws_iam_role.firehose-stream-iam-role)"
  value       = aws_iam_role.firehose-stream-iam-role.arn
}
|
||||
|
||||
output "cloudwatchstream-iam-role-arn" {
  description = "ARN of the IAM role assumed by CloudWatch Logs for the subscription filter (aws_iam_role.cwlog-stream-role)"
  value       = aws_iam_role.cwlog-stream-role.arn
}
|
||||
@@ -0,0 +1,40 @@
|
||||
# Delivery stream name; also embedded in the IAM role/policy names and the
# error log group name.
variable "stream-name" {
  description = "Name of Kinesis Data Firehose delivery stream"
  type        = string
}

# Customer-managed key used for the delivery stream's server-side encryption.
variable "firehose-kmskey-arn" {
  description = "KMS Key arn for Firehose"
  type        = string
}

# Bucket the delivery stream writes to; also used in the Firehose role policy.
variable "dest-bucket-arn" {
  description = "Destination S3 bucket ARN"
  type        = string
}
|
||||
|
||||
# Object key prefix for delivered records. A leading "/" is stripped by the
# delivery stream resource (trimprefix); a trailing "/" is not added for you.
variable "dest-bucket-prefix" {
  type        = string
  description = "S3 object prefix for this stream. It must not start with a / and should end with a /. For example, r53-log/acme.local/"
}
|
||||
|
||||
# Key passed to the S3 destination and granted to the Firehose role
# (kms:Decrypt, kms:GenerateDataKey).
variable "dest-bucket-kmskey-arn" {
  description = "KMS key ARN for destination bucket"
  type        = string
}

# Log group the subscription filter is attached to.
variable "source-cwlgroup-name" {
  description = "Name of source CloudwatchLog group"
  type        = string
}

# Used to build the logs.<region>.amazonaws.com service principal.
variable "cwl-region" {
  description = "AWS region where Cloudwatch LogGroup resides. Needed for setting up cwlog-stream-role"
  type        = string
}

# Toggles both the error log group and the stream's CloudWatch logging options.
variable "enable-firehose-errorlog" {
  description = "Enable firehose errorlog"
  type        = bool
  default     = false
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Version constraints for this module: Terraform 1.3.x ("~> 1.3.0") and the
# hashicorp/aws provider 5.0 or newer.
terraform {
  required_providers {
    aws = {
      version = ">= 5.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,22 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tags to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "alb-arns" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "alb"
|
||||
}
|
||||
|
||||
module "alb-monitoring" {
|
||||
for_each = toset(split(" ", data.external.alb-arns.result.result))
|
||||
source = "../../modules/ManagementGovernance/Monitoring.ALB"
|
||||
default-tags = local.default-tags
|
||||
load-balancer = each.value
|
||||
threshold-HealthHostCountMin = 1
|
||||
}
|
||||
|
||||
```
|
||||
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Reads a JSON object with an "lb" key (a load balancer ARN) on stdin and prints
# {"result": "<space-separated, sorted target group ARNs>"} on stdout.
# NOTE(review): the stdin/stdout JSON shape suggests this is a Terraform
# external data source program; confirm against the caller.

# @sh shell-quotes the value so the eval cannot be broken by odd characters.
eval "$(jq -r '@sh "lb=\(.lb)"')"

# Quote the ARN and the JMESPath query: unquoted, "TargetGroups[*]..." is a shell
# glob and "$lb" is subject to word splitting.
RESULTS=$(aws elbv2 describe-target-groups \
  --load-balancer-arn "$lb" \
  --query 'TargetGroups[*].TargetGroupArn' \
  --output text --no-cli-pager | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
locals {
  # CloudWatch "LoadBalancer" dimension value: "app/" followed by the third and
  # fourth "/"-separated segments of var.load-balancer.
  alb-name = format(
    "app/%s/%s",
    split("/", var.load-balancer)[2],
    split("/", var.load-balancer)[3],
  )
}
|
||||
|
||||
# Alarm on the ALB's HTTPCode_ELB_5XX_Count metric; every tunable comes from
# var.settings.HTTPCode_ELB_5XX_Count.
resource "aws_cloudwatch_metric_alarm" "alb-HTTPCode_ELB_5XX_Count" {
  alarm_name        = "${var.settings.HTTPCode_ELB_5XX_Count.ecccode}-ALB_${local.alb-name}-HTTPCode_ELB_5XX_Count"
  alarm_description = "ALB:HTTPCode_ELB_5XX_Count"

  # Metric being watched
  namespace   = "AWS/ApplicationELB"
  metric_name = "HTTPCode_ELB_5XX_Count"
  dimensions = {
    LoadBalancer = local.alb-name
  }

  # Evaluation
  statistic           = var.settings.HTTPCode_ELB_5XX_Count.statistic
  period              = var.settings.HTTPCode_ELB_5XX_Count.period
  evaluation_periods  = var.settings.HTTPCode_ELB_5XX_Count.evaluation_periods
  comparison_operator = var.settings.HTTPCode_ELB_5XX_Count.comparison_operator
  threshold           = var.settings.HTTPCode_ELB_5XX_Count.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.HTTPCode_ELB_5XX_Count.action]
  ok_actions                = [var.settings.HTTPCode_ELB_5XX_Count.action]
  insufficient_data_actions = []
}
|
||||
|
||||
# Alarm on the ALB's TargetConnectionErrorCount metric; every tunable comes from
# var.settings.TargetConnectionErrorCount.
resource "aws_cloudwatch_metric_alarm" "alb-TargetConnectionErrorCount" {
  alarm_name        = "${var.settings.TargetConnectionErrorCount.ecccode}-ALB_${local.alb-name}-TargetConnectionErrorCount"
  alarm_description = "ALB:TargetConnectionErrorCount"

  # Metric being watched
  namespace   = "AWS/ApplicationELB"
  metric_name = "TargetConnectionErrorCount"
  dimensions = {
    LoadBalancer = local.alb-name
  }

  # Evaluation
  statistic           = var.settings.TargetConnectionErrorCount.statistic
  period              = var.settings.TargetConnectionErrorCount.period
  evaluation_periods  = var.settings.TargetConnectionErrorCount.evaluation_periods
  comparison_operator = var.settings.TargetConnectionErrorCount.comparison_operator
  threshold           = var.settings.TargetConnectionErrorCount.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.TargetConnectionErrorCount.action]
  ok_actions                = [var.settings.TargetConnectionErrorCount.action]
  insufficient_data_actions = []
}
|
||||
|
||||
# Alarm on the ALB's TargetResponseTime metric; every tunable comes from
# var.settings.TargetResponseTime.
resource "aws_cloudwatch_metric_alarm" "alb-TargetResponseTime" {
  alarm_name        = "${var.settings.TargetResponseTime.ecccode}-ALB_${local.alb-name}-TargetResponseTime"
  alarm_description = "ALB:TargetResponseTime"

  # Metric being watched
  namespace   = "AWS/ApplicationELB"
  metric_name = "TargetResponseTime"
  dimensions = {
    LoadBalancer = local.alb-name
  }

  # Evaluation
  statistic           = var.settings.TargetResponseTime.statistic
  period              = var.settings.TargetResponseTime.period
  evaluation_periods  = var.settings.TargetResponseTime.evaluation_periods
  comparison_operator = var.settings.TargetResponseTime.comparison_operator
  threshold           = var.settings.TargetResponseTime.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.TargetResponseTime.action]
  ok_actions                = [var.settings.TargetResponseTime.action]
  insufficient_data_actions = []
}
|
||||
|
||||
/*
|
||||
module "alb-targetgroups" {
|
||||
source = "../../util/resource-list"
|
||||
resource-type = "alb-targetgroups"
|
||||
query-input = var.load-balancer
|
||||
asrolearn = var.asrolearn
|
||||
}
|
||||
*/
|
||||
// causes Rate exceeded error, maybe because of adaptive AWS_RETRY_MODE?
|
||||
|
||||
/*
|
||||
module "alb_tgs" {
|
||||
assume_role_arn = var.asrolearn
|
||||
role_session_name = "terraform-resource-list"
|
||||
source = "../../util/terraform-aws-cli"
|
||||
aws_cli_commands = ["elbv2", "describe-target-groups", "--load-balancer-arn", var.load-balancer]
|
||||
aws_cli_query = "TargetGroups[*].TargetGroupArn"
|
||||
}
|
||||
*/
|
||||
|
||||
# Runs an AWS CLI query through the local awscli utility module to list the
# target group ARNs attached to var.load-balancer, using the supplied
# target-account credentials.
module "alb_tgs" {
  source = "../../util/awscli"

  aws_cli_commands = "elbv2 describe-target-groups --load-balancer-arn ${var.load-balancer} --query TargetGroups[*].TargetGroupArn"

  access_key    = var.target-account-ak
  secret_key    = var.target-account-sk
  session_token = var.target-account-token
}
|
||||
|
||||
# One HealthyHostCount alarm per target group ARN returned by module.alb_tgs;
# every tunable comes from var.settings.HealthHostCountMin.
resource "aws_cloudwatch_metric_alarm" "alb-HealthyHostCount" {
  # for_each = module.alb-targetgroups.result-set
  for_each = toset(module.alb_tgs.awscliout)

  # split(":", <target group ARN>)[5] is the sixth ":"-separated field of the ARN,
  # used both in the alarm name and as the TargetGroup dimension.
  alarm_name                = "${var.settings.HealthHostCountMin.ecccode}-ALBTG_:${split(":", each.value)[5]}-HealthyHostCount"
  comparison_operator       = var.settings.HealthHostCountMin.comparison_operator
  evaluation_periods        = var.settings.HealthHostCountMin.evaluation_periods
  metric_name               = "HealthyHostCount"
  period                    = var.settings.HealthHostCountMin.period
  statistic                 = var.settings.HealthHostCountMin.statistic
  threshold                 = var.settings.HealthHostCountMin.threshold
  alarm_description         = "ALBTG:HealthyHostCount"
  namespace                 = "AWS/ApplicationELB"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.HealthHostCountMin.action]
  ok_actions                = [var.settings.HealthHostCountMin.action]
  dimensions = {
    TargetGroup = split(":", each.value)[5]
    # Same value as before; reuse the shared local instead of repeating the
    # split expression so all ALB alarms derive the dimension identically.
    LoadBalancer = local.alb-name
  }
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Number of target group ARNs returned by module.alb_tgs.
output "alb-tg-count" {
  # value = length(module.alb-targetgroups.result-set)
  value = length(flatten(module.alb_tgs.awscliout))
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Version constraints for this module: Terraform 1.3.x ("~> 1.3.0") and the
# hashicorp/aws provider 4.36.1 or newer.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,8 @@
|
||||
# NOTE(review): cw-alarm-prefix is not referenced by the alarm resources in this
# module's main.tf; alarm names are built from settings.<metric>.ecccode instead.
variable "cw-alarm-prefix" {
  description = "Alarm name prefix supplied by the caller."
}

variable "actions-enabled" {
  description = "Passed to actions_enabled on every alarm in this module."
}

variable "load-balancer" {
  description = "ARN of the Application Load Balancer to monitor. Used for the target group lookup and split on \"/\" to derive the LoadBalancer dimension."
}

variable "settings" {
  description = "Per-metric alarm settings keyed by HTTPCode_ELB_5XX_Count, TargetConnectionErrorCount, TargetResponseTime and HealthHostCountMin. Each entry provides ecccode, comparison_operator, evaluation_periods, period, statistic, threshold and action."
}

# variable asrolearn {}

variable "target-account-ak" {
  description = "Access key passed to the awscli utility module for the target group lookup."
}

variable "target-account-sk" {
  description = "Secret key passed to the awscli utility module for the target group lookup."
}

variable "target-account-token" {
  description = "Session token passed to the awscli utility module for the target group lookup."
}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tags to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "asg" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "asg"
|
||||
}
|
||||
|
||||
module "asg-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.asg.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.ASG"
|
||||
default-tags = local.default-tags
|
||||
asg-name = each.value
|
||||
threshold-CPUUtilization = 90
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,41 @@
|
||||
# Looks up the Auto Scaling group by name; its min_size is used as the threshold
# of the GroupInServiceCapacity alarm.
data "aws_autoscaling_group" "asg" {
  name = var.asg-name
}
|
||||
|
||||
# Alarm on the AWS/EC2 CPUUtilization metric aggregated by Auto Scaling group;
# every tunable comes from var.settings.CPUUtilization.
resource "aws_cloudwatch_metric_alarm" "asg-CPUUtilization" {
  alarm_name        = "${var.settings.CPUUtilization.ecccode}-ASG_${var.asg-name}-CPUUtilization"
  alarm_description = "ASG:CPUUtilization"

  # Metric being watched
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  dimensions = {
    AutoScalingGroupName = var.asg-name
  }

  # Evaluation
  statistic           = var.settings.CPUUtilization.statistic
  period              = var.settings.CPUUtilization.period
  evaluation_periods  = var.settings.CPUUtilization.evaluation_periods
  comparison_operator = var.settings.CPUUtilization.comparison_operator
  threshold           = var.settings.CPUUtilization.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.CPUUtilization.action]
  ok_actions                = [var.settings.CPUUtilization.action]
  insufficient_data_actions = []
}
|
||||
|
||||
# Alarm that fires when the group's minimum GroupInServiceCapacity drops below
# the Auto Scaling group's configured min_size.
resource "aws_cloudwatch_metric_alarm" "asg-GroupInServiceCapacity" {
  alarm_name        = "${var.settings.GroupInServiceCapacity.ecccode}-ASG_${var.asg-name}-GroupInServiceCapacity"
  alarm_description = "ASG:GroupInServiceCapacity"

  # Metric being watched
  namespace   = "AWS/AutoScaling"
  metric_name = "GroupInServiceCapacity"
  dimensions = {
    AutoScalingGroupName = var.asg-name
  }

  # Evaluation: operator, statistic and threshold are fixed here; only the
  # period settings come from var.settings.GroupInServiceCapacity.
  statistic           = "Minimum"
  comparison_operator = "LessThanThreshold"
  threshold           = data.aws_autoscaling_group.asg.min_size
  period              = var.settings.GroupInServiceCapacity.period
  evaluation_periods  = var.settings.GroupInServiceCapacity.evaluation_periods

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.GroupInServiceCapacity.action]
  ok_actions                = [var.settings.GroupInServiceCapacity.action]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Version constraints for this module: Terraform 1.3.x ("~> 1.3.0") and the
# hashicorp/aws provider 4.36.1 or newer.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,5 @@
|
||||
# NOTE(review): cw-alarm-prefix and ecccode are not referenced by the alarm
# resources in this module's main.tf; alarm names use settings.<metric>.ecccode.
variable "cw-alarm-prefix" {
  description = "Alarm name prefix supplied by the caller."
}

variable "actions-enabled" {
  description = "Passed to actions_enabled on every alarm in this module."
}

variable "asg-name" {
  description = "Name of the Auto Scaling group to monitor. Used for the data lookup, the alarm names and the AutoScalingGroupName dimension."
}

variable "settings" {
  description = "Per-metric alarm settings keyed by CPUUtilization and GroupInServiceCapacity. Each entry provides ecccode, evaluation_periods, period and action; CPUUtilization also provides comparison_operator, statistic and threshold."
}

variable "ecccode" {
  description = "Code supplied by the caller."
}
|
||||
@@ -0,0 +1,74 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tags to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "ec2-instances" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "ec2"
|
||||
}
|
||||
|
||||
module "ec2-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.ec2-instances.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.EC2"
|
||||
default-tags = local.default-tags
|
||||
ec2-instance-id = each.value
|
||||
threshold-CPUUtilization = 90
|
||||
threshold-mem_free = 100000
|
||||
threshold-swap_free = 100000
|
||||
threshold-disk_free = 1 * 1000 * 1000 * 1000
|
||||
threshold-disk_inodes_free = 10000
|
||||
threshold-processes_total = 500
|
||||
threshold-LogicalDiskFreePct = 10
|
||||
threshold-MemoryCommittedPct = 90
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
|
||||
## Sample cloudwatch alarm email notification
|
||||
```
|
||||
Subject: ALARM: "TestAlarmPleaseIgnore" in Asia Pacific (Hong Kong)
|
||||
|
||||
You are receiving this email because your Amazon CloudWatch Alarm "TestAlarmPleaseIgnore" in the
|
||||
Asia Pacific (Hong Kong) region has entered the ALARM state, because "Threshold Crossed: 1 out of
|
||||
the last 1 datapoints [864.0 (24/01/24 00:56:00)] was less than or equal to the threshold (900.0)
|
||||
(minimum 1 datapoint for OK -> ALARM transition)." at "Wednesday 24 January, 2024 01:01:34 UTC".
|
||||
|
||||
View this alarm in the AWS Management Console:
|
||||
https://ap-east-1.console.aws.amazon.com%2Fcloudwatch...
|
||||
|
||||
Alarm Details:
|
||||
- Name: TestAlarmPleaseIgnore
|
||||
- Description: Cloudwatch alarm for the following resource
|
||||
- Instance ID: xxx
|
||||
- Instance Name: yyy
|
||||
- Instance IP: zz.zz.zz.zz
|
||||
- State Change: OK -> ALARM
|
||||
- Reason for State Change: Threshold Crossed: 1 out of the last 1 datapoints [864.0 (24/01/24 00:56:00)] was less than or equal to the threshold (900.0) (minimum 1 datapoint for OK -> ALARM transition).
|
||||
- Timestamp: Wednesday 24 January, 2024 01:01:34 UTC
|
||||
- AWS Account: 111122223333
|
||||
- Alarm Arn: arn:aws:cloudwatch:ap-east-1:111122223333:alarm:TestAlarmPleaseIgnore
|
||||
|
||||
Threshold:
|
||||
- The alarm is in the ALARM state when the metric is LessThanOrEqualToThreshold 900.0 for at least 1 of the last 1 period(s) of 300 seconds.
|
||||
|
||||
Monitored Metric:
|
||||
- MetricNamespace: AWS/EC2
|
||||
- MetricName: CPUCreditBalance
|
||||
- Dimensions: [InstanceId = i-050d4adeafaa53cd0]
|
||||
- Period: 300 seconds
|
||||
- Statistic: Average
|
||||
- Unit: not specified
|
||||
- TreatMissingData: missing
|
||||
|
||||
|
||||
State Change Actions:
|
||||
- OK:
|
||||
- ALARM: [arn:aws:sns:ap-east-1:111122223333:CWA-SNS-Email-KenFong]
|
||||
- INSUFFICIENT_DATA:
|
||||
```
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
# Reads {"input": <instance id>, "asrolearn": <role ARN>} as JSON on stdin,
# assumes that role, and prints a JSON object with the "device" and "fstype"
# dimensions of the CWAgent disk_inodes_free metric for path "/" on that
# instance. Prints {"device":"unknown","fstype":"unknown"} when no metric exists.

# @sh shell-quotes each value so the eval is safe against odd characters.
eval "$(jq -r '@sh "export id=\(.input) asrolearn=\(.asrolearn)"')"

# Export the temporary credentials. Quoting the command substitution and using
# @sh avoids the word splitting the previous unquoted eval relied on.
eval "$(aws sts assume-role --role-arn "$asrolearn" --role-session-name awscli |
  jq -r '.Credentials | @sh "export AWS_ACCESS_KEY_ID=\(.AccessKeyId) AWS_SECRET_ACCESS_KEY=\(.SecretAccessKey) AWS_SESSION_TOKEN=\(.SessionToken) AWS_SESSION_EXPIRATION=\(.Expiration)"')"

aws cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free \
  --dimensions "Name=InstanceId,Value=$id" Name=path,Value=/ |
  jq '.Metrics[] | .Dimensions[] | select ((.Name=="device") or (.Name=="fstype")) | { (.Name): (.Value)}' |
  jq -s 'add // {"device":"unknown", "fstype":"unknown"}'

# Always report success. An older two-call variant that followed this exit was
# unreachable and has been removed.
exit 0
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
# Reads a JSON query on stdin with access_key, secret_key, session_token and iid
# (an instance ID), then prints the dimensions of the CWAgent disk_inodes_free
# metric for path "/" on that instance as a single JSON object.

# Get the query
TERRAFORM_QUERY=$(jq -Mc .)

# Extract the query attributes
access_key=$(echo "${TERRAFORM_QUERY}" | jq -r '.access_key')
secret_key=$(echo "${TERRAFORM_QUERY}" | jq -r '.secret_key')
session_token=$(echo "${TERRAFORM_QUERY}" | jq -r '.session_token')
iid=$(echo "${TERRAFORM_QUERY}" | jq -r '.iid')

# eval "$(jq -r '@sh "export id=\(.input) asrolearn=\(.asrolearn)"')"
# eval $(aws sts assume-role --role-arn $asrolearn --role-session-name awscli | jq -cr '"export AWS_ACCESS_KEY_ID=" + .Credentials.AccessKeyId, "export AWS_SECRET_ACCESS_KEY=" + .Credentials.SecretAccessKey, "export AWS_SESSION_TOKEN=" + .Credentials.SessionToken, "export AWS_SESSION_EXPIRATION=" + .Credentials.Expiration')

export AWS_ACCESS_KEY_ID=$access_key
export AWS_SECRET_ACCESS_KEY=$secret_key
export AWS_SESSION_TOKEN=$session_token

#aws cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free \
#--dimensions Name=InstanceId,Value=$iid Name=path,Value=/ | \
#jq '.Metrics[] | .Dimensions[] | {(.Name):(.Value)}' | jq -s 'add'

# when there are multiple metrics with the same name, only the last one is used.
# The instance ID and the JMESPath query are quoted: unquoted, "Metrics[]" is a
# shell glob and "$iid" is subject to word splitting.
aws cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free \
  --dimensions "Name=InstanceId,Value=$iid" Name=path,Value=/ --query 'Metrics[]' |
  jq '. | last | .Dimensions[] | {(.Name):(.Value)}' | jq -s 'add'
|
||||
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
# Reports whether an EC2 instance runs Windows or Linux.
#
# Input  (stdin):  JSON object with keys "input" (EC2 instance id) and
#                  "asrolearn" (ARN of the IAM role to assume).
# Output (stdout): {"os": "Windows"} when PlatformDetails is exactly "Windows",
#                  {"os": "Linux"} otherwise.
# NOTE(review): presumably invoked through a Terraform `external` data source - confirm.

# Export the query attributes as shell variables; jq's @sh shell-quotes the values.
eval "$(jq -r '@sh "export id=\(.input) asrolearn=\(.asrolearn)"')"

# Assume the target role and export its temporary credentials for the call below.
eval "$(aws sts assume-role --role-arn "$asrolearn" --role-session-name awscli | jq -cr '"export AWS_ACCESS_KEY_ID=" + .Credentials.AccessKeyId, "export AWS_SECRET_ACCESS_KEY=" + .Credentials.SecretAccessKey, "export AWS_SESSION_TOKEN=" + .Credentials.SessionToken, "export AWS_SESSION_EXPIRATION=" + .Credentials.Expiration')"

EC2OS=$(aws ec2 describe-instances --instance-ids "$id" | jq -r '.Reservations[].Instances[].PlatformDetails')

# The value must be quoted: it can be empty or contain spaces
# (e.g. "Red Hat Enterprise Linux"), which makes an unquoted test fail
# with "too many arguments" / "unary operator expected".
if [ "$EC2OS" == "Windows" ]; then
  echo '{"os": "Windows"}'
else
  echo '{"os": "Linux"}'
fi
|
||||
|
||||
@@ -0,0 +1,395 @@
|
||||
locals {
  # Text used as alarm_description by the EC2 alarms in this module.
  # alarm-message limited to 1024 characters
  # The Name tag is optional on an instance, so fall back to "n/a" instead of
  # failing the plan with an invalid-index error when it is missing.
  alarm-message = <<EOF
Cloudwatch alarm for the following resource
- Instance ID: ${var.ec2-instance-id}
- Instance Name: ${try(data.aws_instance.ec2-instance.tags["Name"], "n/a")}
- Instance IP: ${data.aws_instance.ec2-instance.private_ip}
- Instance Type: ${data.aws_instance.ec2-instance.instance_type}
EOF
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
|
||||
alarm_name = "${var.settings.StatusCheckFailed_System.ecccode}-EC2_${var.ec2-instance-id}-StatusCheckFailed_System"
|
||||
comparison_operator = var.settings.StatusCheckFailed_System.comparison_operator
|
||||
evaluation_periods = var.settings.StatusCheckFailed_System.evaluation_periods
|
||||
metric_name = "StatusCheckFailed_System"
|
||||
period = var.settings.StatusCheckFailed_System.period
|
||||
statistic = var.settings.StatusCheckFailed_System.statistic
|
||||
threshold = var.settings.StatusCheckFailed_System.threshold
|
||||
# alarm_description = "EC2:StatusCheckFailed_System"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.StatusCheckFailed_System.action]
|
||||
ok_actions = [var.settings.StatusCheckFailed_System.action]
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
|
||||
alarm_name = "${var.settings.StatusCheckFailed_Instance.ecccode}-EC2_${var.ec2-instance-id}-StatusCheckFailed_Instance"
|
||||
comparison_operator = var.settings.StatusCheckFailed_Instance.comparison_operator
|
||||
evaluation_periods = var.settings.StatusCheckFailed_Instance.evaluation_periods
|
||||
metric_name = "StatusCheckFailed_Instance"
|
||||
period = var.settings.StatusCheckFailed_Instance.period
|
||||
statistic = var.settings.StatusCheckFailed_Instance.statistic
|
||||
threshold = var.settings.StatusCheckFailed_Instance.threshold
|
||||
# alarm_description = "EC2:StatusCheckFailed_Instance"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.StatusCheckFailed_Instance.action]
|
||||
ok_actions = [var.settings.StatusCheckFailed_Instance.action]
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
|
||||
alarm_name = "${var.settings.CPUUtilization.ecccode}-EC2_${var.ec2-instance-id}-CPUUtilization"
|
||||
comparison_operator = var.settings.CPUUtilization.comparison_operator
|
||||
evaluation_periods = var.settings.CPUUtilization.evaluation_periods
|
||||
metric_name = "CPUUtilization"
|
||||
period = var.settings.CPUUtilization.period
|
||||
statistic = var.settings.CPUUtilization.statistic
|
||||
threshold = var.settings.CPUUtilization.threshold
|
||||
# alarm_description = "EC2:CPUUtilization"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.CPUUtilization.action]
|
||||
ok_actions = [var.settings.CPUUtilization.action]
|
||||
treat_missing_data = "notBreaching"
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
}
|
||||
}
|
||||
|
||||
# cwagent metrics
|
||||
data "aws_instance" "ec2-instance" {
|
||||
instance_id = var.ec2-instance-id
|
||||
}
|
||||
|
||||
# put instance name or ip in alarm name
|
||||
locals {
  # Private IP and Name tag of the monitored instance, read from the
  # aws_instance data source.
  instance-ip = data.aws_instance.ec2-instance.private_ip
  # Instances without a Name tag yield "n/a" rather than an invalid-index error.
  instance-name = try(data.aws_instance.ec2-instance.tags["Name"], "n/a")
}
|
||||
|
||||
module "ec2_os" {
|
||||
source = "../../util/awscli"
|
||||
access_key = var.target-account-ak
|
||||
aws_cli_commands = "ec2 describe-instances --instance-ids ${var.ec2-instance-id} --query Reservations[].Instances[].PlatformDetails"
|
||||
secret_key = var.target-account-sk
|
||||
session_token = var.target-account-token
|
||||
}
|
||||
|
||||
# Linux specific checks
|
||||
# default cw agent uses mem_used_percent metric
|
||||
|
||||
# detect presence of the CloudWatch agent
|
||||
module "detect_cloudwatch_agent" {
|
||||
source = "../../util/awscli"
|
||||
access_key = var.target-account-ak
|
||||
secret_key = var.target-account-sk
|
||||
session_token = var.target-account-token
|
||||
aws_cli_commands = "cloudwatch list-metrics --namespace CWAgent --dimensions Name=InstanceId,Value=${var.ec2-instance-id} --query Metrics[].MetricName --max-items 1"
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
|
||||
alarm_name = "${var.settings.mem_used_percent.ecccode}-EC2_${var.ec2-instance-id}-mem_used_percent"
|
||||
comparison_operator = var.settings.mem_used_percent.comparison_operator
|
||||
evaluation_periods = var.settings.mem_used_percent.evaluation_periods
|
||||
metric_name = "mem_used_percent"
|
||||
period = var.settings.mem_used_percent.period
|
||||
statistic = var.settings.mem_used_percent.statistic
|
||||
threshold = var.settings.mem_used_percent.threshold
|
||||
# alarm_description = "EC2:mem_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.mem_used_percent.action]
|
||||
ok_actions = [var.settings.mem_used_percent.action]
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
}
|
||||
}
|
||||
|
||||
data "external" "cw-dimensions" {
|
||||
program = ["bash", "${path.module}/get-cwagent-dimensions.sh"]
|
||||
query = {
|
||||
iid = var.ec2-instance-id
|
||||
access_key = var.target-account-ak
|
||||
secret_key = var.target-account-sk
|
||||
session_token = var.target-account-token
|
||||
}
|
||||
}
|
||||
|
||||
/* module returns blank
|
||||
module "cw-dimensions" {
|
||||
source = "../../util/awscli"
|
||||
access_key = var.target-account-ak
|
||||
aws_cli_commands = "cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free --dimensions Name=InstanceId,Value=${var.ec2-instance-id} Name=path,Value=/ --query Metrics[].Dimensions[] | jq '.[] | {(.Name):(.Value)}' | jq -s 'add'"
|
||||
secret_key = var.target-account-sk
|
||||
session_token = var.target-account-token
|
||||
}
|
||||
*/
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
|
||||
alarm_name = "${var.settings.swap_used_percent.ecccode}-EC2_${var.ec2-instance-id}-swap_used_percent"
|
||||
comparison_operator = var.settings.swap_used_percent.comparison_operator
|
||||
evaluation_periods = var.settings.swap_used_percent.evaluation_periods
|
||||
metric_name = "swap_used_percent"
|
||||
period = var.settings.swap_used_percent.period
|
||||
statistic = var.settings.swap_used_percent.statistic
|
||||
threshold = var.settings.swap_used_percent.threshold
|
||||
# alarm_description = "EC2:swap_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.swap_used_percent.action]
|
||||
ok_actions = [var.settings.swap_used_percent.action]
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent_warn" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && data.external.cw-dimensions.result != null ? 1 : 0
|
||||
alarm_name = "${var.settings.disk_used_percent_warn.ecccode}-EC2_${var.ec2-instance-id}-disk_used_percent"
|
||||
comparison_operator = var.settings.disk_used_percent_warn.comparison_operator
|
||||
evaluation_periods = var.settings.disk_used_percent_warn.evaluation_periods
|
||||
metric_name = "disk_used_percent"
|
||||
period = var.settings.disk_used_percent_warn.period
|
||||
statistic = var.settings.disk_used_percent_warn.statistic
|
||||
threshold = var.settings.disk_used_percent_warn.threshold
|
||||
# alarm_description = "EC2:disk_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.disk_used_percent_warn.action]
|
||||
ok_actions = [var.settings.disk_used_percent_warn.action]
|
||||
dimensions = data.external.cw-dimensions.result
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent_crit" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && data.external.cw-dimensions.result != null ? 1 : 0
|
||||
alarm_name = "${var.settings.disk_used_percent_crit.ecccode}-EC2_${var.ec2-instance-id}-disk_used_percent"
|
||||
comparison_operator = var.settings.disk_used_percent_crit.comparison_operator
|
||||
evaluation_periods = var.settings.disk_used_percent_crit.evaluation_periods
|
||||
metric_name = "disk_used_percent"
|
||||
period = var.settings.disk_used_percent_crit.period
|
||||
statistic = var.settings.disk_used_percent_crit.statistic
|
||||
threshold = var.settings.disk_used_percent_crit.threshold
|
||||
# alarm_description = "EC2:disk_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.disk_used_percent_crit.action]
|
||||
ok_actions = [var.settings.disk_used_percent_crit.action]
|
||||
dimensions = data.external.cw-dimensions.result
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && data.external.cw-dimensions.result != null ? 1 : 0
|
||||
alarm_name = "${var.settings.disk_inodes_free.ecccode}-EC2_${var.ec2-instance-id}-disk_inodes_free"
|
||||
comparison_operator = var.settings.disk_inodes_free.comparison_operator
|
||||
evaluation_periods = var.settings.disk_inodes_free.evaluation_periods
|
||||
metric_name = "disk_inodes_free"
|
||||
period = var.settings.disk_inodes_free.period
|
||||
statistic = var.settings.disk_inodes_free.statistic
|
||||
threshold = var.settings.disk_inodes_free.threshold
|
||||
# alarm_description = "EC2:disk_inodes_free"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.disk_inodes_free.action]
|
||||
ok_actions = [var.settings.disk_inodes_free.action]
|
||||
dimensions = data.external.cw-dimensions.result
|
||||
}
|
||||
|
||||
# process metric not published by default cw agent config
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
|
||||
alarm_name = "${var.settings.processes_total.ecccode}-EC2_${var.ec2-instance-id}-processes_total"
|
||||
comparison_operator = var.settings.processes_total.comparison_operator
|
||||
evaluation_periods = var.settings.processes_total.evaluation_periods
|
||||
metric_name = "processes_total"
|
||||
period = var.settings.processes_total.period
|
||||
statistic = var.settings.processes_total.statistic
|
||||
threshold = var.settings.processes_total.threshold
|
||||
# alarm_description = "EC2:processes_total"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.processes_total.action]
|
||||
ok_actions = [var.settings.processes_total.action]
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-net_err" {
|
||||
count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
|
||||
alarm_name = "${var.settings.net_err_in.ecccode}-EC2_${var.ec2-instance-id}-net_err"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = var.settings.net_err_in.evaluation_periods
|
||||
threshold = 0
|
||||
# alarm_description = "EC2:net_err_in or EC2:net_err_out exceeds threshold"
|
||||
alarm_description = local.alarm-message
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = false
|
||||
alarm_actions = [var.settings.net_err_in.action]
|
||||
ok_actions = [var.settings.net_err_in.action]
|
||||
treat_missing_data = "notBreaching"
|
||||
|
||||
metric_query {
|
||||
id = "e1"
|
||||
expression = "IF(m1 > ${var.settings.net_err_in.threshold} OR m2 > ${var.settings.net_err_out.threshold}, 1, 0)"
|
||||
label = "net_err_exceeds_threshold"
|
||||
return_data = "true"
|
||||
}
|
||||
|
||||
metric_query {
|
||||
id = "m1"
|
||||
metric {
|
||||
metric_name = "net_err_in"
|
||||
namespace = "CWAgent"
|
||||
period = var.settings.net_err_in.period
|
||||
stat = var.settings.net_err_in.statistic
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
interface = "eth0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metric_query {
|
||||
id = "m2"
|
||||
metric {
|
||||
metric_name = "net_err_out"
|
||||
namespace = "CWAgent"
|
||||
period = var.settings.net_err_out.period
|
||||
stat = var.settings.net_err_out.statistic
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
interface = "eth0"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-NetworkIn" {
|
||||
count = try(var.settings.NetworkIn.monitor, false) ? 1 : 0
|
||||
alarm_name = "${var.settings.NetworkIn.ecccode}-EC2_${var.ec2-instance-id}-NetworkIn"
|
||||
comparison_operator = var.settings.NetworkIn.comparison_operator
|
||||
evaluation_periods = var.settings.NetworkIn.evaluation_periods
|
||||
metric_name = "NetworkIn"
|
||||
period = var.settings.NetworkIn.period
|
||||
statistic = var.settings.NetworkIn.statistic
|
||||
threshold = var.settings.NetworkIn.threshold
|
||||
# alarm_description = "EC2:NetworkIn"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.NetworkIn.action]
|
||||
ok_actions = [var.settings.NetworkIn.action]
|
||||
dimensions = {
|
||||
InstanceId = var.ec2-instance-id
|
||||
}
|
||||
}
|
||||
|
||||
# Alarm on the AWS/EC2 NetworkOut metric of the monitored instance.
# Thresholds, period, statistic and the notification target all come from
# var.settings.NetworkOut.
resource "aws_cloudwatch_metric_alarm" "ec2-NetworkOut" {
  # Gate on this alarm's own "monitor" flag. Fall back to the NetworkIn flag
  # (the previous behaviour) when NetworkOut does not define one, and to
  # false when neither is set.
  count                     = try(var.settings.NetworkOut.monitor, var.settings.NetworkIn.monitor, false) ? 1 : 0
  alarm_name                = "${var.settings.NetworkOut.ecccode}-EC2_${var.ec2-instance-id}-NetworkOut"
  comparison_operator       = var.settings.NetworkOut.comparison_operator
  evaluation_periods        = var.settings.NetworkOut.evaluation_periods
  metric_name               = "NetworkOut"
  period                    = var.settings.NetworkOut.period
  statistic                 = var.settings.NetworkOut.statistic
  threshold                 = var.settings.NetworkOut.threshold
  # alarm_description         = "EC2:NetworkOut"
  alarm_description         = local.alarm-message
  namespace                 = "AWS/EC2"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  # The same target is notified on both ALARM and OK transitions.
  alarm_actions             = [var.settings.NetworkOut.action]
  ok_actions                = [var.settings.NetworkOut.action]
  dimensions = {
    InstanceId = var.ec2-instance-id
  }
}
|
||||
|
||||
# Windows specific checks
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
|
||||
count = module.ec2_os.awscliout[0] == "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
|
||||
alarm_name = "${var.settings.MemoryCommittedPct.ecccode}-EC2_${var.ec2-instance-id}-MemoryCommittedPct"
|
||||
comparison_operator = var.settings.MemoryCommittedPct.comparison_operator
|
||||
evaluation_periods = var.settings.MemoryCommittedPct.evaluation_periods
|
||||
metric_name = "Memory % Committed Bytes In Use"
|
||||
period = var.settings.MemoryCommittedPct.period
|
||||
statistic = var.settings.MemoryCommittedPct.statistic
|
||||
threshold = var.settings.MemoryCommittedPct.threshold
|
||||
# alarm_description = "EC2:MemoryCommittedBytes"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.MemoryCommittedPct.action]
|
||||
ok_actions = [var.settings.MemoryCommittedPct.action]
|
||||
dimensions = {
|
||||
objectname = "Memory"
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-LogicalDiskFreePct" {
|
||||
count = module.ec2_os.awscliout[0] == "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
|
||||
alarm_name = "${var.settings.LogicalDiskFreePct.ecccode}-EC2_${var.ec2-instance-id}-LogicalDiskFreePct"
|
||||
comparison_operator = var.settings.LogicalDiskFreePct.comparison_operator
|
||||
evaluation_periods = var.settings.LogicalDiskFreePct.evaluation_periods
|
||||
metric_name = "LogicalDisk % Free Space"
|
||||
period = var.settings.LogicalDiskFreePct.period
|
||||
statistic = var.settings.LogicalDiskFreePct.statistic
|
||||
threshold = var.settings.LogicalDiskFreePct.threshold
|
||||
# alarm_description = "EC2:OsDiskFreePct"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.LogicalDiskFreePct.action]
|
||||
ok_actions = [var.settings.LogicalDiskFreePct.action]
|
||||
dimensions = {
|
||||
instance = "C:"
|
||||
objectname = "LogicalDisk"
|
||||
InstanceId = var.ec2-instance-id
|
||||
ImageId = data.aws_instance.ec2-instance.ami
|
||||
InstanceType = data.aws_instance.ec2-instance.instance_type
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
terraform {
|
||||
required_version = "~> 1.3.0"
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = ">= 4.36.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
variable "cw-alarm-prefix" {}
|
||||
variable "actions-enabled" {}
|
||||
variable "ec2-instance-id" {}
|
||||
variable "settings" {}
|
||||
# variable asrolearn {}
|
||||
variable target-account-ak {}
|
||||
variable target-account-sk {}
|
||||
variable target-account-token {}
|
||||
@@ -0,0 +1,27 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
Unlike other monitoring modules, which discover resource details automatically, this module needs the EKS pod names to be supplied explicitly.
|
||||
AWS cli does not provide pod information.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
data "aws_eks_clusters" "eks-clusters" {}
|
||||
|
||||
module "eks-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = data.aws_eks_clusters.eks-clusters.names
|
||||
source = "../../modules/ManagementGovernance/Monitoring.EKS"
|
||||
default-tags = local.default-tags
|
||||
cluster-name = each.value
|
||||
eks-namespace = "default"
|
||||
pod-names = ["depl-nginx", "depl-alpine"]
|
||||
threshold-pod_cpu_utilization = 85
|
||||
threshold-pod_memory_utilization = 85
|
||||
threshold-pod_number_of_container_restarts = 5
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = local.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,69 @@
|
||||
// The following checks require Container Insights
|
||||
|
||||
# One ContainerInsights alarm per pod name in var.pod-names, configured from
# var.settings.alarm1 (metric, thresholds, period, statistic, action).
resource "aws_cloudwatch_metric_alarm" "eks-pod_cpu_utilization" {
  for_each                  = toset(var.pod-names)
  # each.value is the pod name (a string) and cannot be indexed, so the
  # ecccode prefix is read from the alarm's settings entry.
  # NOTE(review): assumes var.settings.alarm1 carries "ecccode" like the
  # settings of the other monitoring modules - confirm.
  alarm_name                = "${var.settings.alarm1.ecccode}:${var.cw-alarm-prefix}:EKS:${var.cluster-name}:${each.value}:${var.settings.alarm1.metric}"
  comparison_operator       = var.settings.alarm1.comparison_operator
  evaluation_periods        = var.settings.alarm1.evaluation_periods
  metric_name               = var.settings.alarm1.metric
  period                    = var.settings.alarm1.period
  statistic                 = var.settings.alarm1.statistic
  threshold                 = var.settings.alarm1.threshold
  alarm_description         = "EKS:${var.settings.alarm1.metric}"
  namespace                 = "ContainerInsights"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  # The same target is notified on both ALARM and OK transitions.
  alarm_actions             = [var.settings.alarm1.action]
  ok_actions                = [var.settings.alarm1.action]
  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }
}
|
||||
|
||||
# One ContainerInsights alarm per pod name in var.pod-names, configured from
# var.settings.alarm2. The comparison operator and evaluation periods are
# fixed in this resource rather than read from settings.
resource "aws_cloudwatch_metric_alarm" "eks-pod_memory_utilization" {
  for_each = toset(var.pod-names)

  # each.value is the pod name (a string) and cannot be indexed, so the
  # ecccode prefix is read from the alarm's settings entry.
  # NOTE(review): assumes var.settings.alarm2 carries "ecccode" like the
  # settings of the other monitoring modules - confirm.
  alarm_name                = "${var.settings.alarm2.ecccode}:${var.cw-alarm-prefix}:EKS:${var.cluster-name}:${each.value}:${var.settings.alarm2.metric}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = "3"
  metric_name               = var.settings.alarm2.metric
  period                    = var.settings.alarm2.period
  statistic                 = var.settings.alarm2.statistic
  threshold                 = var.settings.alarm2.threshold
  alarm_description         = "EKS:${var.settings.alarm2.metric}"
  namespace                 = "ContainerInsights"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  # The same target is notified on both ALARM and OK transitions.
  alarm_actions             = [var.settings.alarm2.action]
  ok_actions                = [var.settings.alarm2.action]
  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }
}
|
||||
|
||||
# One ContainerInsights alarm per pod name in var.pod-names, configured from
# var.settings.alarm3. The comparison operator and evaluation periods are
# fixed in this resource rather than read from settings.
resource "aws_cloudwatch_metric_alarm" "eks-pod_number_of_container_restarts" {
  for_each = toset(var.pod-names)

  # each.value is the pod name (a string) and cannot be indexed, so the
  # ecccode prefix is read from the alarm's settings entry.
  # NOTE(review): assumes var.settings.alarm3 carries "ecccode" like the
  # settings of the other monitoring modules - confirm.
  alarm_name                = "${var.settings.alarm3.ecccode}:${var.cw-alarm-prefix}:EKS:${var.cluster-name}:${each.value}:${var.settings.alarm3.metric}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = "3"
  metric_name               = var.settings.alarm3.metric
  period                    = var.settings.alarm3.period
  statistic                 = var.settings.alarm3.statistic
  threshold                 = var.settings.alarm3.threshold
  alarm_description         = "EKS:${var.settings.alarm3.metric}"
  namespace                 = "ContainerInsights"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  # The same target is notified on both ALARM and OK transitions.
  alarm_actions             = [var.settings.alarm3.action]
  ok_actions                = [var.settings.alarm3.action]
  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
terraform {
|
||||
required_version = "~> 1.3.0"
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = ">= 4.36.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable cluster-name {}
|
||||
variable eks-namespace {}
|
||||
# Names of the pods to create alarms for; each name becomes a for_each key
# and the PodName dimension of the EKS alarms.
variable "pod-names" {
  # Explicit element type instead of the legacy bare `list` keyword.
  type = list(string)
}
|
||||
variable settings {}
|
||||
@@ -0,0 +1,25 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "emr-clusters" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "emr"
|
||||
}
|
||||
|
||||
module "emr-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.emr-clusters.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.EMR"
|
||||
default-tags = local.default-tags
|
||||
job-flow-id = split("/", each.value)[1]
|
||||
threshold-AppsPending = 2
|
||||
threshold-CapacityRemainingGB = 100
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,19 @@
|
||||
resource "aws_cloudwatch_metric_alarm" "emr-alarms" {
|
||||
for_each = var.settings
|
||||
alarm_name = "${each.value["ecccode"]}-EMR_${var.job-flow-id}-${each.value["metric"]}"
|
||||
comparison_operator = each.value["comparison_operator"]
|
||||
evaluation_periods = each.value["evaluation_periods"]
|
||||
metric_name = each.value["metric"]
|
||||
period = each.value["period"]
|
||||
statistic = each.value["statistic"]
|
||||
threshold = each.value["threshold"]
|
||||
alarm_description = "EMR:${each.value["metric"]}"
|
||||
namespace = "AWS/ElasticMapReduce"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [each.value["action"]]
|
||||
ok_actions = [each.value["action"]]
|
||||
dimensions = {
|
||||
JobFlowId = var.job-flow-id
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
terraform {
|
||||
required_version = "~> 1.3.0"
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = ">= 4.36.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable job-flow-id {}
|
||||
variable settings {}
|
||||
@@ -0,0 +1,5 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
@@ -0,0 +1,46 @@
|
||||
resource "aws_cloudwatch_event_rule" "EventRule" {
|
||||
name = "${var.cw-alarm-prefix}-health-events"
|
||||
description = "A CloudWatch Event Rule that triggers on changes in the status of AWS Personal Health Dashboard (AWS Health) and forwards the events to an SNS topic."
|
||||
state = var.actions-enabled
|
||||
event_pattern = <<PATTERN
|
||||
{
|
||||
"detail": {
|
||||
"service": ["DIRECTCONNECT", "VPN", "LAMBDA", "EC2", "RDS"]
|
||||
},
|
||||
"detail-type": [
|
||||
"AWS Health Event"
|
||||
],
|
||||
"source": [
|
||||
"aws.health"
|
||||
]
|
||||
}
|
||||
PATTERN
|
||||
lifecycle {
|
||||
ignore_changes = [tags["LastModified"]]
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_event_target" "TargetForEventRule" {
|
||||
rule = aws_cloudwatch_event_rule.EventRule.name
|
||||
# target_id = "health-event-notification-sns"
|
||||
arn = var.settings.healthEvents.action
|
||||
input_transformer {
|
||||
input_paths = {
|
||||
"account" : "$.account",
|
||||
"endTime" : "$.detail.endTime",
|
||||
"message" : "$.detail.eventDescription[0].latestDescription",
|
||||
"resources" : "$.resources",
|
||||
"service" : "$.detail.service",
|
||||
"startTime" : "$.detail.startTime"
|
||||
}
|
||||
input_template = <<EOF
|
||||
"A maintenance has been scheduled for <service> on AWS account <account>."
|
||||
|
||||
"Resources: <resources>"
|
||||
"Start time: <startTime>"
|
||||
"End time: <endTime>"
|
||||
|
||||
"Detail: <message>"
|
||||
EOF
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
terraform {
|
||||
required_version = "~> 1.3.0"
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = ">= 4.36.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable settings {}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
The Terraform lifecycle configuration ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "kafka-clusters" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "kafka"
|
||||
}
|
||||
|
||||
module "kafka-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.kafka-clusters.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.Kafka"
|
||||
default-tags = local.default-tags
|
||||
cluster-name = each.value
|
||||
threshold-ZooKeeperRequestLatencyMsMean = 30
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,116 @@
|
||||
resource "aws_cloudwatch_metric_alarm" "Kafka-ZooKeeperRequestLatencyMsMean" {
|
||||
alarm_name = "${var.settings.ZooKeeperRequestLatencyMsMean.ecccode}-Kafka_${var.cluster-name}-ZooKeeperRequestLatencyMsMean"
|
||||
comparison_operator = var.settings.ZooKeeperRequestLatencyMsMean.comparison_operator
|
||||
evaluation_periods = var.settings.ZooKeeperRequestLatencyMsMean.evaluation_periods
|
||||
metric_name = "ZooKeeperRequestLatencyMsMean"
|
||||
period = var.settings.ZooKeeperRequestLatencyMsMean.period
|
||||
statistic = var.settings.ZooKeeperRequestLatencyMsMean.statistic
|
||||
threshold = var.settings.ZooKeeperRequestLatencyMsMean.threshold
|
||||
alarm_description = "Kafka:ZooKeeperRequestLatencyMsMean"
|
||||
namespace = "AWS/Kafka"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.settings.ZooKeeperRequestLatencyMsMean.action]
|
||||
ok_actions = [var.settings.ZooKeeperRequestLatencyMsMean.action]
|
||||
dimensions = {
|
||||
"Cluster Name" = var.cluster-name
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_msk_cluster" "msk-cluster" {
|
||||
cluster_name = var.cluster-name
|
||||
}
|
||||
|
||||
data "aws_msk_broker_nodes" "msk-broker" {
|
||||
cluster_arn = data.aws_msk_cluster.msk-cluster.arn
|
||||
}
|
||||
|
||||
# Per-broker CPU alarm for the MSK cluster: one alarm per broker id returned
# by the aws_msk_broker_nodes data source. The alarm evaluates the metric-math
# expression CpuUser + CpuSystem against var.settings.CpuUserSystem.threshold.
resource "aws_cloudwatch_metric_alarm" "Kafka-CpuUserSystem" {
  # Broker ids are converted to strings so they can be used as for_each keys.
  for_each                  = toset([for i in data.aws_msk_broker_nodes.msk-broker.node_info_list[*].broker_id : tostring(i)])
  alarm_name                = "${var.settings.CpuUserSystem.ecccode}-Kafka_${var.cluster-name}-${each.value}-CpuUsage"
  comparison_operator       = var.settings.CpuUserSystem.comparison_operator
  evaluation_periods        = var.settings.CpuUserSystem.evaluation_periods
  threshold                 = var.settings.CpuUserSystem.threshold
  # Describe the metric this alarm actually watches (the text was previously
  # copied from the ZooKeeper latency alarm).
  alarm_description         = "Kafka:CpuUserSystem"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  # The same target is notified on both ALARM and OK transitions.
  alarm_actions             = [var.settings.CpuUserSystem.action]
  ok_actions                = [var.settings.CpuUserSystem.action]

  # m1: user-space CPU of this broker.
  metric_query {
    id = "m1"
    metric {
      metric_name = "CpuUser"
      namespace   = "AWS/Kafka"
      period      = var.settings.CpuUserSystem.period
      stat        = var.settings.CpuUserSystem.statistic
      dimensions = {
        "Cluster Name" = var.cluster-name
        "Broker ID"    = each.value
      }
    }
  }

  # m2: kernel-space CPU of this broker.
  metric_query {
    id = "m2"
    metric {
      metric_name = "CpuSystem"
      namespace   = "AWS/Kafka"
      period      = var.settings.CpuUserSystem.period
      stat        = var.settings.CpuUserSystem.statistic
      dimensions = {
        "Cluster Name" = var.cluster-name
        "Broker ID"    = each.value
      }
    }
  }

  # e1: the sum of both; this is the only query whose data the alarm evaluates.
  metric_query {
    id          = "e1"
    expression  = "m1 + m2"
    label       = "CpuUserSystem"
    return_data = "true"
  }
}
|
||||
|
||||
# Per-broker alarm on the KafkaDataLogsDiskUsed metric; one alarm per broker ID.
resource "aws_cloudwatch_metric_alarm" "Kafka-KafkaDataLogsDiskUsed" {
  for_each = toset([for i in data.aws_msk_broker_nodes.msk-broker.node_info_list[*].broker_id : tostring(i)])

  # Identification
  alarm_name        = "${var.settings.KafkaDataLogsDiskUsed.ecccode}-Kafka_${var.cluster-name}-${each.value}-KafkaDataLogsDiskUsed"
  alarm_description = "Kafka:KafkaDataLogsDiskUsed"

  # Metric being watched
  namespace   = "AWS/Kafka"
  metric_name = "KafkaDataLogsDiskUsed"
  dimensions = {
    "Cluster Name" = var.cluster-name
    "Broker ID"    = each.value
  }

  # Evaluation, driven entirely by var.settings.KafkaDataLogsDiskUsed
  statistic           = var.settings.KafkaDataLogsDiskUsed.statistic
  period              = var.settings.KafkaDataLogsDiskUsed.period
  evaluation_periods  = var.settings.KafkaDataLogsDiskUsed.evaluation_periods
  comparison_operator = var.settings.KafkaDataLogsDiskUsed.comparison_operator
  threshold           = var.settings.KafkaDataLogsDiskUsed.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.KafkaDataLogsDiskUsed.action]
  ok_actions                = [var.settings.KafkaDataLogsDiskUsed.action]
  insufficient_data_actions = []
}
|
||||
|
||||
# Per-broker alarm on the HeapMemoryAfterGC metric; one alarm per broker ID.
resource "aws_cloudwatch_metric_alarm" "Kafka-HeapMemoryAfterGC" {
  for_each = toset([for i in data.aws_msk_broker_nodes.msk-broker.node_info_list[*].broker_id : tostring(i)])

  # Identification
  alarm_name        = "${var.settings.HeapMemoryAfterGC.ecccode}-Kafka_${var.cluster-name}-${each.value}-HeapMemoryAfterGC"
  alarm_description = "Kafka:HeapMemoryAfterGC"

  # Metric being watched
  namespace   = "AWS/Kafka"
  metric_name = "HeapMemoryAfterGC"
  dimensions = {
    "Cluster Name" = var.cluster-name
    "Broker ID"    = each.value
  }

  # Evaluation, driven entirely by var.settings.HeapMemoryAfterGC
  statistic           = var.settings.HeapMemoryAfterGC.statistic
  period              = var.settings.HeapMemoryAfterGC.period
  evaluation_periods  = var.settings.HeapMemoryAfterGC.evaluation_periods
  comparison_operator = var.settings.HeapMemoryAfterGC.comparison_operator
  threshold           = var.settings.HeapMemoryAfterGC.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.HeapMemoryAfterGC.action]
  ok_actions                = [var.settings.HeapMemoryAfterGC.action]
  insufficient_data_actions = []
}
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): no reference to this input is visible in this module's alarms — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on the alarms.
variable "actions-enabled" {}

# MSK cluster name: used for the cluster lookup, in alarm names and as the "Cluster Name" dimension.
variable "cluster-name" {}

# Per-alarm settings object; the alarms read ecccode, comparison_operator, evaluation_periods,
# period, statistic, threshold and action from the entry named after each alarm.
variable "settings" {}
|
||||
@@ -0,0 +1,26 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
Terraform `lifecycle` ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "ngw" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "ngw"
|
||||
}
|
||||
|
||||
module "ngw-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.ngw.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.NGW"
|
||||
default-tags = local.default-tags
|
||||
res-id = split("/", each.value)[1]
|
||||
threshold-ErrorPortAllocation = 2
|
||||
threshold-ConnectionEstablishedCount = 1000
|
||||
threshold-PacketsDropCount = 10
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,19 @@
|
||||
# NAT gateway alarms: one CloudWatch alarm per entry of var.settings, all scoped
# to the NAT gateway identified by var.res-id.
resource "aws_cloudwatch_metric_alarm" "ngw-alarms" {
  for_each = var.settings

  # Identification
  alarm_name        = "${each.value.ecccode}-NGW_${var.res-id}-${each.value.metric}"
  alarm_description = "NGW:${each.value.metric}"

  # Metric being watched
  namespace   = "AWS/NATGateway"
  metric_name = each.value.metric
  dimensions = {
    NatGatewayId = var.res-id
  }

  # Evaluation
  statistic           = each.value.statistic
  period              = each.value.period
  evaluation_periods  = each.value.evaluation_periods
  comparison_operator = each.value.comparison_operator
  threshold           = each.value.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [each.value.action]
  ok_actions                = [each.value.action]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): not referenced by the alarm resource of this module — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on every alarm.
variable "actions-enabled" {}

# NAT gateway ID: used in alarm names and as the NatGatewayId dimension.
variable "res-id" {}

# Collection iterated with for_each; each entry is read for ecccode, metric, comparison_operator,
# evaluation_periods, period, statistic, threshold and action.
variable "settings" {}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
Terraform `lifecycle` ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "nlb-arns" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "nlb"
|
||||
}
|
||||
|
||||
module "nlb-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.nlb-arns.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.NLB"
|
||||
default-tags = local.default-tags
|
||||
load-balancer = each.value
|
||||
threshold-HealthHostCountMin = 1
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
data "external" "nlb-targetgroups" {
|
||||
program = ["bash", "${path.module}/list-nlb-targetgroups.sh"]
|
||||
query = {
|
||||
parameter = var.load-balancer
|
||||
}
|
||||
}
|
||||
*/
|
||||
locals {
  # CloudWatch LoadBalancer dimension value, "net/<name>/<id>", assembled from the
  # third and fourth "/"-separated segments of the load balancer ARN.
  nlb-name = join("/", [
    "net",
    split("/", var.load-balancer)[2],
    split("/", var.load-balancer)[3],
  ])
}
|
||||
|
||||
# Load-balancer-level alarm on the TCP_Target_Reset_Count metric of the NLB.
resource "aws_cloudwatch_metric_alarm" "nlb-TCP_Target_Reset_Count" {
  # Identification
  alarm_name        = "${var.settings.TCP_Target_Reset_Count.ecccode}-NLB_${local.nlb-name}-TCP_Target_Reset_Count"
  alarm_description = "NLB:TCP_Target_Reset_Count"

  # Metric being watched
  namespace   = "AWS/NetworkELB"
  metric_name = "TCP_Target_Reset_Count"
  dimensions = {
    LoadBalancer = local.nlb-name
  }

  # Evaluation, driven entirely by var.settings.TCP_Target_Reset_Count
  statistic           = var.settings.TCP_Target_Reset_Count.statistic
  period              = var.settings.TCP_Target_Reset_Count.period
  evaluation_periods  = var.settings.TCP_Target_Reset_Count.evaluation_periods
  comparison_operator = var.settings.TCP_Target_Reset_Count.comparison_operator
  threshold           = var.settings.TCP_Target_Reset_Count.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.TCP_Target_Reset_Count.action]
  ok_actions                = [var.settings.TCP_Target_Reset_Count.action]
  insufficient_data_actions = []
}
|
||||
|
||||
/*
|
||||
module "nlb-targetgroups" {
|
||||
source = "../../util/resource-list"
|
||||
resource-type = "nlb-targetgroups"
|
||||
query-input = var.load-balancer
|
||||
asrolearn = var.asrolearn
|
||||
}
|
||||
*/
|
||||
|
||||
// causes Rate exceeded error, maybe because of adaptive AWS_RETRY_MODE?
|
||||
|
||||
/*
|
||||
module "nlb_tgs" {
|
||||
assume_role_arn = var.asrolearn
|
||||
role_session_name = "terraform-resource-list"
|
||||
source = "../../util/terraform-aws-cli"
|
||||
aws_cli_commands = ["elbv2", "describe-target-groups", "--load-balancer-arn", var.load-balancer]
|
||||
aws_cli_query = "TargetGroups[*].TargetGroupArn"
|
||||
}
|
||||
*/
|
||||
|
||||
# Runs the AWS CLI through the local awscli helper module to list the ARNs of the
# target groups attached to the load balancer. The result is read by the
# target-group alarms as module.nlb_tgs.awscliout.
module "nlb_tgs" {
  source = "../../util/awscli"

  # Credentials handed to the helper module.
  access_key    = var.target-account-ak
  secret_key    = var.target-account-sk
  session_token = var.target-account-token

  aws_cli_commands = "elbv2 describe-target-groups --load-balancer-arn ${var.load-balancer} --query TargetGroups[*].TargetGroupArn"
}
|
||||
|
||||
|
||||
# Target-group alarm on HealthyHostCount: one alarm per target group ARN returned
# by module.nlb_tgs.
resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
  # Previous lookup, kept for reference:
  # for_each = module.nlb-targetgroups.result-set
  for_each = toset(module.nlb_tgs.awscliout)

  # split(":", <arn>)[5] is the sixth ":"-separated field of the target group ARN.
  alarm_name        = "${var.settings.HealthHostCountMin.ecccode}-NLBTG_${split(":", each.value)[5]}-HealthyHostCount"
  alarm_description = "NLBTG:HealthyHostCount"

  namespace   = "AWS/NetworkELB"
  metric_name = "HealthyHostCount"
  dimensions = {
    TargetGroup = split(":", each.value)[5]
    # Use the shared local instead of re-deriving "net/<name>/<id>" inline, so this
    # alarm always agrees with the load-balancer-level alarm in this module.
    LoadBalancer = local.nlb-name
  }

  # Evaluation, driven by the HealthHostCountMin entry of var.settings.
  statistic           = var.settings.HealthHostCountMin.statistic
  period              = var.settings.HealthHostCountMin.period
  evaluation_periods  = var.settings.HealthHostCountMin.evaluation_periods
  comparison_operator = var.settings.HealthHostCountMin.comparison_operator
  threshold           = var.settings.HealthHostCountMin.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.HealthHostCountMin.action]
  ok_actions                = [var.settings.HealthHostCountMin.action]
  insufficient_data_actions = []
}
|
||||
|
||||
|
||||
# Target-group alarm on UnHealthyHostCount: one alarm per target group ARN returned
# by module.nlb_tgs.
resource "aws_cloudwatch_metric_alarm" "nlb-UnHealthyHostCount" {
  # Previous lookup, kept for reference:
  # for_each = module.nlb-targetgroups.result-set
  for_each = toset(module.nlb_tgs.awscliout)

  # split(":", <arn>)[5] is the sixth ":"-separated field of the target group ARN.
  alarm_name        = "${var.settings.UnHealthyHostCount.ecccode}-NLBTG_${split(":", each.value)[5]}-UnHealthyHostCount"
  alarm_description = "NLBTG:UnHealthyHostCount"

  namespace   = "AWS/NetworkELB"
  metric_name = "UnHealthyHostCount"
  dimensions = {
    TargetGroup = split(":", each.value)[5]
    # Use the shared local instead of re-deriving "net/<name>/<id>" inline, so this
    # alarm always agrees with the load-balancer-level alarm in this module.
    LoadBalancer = local.nlb-name
  }

  # Evaluation, driven by the UnHealthyHostCount entry of var.settings.
  statistic           = var.settings.UnHealthyHostCount.statistic
  period              = var.settings.UnHealthyHostCount.period
  evaluation_periods  = var.settings.UnHealthyHostCount.evaluation_periods
  comparison_operator = var.settings.UnHealthyHostCount.comparison_operator
  threshold           = var.settings.UnHealthyHostCount.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.UnHealthyHostCount.action]
  ok_actions                = [var.settings.UnHealthyHostCount.action]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Number of target group ARNs returned by the AWS CLI lookup.
output "nlb-tg-count" {
  # Previous implementation, kept for reference:
  # value = length(module.nlb-targetgroups.result-set)
  value = length(flatten(module.nlb_tgs.awscliout))
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,8 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): no reference to this input is visible in this module's alarms — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on every alarm.
variable "actions-enabled" {}

# Load balancer ARN; split on "/" to build the LoadBalancer dimension and passed to the CLI lookup.
variable "load-balancer" {}

# Per-alarm settings object; read for the TCP_Target_Reset_Count, HealthHostCountMin and
# UnHealthyHostCount entries.
variable "settings" {}

# variable "asrolearn" {}

# Credentials forwarded to the awscli helper module (access key, secret key, session token).
# NOTE(review): these carry secrets; consider marking them sensitive once the helper module supports it.
variable "target-account-ak" {}
variable "target-account-sk" {}
variable "target-account-token" {}
|
||||
@@ -0,0 +1,27 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
Terraform `lifecycle` ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "es-domains" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "opensearch"
|
||||
}
|
||||
|
||||
module "es-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.es-domains.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.OpenSearch"
|
||||
default-tags = local.default-tags
|
||||
domain-name = each.value
|
||||
threshold-CPUUtilization = 90
|
||||
threshold-IndexingLatency = 3
|
||||
threshold-SearchLatency = 3
|
||||
# threshold-KibanaHealthyNodes = 1
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,22 @@
|
||||
# Identity of the calling account; its account ID is the ClientId dimension of AWS/ES metrics.
data "aws_caller_identity" "this" {}

# OpenSearch/Elasticsearch domain alarms: one CloudWatch alarm per entry of
# var.settings, all scoped to the domain named by var.domain-name.
resource "aws_cloudwatch_metric_alarm" "ES-alarms" {
  for_each = var.settings

  # Identification
  alarm_name        = "${each.value["ecccode"]}-ES_${var.domain-name}-${each.value["metric"]}"
  alarm_description = "ES:${each.value["metric"]}"

  # Metric being watched
  namespace   = "AWS/ES"
  metric_name = each.value["metric"]
  dimensions = {
    DomainName = var.domain-name
    # Use the explicit account_id attribute rather than the implicit resource id,
    # matching how the SNS module in this repository reads the account ID.
    ClientId = data.aws_caller_identity.this.account_id
  }

  # Evaluation
  statistic           = each.value["statistic"]
  period              = each.value["period"]
  evaluation_periods  = each.value["evaluation_periods"]
  comparison_operator = each.value["comparison_operator"]
  threshold           = each.value["threshold"]

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [each.value["action"]]
  ok_actions                = [each.value["action"]]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): not referenced by the alarm resource of this module — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on every alarm.
variable "actions-enabled" {}

# Domain name: used in alarm names and as the DomainName dimension.
variable "domain-name" {}

# Collection iterated with for_each; each entry is read for ecccode, metric, comparison_operator,
# evaluation_periods, period, statistic, threshold and action.
variable "settings" {}
|
||||
@@ -0,0 +1,31 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
Terraform `lifecycle` ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
AWS provider 4.47.0 or above is needed for datasource aws_db_instances (https://github.com/hashicorp/terraform-provider-aws/blob/main/CHANGELOG.md)
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "rds-instances" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "rds"
|
||||
}
|
||||
|
||||
module "rds-monitoring" {
|
||||
# for_each = toset(var.rds-instance-ids)
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.rds-instances.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.RDS"
|
||||
default-tags = local.default-tags
|
||||
rds-instance-name = each.value
|
||||
threshold-CpuUtilization = 90
|
||||
threshold-FreeableMemory = 512 * 1024 * 1024
|
||||
threshold-FreeStorageSpace = 5 * 1024 * 1024 * 1024
|
||||
threshold-DiskQueueDepth = 30
|
||||
threshold-ReadLatency = 0.03
|
||||
threshold-WriteLatency = 0.03
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,19 @@
|
||||
# RDS instance alarms: one CloudWatch alarm per entry of var.settings, all scoped
# to the DB instance named by var.rds-instance-name.
resource "aws_cloudwatch_metric_alarm" "rds-alarms" {
  for_each = var.settings

  # Identification
  alarm_name        = "${each.value.ecccode}-RDS_${var.rds-instance-name}-${each.value.metric}"
  alarm_description = "RDS:${each.value.metric}"

  # Metric being watched
  namespace   = "AWS/RDS"
  metric_name = each.value.metric
  dimensions = {
    DBInstanceIdentifier = var.rds-instance-name
  }

  # Evaluation
  statistic           = each.value.statistic
  period              = each.value.period
  evaluation_periods  = each.value.evaluation_periods
  comparison_operator = each.value.comparison_operator
  threshold           = each.value.threshold

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [each.value.action]
  ok_actions                = [each.value.action]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.47.0"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): not referenced by the alarm resource of this module — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on every alarm.
variable "actions-enabled" {}

# DB instance identifier: used in alarm names and as the DBInstanceIdentifier dimension.
variable "rds-instance-name" {}

# Collection iterated with for_each; each entry is read for ecccode, metric, comparison_operator,
# evaluation_periods, period, statistic, threshold and action.
variable "settings" {}
|
||||
@@ -0,0 +1,26 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
Terraform `lifecycle` ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "redis-instances" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "redis"
|
||||
}
|
||||
|
||||
module "redis-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.redis-instances.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.Redis"
|
||||
default-tags = local.default-tags
|
||||
redis-cluster-id = each.value
|
||||
threshold-EngineCPUUtilization = 90
|
||||
threshold-DatabaseMemoryUsagePercentage = 90
|
||||
threshold-CacheHitRate = 3
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,21 @@
|
||||
# ElastiCache (Redis) alarms: one CloudWatch alarm per entry of var.settings, all
# scoped to the cache cluster identified by var.redis-cluster-id.
resource "aws_cloudwatch_metric_alarm" "redis-alarms" {
  for_each = var.settings

  # Identification
  alarm_name        = "${each.value.ecccode}-Redis_${var.redis-cluster-id}-${each.value.metric}"
  alarm_description = "ElastiCache:${each.value.metric}"

  # Metric being watched
  namespace   = "AWS/ElastiCache"
  metric_name = each.value.metric
  dimensions = {
    CacheClusterId = var.redis-cluster-id
  }

  # Evaluation; missing datapoints are treated as not breaching the threshold.
  statistic           = each.value.statistic
  period              = each.value.period
  evaluation_periods  = each.value.evaluation_periods
  comparison_operator = each.value.comparison_operator
  threshold           = each.value.threshold
  treat_missing_data  = "notBreaching"

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [each.value.action]
  ok_actions                = [each.value.action]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): not referenced by the alarm resource of this module — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on every alarm.
variable "actions-enabled" {}

# Cache cluster ID: used in alarm names and as the CacheClusterId dimension.
variable "redis-cluster-id" {}

# Collection iterated with for_each; each entry is read for ecccode, metric, comparison_operator,
# evaluation_periods, period, statistic, threshold and action.
variable "settings" {}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Monitoring module
|
||||
This module deploys the default cloudwatch metric monitoring
|
||||
|
||||
## Notes
|
||||
Terraform `lifecycle` ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
||||
|
||||
## Example
|
||||
```terraform
|
||||
module "tgw" {
|
||||
source = "../../modules/util/resource-list"
|
||||
resource-type = "tgw"
|
||||
}
|
||||
|
||||
module "tgw-monitoring" {
|
||||
cw-alarm-prefix = local.cw-alarm-prefix
|
||||
for_each = module.tgw.result-set
|
||||
source = "../../modules/ManagementGovernance/Monitoring.TGW"
|
||||
default-tags = local.default-tags
|
||||
tgw-id = split("/", each.value)[1]
|
||||
threshold-PacketDropCountNoRoute = 1
|
||||
actions-enabled = var.actions-enabled
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,19 @@
|
||||
# Transit gateway alarms: one CloudWatch alarm per entry of var.settings, all
# scoped to the transit gateway identified by var.tgw-id.
resource "aws_cloudwatch_metric_alarm" "tgw-PacketDropCountNoRoute" {
  for_each = var.settings

  # The alarm name is built from the entry's own metric instead of the hard-coded
  # "PacketDropCountNoRoute", so the name always matches metric_name and several
  # settings entries cannot be mislabeled or collide. For a PacketDropCountNoRoute
  # entry the resulting name is unchanged.
  alarm_name        = "${each.value["ecccode"]}-TGW_${var.tgw-id}-${each.value["metric"]}"
  alarm_description = "TGW:${each.value["metric"]}"

  # Metric being watched
  namespace   = "AWS/TransitGateway"
  metric_name = each.value["metric"]
  dimensions = {
    TransitGateway = var.tgw-id
  }

  # Evaluation
  statistic           = each.value["statistic"]
  period              = each.value["period"]
  evaluation_periods  = each.value["evaluation_periods"]
  comparison_operator = each.value["comparison_operator"]
  threshold           = each.value["threshold"]

  # Notifications: the same action is used for ALARM and OK transitions.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [each.value["action"]]
  ok_actions                = [each.value["action"]]
  insufficient_data_actions = []
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 4.47.0"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x patch releases of Terraform.
  # NOTE(review): confirm the pin to the 1.3 series is intentional.
  required_version = "~> 1.3.0"
}
|
||||
@@ -0,0 +1,4 @@
|
||||
# Module inputs. None declares a type or a default, so all of them are required.

# NOTE(review): not referenced by the alarm resource of this module — confirm it is still needed.
variable "cw-alarm-prefix" {}

# Forwarded to actions_enabled on every alarm.
variable "actions-enabled" {}

# Transit gateway ID: used in alarm names and as the TransitGateway dimension.
variable "tgw-id" {}

# Collection iterated with for_each; each entry is read for ecccode, metric, comparison_operator,
# evaluation_periods, period, statistic, threshold and action.
variable "settings" {}
|
||||
@@ -0,0 +1,47 @@
|
||||
<!-- This readme file is generated with terraform-docs -->
|
||||
## Requirements
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| terraform | >= 1.3.0 |
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Providers
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Modules
|
||||
|
||||
No modules.
|
||||
|
||||
## Resources
|
||||
|
||||
| Name | Type |
|
||||
|------|------|
|
||||
| [aws_sns_topic.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic) | resource |
|
||||
| [aws_sns_topic_subscription.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic_subscription) | resource |
|
||||
| [aws_caller_identity.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
|
||||
| [aws_region.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|------|---------|:--------:|
|
||||
| email-addresses | Email recipients of SNS notifications | `set(string)` | n/a | yes |
|
||||
| kms-key-id | KMS key id for SNS topic at-rest encryption. Make sure the sender has access to this key | `string` | n/a | yes |
|
||||
| sender | ARN of SNS sender or sending service name | `string` | n/a | yes |
|
||||
| sender-type | Sender principal type. Value should be either *AWS* or *Service* | `string` | n/a | yes |
|
||||
| sns-topic-description | SNS topic display name | `string` | n/a | yes |
|
||||
| sns-topic-name | Name of SNS topic | `string` | n/a | yes |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| sns-topic-arn | n/a |
|
||||
|
||||
---
|
||||
## Authorship
|
||||
This module was developed by xpk.
|
||||
@@ -0,0 +1,69 @@
|
||||
# Account and region of the caller; both are used to build the topic ARN inside the policy.
data "aws_region" "this" {}
data "aws_caller_identity" "this" {}

# Encrypted SNS topic with an inline access policy made of three statements:
#   1. SnsTopicAdmin              - topic management for the owning account
#   2. AllowPublishing            - lets the configured sender publish
#   3. AllowPublishThroughSSLOnly - denies publishing over non-TLS transport
resource "aws_sns_topic" "this" {
  name              = var.sns-topic-name
  display_name      = var.sns-topic-description
  kms_master_key_id = var.kms-key-id

  policy = jsonencode({
    Version = "2008-10-17"
    Id      = "SnsTopicPolicy"
    Statement = [
      {
        Sid    = "SnsTopicAdmin"
        Effect = "Allow"
        Principal = {
          AWS = data.aws_caller_identity.this.account_id
        }
        Action = [
          "SNS:GetTopicAttributes",
          "SNS:SetTopicAttributes",
          "SNS:AddPermission",
          "SNS:RemovePermission",
          "SNS:DeleteTopic",
          "SNS:Subscribe",
          "SNS:ListSubscriptionsByTopic",
          "SNS:Publish",
          "SNS:Receive"
        ]
        Resource = "arn:aws:sns:${data.aws_region.this.name}:${data.aws_caller_identity.this.account_id}:${var.sns-topic-name}"
        Condition = {
          StringEquals = {
            "AWS:SourceOwner" = data.aws_caller_identity.this.account_id
          }
        }
      },
      {
        Sid    = "AllowPublishing"
        Effect = "Allow"
        Principal = {
          # The principal key is dynamic: var.sender-type is validated to be "AWS" or "Service".
          (var.sender-type) = var.sender
        }
        Action   = "sns:Publish"
        Resource = "arn:aws:sns:${data.aws_region.this.name}:${data.aws_caller_identity.this.account_id}:${var.sns-topic-name}"
      },
      {
        Sid       = "AllowPublishThroughSSLOnly"
        Effect    = "Deny"
        Principal = "*"
        Action    = "SNS:Publish"
        Resource  = "arn:aws:sns:${data.aws_region.this.name}:${data.aws_caller_identity.this.account_id}:${var.sns-topic-name}"
        Condition = {
          Bool = {
            "aws:SecureTransport" = "false"
          }
        }
      }
    ]
  })
}
|
||||
|
||||
# One email subscription to the topic per address in var.email-addresses.
resource "aws_sns_topic_subscription" "this" {
  for_each = var.email-addresses

  protocol  = "email"
  endpoint  = each.value
  topic_arn = aws_sns_topic.this.arn
}
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# ARN of the SNS topic created by this module.
output "sns-topic-arn" {
  value = aws_sns_topic.this.arn
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# --- Sender (principal allowed to publish to the topic) ---

variable "sender" {
  description = "ARN of SNS sender or sending service name"
  type        = string
}

variable "sender-type" {
  description = "Sender principal type. Value should be either *AWS* or *Service*"
  type        = string

  # The value is used as the principal key in the topic policy, so only these two are accepted.
  validation {
    condition     = var.sender-type == "AWS" || var.sender-type == "Service"
    error_message = "Valid values are AWS or Service"
  }
}

# --- Topic ---

variable "sns-topic-name" {
  description = "Name of SNS topic"
  type        = string
}

variable "sns-topic-description" {
  description = "SNS topic display name"
  type        = string
}

variable "kms-key-id" {
  description = "KMS key id for SNS topic at-rest encryption. Make sure the sender has access to this key"
  type        = string
}

# --- Subscribers ---

variable "email-addresses" {
  description = "Email recipients of SNS notifications"
  type        = set(string)
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_providers {
    aws = {
      version = ">= 5.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.3.0"
}
|
||||
@@ -0,0 +1,101 @@
|
||||
ACM sends daily expiration events for all active certificates (public, private and imported) starting 45 days prior to expiration [1].
|
||||
This module sets up an event rule and an SNS topic that deliver email notifications for expiring certificates, which is especially useful for imported certificates.
|
||||
|
||||
## Notes
|
||||
* DaysToExpiry cannot be greater than 45
|
||||
|
||||
```bash
|
||||
❯ aws acm put-account-configuration --idempotency-token abcd123456 --expiry-events DaysBeforeExpiry=46 --region=ap-east-1
|
||||
|
||||
An error occurred (ValidationException) when calling the PutAccountConfiguration operation: Days before expiry cannot be over 45.
|
||||
```
|
||||
* KMS key for SNS must allow events.amazonaws.com. Check that this statement is present in the KMS key policy. Otherwise you will get FailedInvocation in event rule graph and there is no other debug info. The default alias/aws/sns managed key does not allow encryption / decryption from cloudwatch or events [2].
|
||||
```json
|
||||
{
|
||||
"Sid": "Allow publish from events",
|
||||
"Effect": "Allow",
|
||||
"Principal": {
|
||||
"Service": "events.amazonaws.com"
|
||||
},
|
||||
"Action": [
|
||||
"kms:Encrypt",
|
||||
"kms:Decrypt",
|
||||
"kms:ReEncrypt*",
|
||||
"kms:GenerateDataKey*",
|
||||
"kms:DescribeKey"
|
||||
],
|
||||
"Resource": "*"
|
||||
}
|
||||
```
|
||||
[1] https://docs.aws.amazon.com/acm/latest/userguide/supported-events.html
|
||||
[2] https://docs.gruntwork.io/discussions/knowledge-base/238/
|
||||
|
||||
## Sample Event bridge event
|
||||
```json
|
||||
{
|
||||
"version": "0",
|
||||
"id": "id",
|
||||
"detail-type": "ACM Certificate Approaching Expiration",
|
||||
"source": "aws.acm",
|
||||
"account": "account",
|
||||
"time": "2020-09-30T06:51:08Z",
|
||||
"region": "region",
|
||||
"resources": [
|
||||
"arn:aws:acm:region:account:certificate/certificate_ID"
|
||||
],
|
||||
"detail": {
|
||||
"DaysToExpiry": 31,
|
||||
"CommonName": "example.com"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| terraform | >= 1.3.0 |
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Providers
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| aws | >= 5.0 |
|
||||
| random | n/a |
|
||||
|
||||
## Modules
|
||||
|
||||
| Name | Source | Version |
|
||||
|------|--------|---------|
|
||||
| awscli | ../../util/terraform-aws-cli | n/a |
|
||||
|
||||
## Resources
|
||||
|
||||
| Name | Type |
|
||||
|------|------|
|
||||
| [aws_cloudwatch_event_rule.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
|
||||
| [aws_cloudwatch_event_target.sns](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
|
||||
| [aws_sns_topic.ssl-cert-expiry-notice](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic) | resource |
|
||||
| [aws_sns_topic_policy.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic_policy) | resource |
|
||||
| [aws_sns_topic_subscription.ssl-cert-expiry-notice-sub](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic_subscription) | resource |
|
||||
| [random_id.this](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/id) | resource |
|
||||
| [aws_caller_identity.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
|
||||
| [aws_iam_policy_document.sns_topic_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|----------------------------------------------------------------------------------|------|---------|:--------:|
|
||||
| days-before-expiry | ACM DaysBeforeExpiry account configuration | `number` | `45` | no |
|
||||
| email-addresses | Set of email addresses to receive SNS notifications | `set(string)` | n/a | yes |
|
||||
| res-prefix | Resource name prefix | `string` | `"aws"` | no |
|
||||
| sns-kms-key-arn | ARN of KMS key used for SNS encryption. This key must allow events.amazonaws.com | `string` | `null` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
No outputs.
|
||||
|
||||
---
|
||||
## Authorship
|
||||
This module was developed by xpk.
|
||||
@@ -0,0 +1,98 @@
|
||||
# Identity of the calling account.
data "aws_caller_identity" "this" {}

# Two random bytes; the decimal form is appended to resource names and also used
# as the idempotency token of the ACM account-configuration call.
resource "random_id" "this" {
  byte_length = 2
}
|
||||
|
||||
# Event rule matching the "ACM Certificate Approaching Expiration" events emitted by aws.acm.
resource "aws_cloudwatch_event_rule" "this" {
  name        = "${var.res-prefix}-ssl-cert-expiry-${random_id.this.dec}"
  description = "Reminder of SSL expiring certificates"

  event_pattern = jsonencode({
    source        = ["aws.acm"]
    "detail-type" = ["ACM Certificate Approaching Expiration"]
  })
}
|
||||
|
||||
# Sends matched events to the SNS topic. The input transformer extracts the
# certificate ARN, common name and days to expiry from the event and renders
# them into the notification text.
resource "aws_cloudwatch_event_target" "sns" {
  target_id = "ssl-cert-expiry-sns-${random_id.this.dec}"
  rule      = aws_cloudwatch_event_rule.this.name
  arn       = aws_sns_topic.ssl-cert-expiry-notice.arn

  input_transformer {
    # JSONPath selectors into the event; each key becomes a <placeholder> in the template.
    input_paths = {
      cert = "$.resources[0]"
      days = "$.detail.DaysToExpiry"
      cn   = "$.detail.CommonName"
    }

    input_template = <<-EOT
      "The following ACM certificate will expire soon"

      "ID: <cert>"
      "CommonName: <cn>"
      "Days to expiry: <days>"
    EOT
  }
}
|
||||
|
||||
# Modify the ACM DaysBeforeExpiry account setting if it should be set lower than the default 45 days.
# The helper module is instantiated only when var.days-before-expiry is below 45.
module "awscli" {
  source = "../../util/terraform-aws-cli"
  count  = var.days-before-expiry < 45 ? 1 : 0

  role_session_name = "terraform-awscli"

  aws_cli_commands = [
    "acm",
    "put-account-configuration",
    "--idempotency-token",
    random_id.this.dec,
    "--expiry-events DaysBeforeExpiry=${var.days-before-expiry}",
  ]
}
|
||||
|
||||
# SNS topic and subscription
# Topic that receives the certificate-expiry notices. Encrypted with the KMS key
# given in var.sns-kms-key-arn (that variable defaults to null).
resource "aws_sns_topic" "ssl-cert-expiry-notice" {
  kms_master_key_id = var.sns-kms-key-arn
  name              = "${var.res-prefix}-ssl-cert-expiry-notice-${random_id.this.dec}"
}
|
||||
|
||||
# Attaches the generated access policy document to the notification topic.
resource "aws_sns_topic_policy" "default" {
  policy = data.aws_iam_policy_document.sns_topic_policy.json
  arn    = aws_sns_topic.ssl-cert-expiry-notice.arn
}
|
||||
|
||||
# Access policy for the notification topic:
#   1. allow the EventBridge service principal to publish to the topic;
#   2. deny publishing by any principal when the request is not made over TLS.
data "aws_iam_policy_document" "sns_topic_policy" {
  statement {
    sid    = "AllowPublishingFromEvents"
    effect = "Allow"

    # IAM action names are case-insensitive, so "sns:Publish" and "SNS:Publish"
    # are the same action; it is listed once.
    actions = ["sns:Publish"]

    principals {
      type        = "Service"
      identifiers = ["events.amazonaws.com"]
    }

    resources = [aws_sns_topic.ssl-cert-expiry-notice.arn]
  }

  statement {
    # Despite the "Allow..." sid, this is a Deny statement: it rejects any
    # publish request where aws:SecureTransport is false.
    sid    = "AllowPublishThroughSSLOnly"
    effect = "Deny"

    principals {
      type        = "AWS"
      identifiers = ["*"]
    }

    actions = ["sns:Publish"]

    condition {
      test     = "Bool"
      variable = "aws:SecureTransport"
      values   = ["false"]
    }

    resources = [aws_sns_topic.ssl-cert-expiry-notice.arn]
  }
}
|
||||
|
||||
# One e-mail subscription on the notification topic per address in
# var.email-addresses.
resource "aws_sns_topic_subscription" "ssl-cert-expiry-notice-sub" {
  for_each = var.email-addresses

  protocol  = "email"
  endpoint  = each.value
  topic_arn = aws_sns_topic.ssl-cert-expiry-notice.arn
}
|
||||
@@ -0,0 +1,22 @@
|
||||
# Recipients of the notices; each entry becomes one e-mail subscription.
variable "email-addresses" {
  description = "Set of email addresses to receive SNS notifications"
  type        = set(string)
}
|
||||
|
||||
# Requested ACM DaysBeforeExpiry value. The account setting is only modified
# when this is lower than 45.
variable "days-before-expiry" {
  description = "ACM DaysBeforeExpiry account configuration"
  type        = number
  default     = 45
}
|
||||
|
||||
# Prefix prepended to the EventBridge rule and SNS topic names.
variable "res-prefix" {
  description = "Resource name prefix"
  type        = string
  default     = "aws"
}
|
||||
|
||||
# Optional KMS key for topic encryption; null passes no key to the topic.
variable "sns-kms-key-arn" {
  description = "ARN of KMS key used for SNS encryption. This key must allow events.amazonaws.com"
  type        = string
  default     = null
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_version = ">= 1.3.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.0"
    }

    # The module declares a random_id resource, so the random provider is a
    # direct dependency and is declared explicitly rather than left to
    # Terraform's implicit provider resolution.
    random = {
      source  = "hashicorp/random"
      version = ">= 3.0"
    }
  }
}
|
||||
@@ -0,0 +1,44 @@
|
||||
<!-- This readme file is generated with terraform-docs -->
|
||||
## Requirements
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| terraform | >= 1.3.0 |
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Providers
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| aws | >= 5.0 |
|
||||
|
||||
## Modules
|
||||
|
||||
No modules.
|
||||
|
||||
## Resources
|
||||
|
||||
| Name | Type |
|
||||
|------|------|
|
||||
| [aws_cloudwatch_log_group.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
|
||||
| [aws_ssm_maintenance_window.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_maintenance_window) | resource |
|
||||
| [aws_ssm_maintenance_window_target.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_maintenance_window_target) | resource |
|
||||
| [aws_ssm_maintenance_window_task.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_maintenance_window_task) | resource |
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|------|---------|:--------:|
|
||||
| cloudwatch-log-retention-days | Days to retain logs on cloudwatch logs | `number` | `30` | no |
| cron-expression | Cron expression for SSM maintenance window schedule | `string` | n/a | yes |
| description | Description of command to run | `string` | n/a | yes |
| instance-id | Id of Ec2 instance to execute the script | `string` | n/a | yes |
| maintenance-window-duration | Duration of maintenance window, must be >= 2 | `number` | `2` | no |
| schedule-name | Name of maintenance window. e.g. Daily0900UTC8 | `string` | n/a | yes |
| shell-script-path | Full path to script | `string` | n/a | yes |
|
||||
|
||||
## Outputs
|
||||
|
||||
No outputs.
|
||||
|
||||
---
|
||||
## Authorship
|
||||
This module was developed by xpk.
|
||||
@@ -0,0 +1,80 @@
|
||||
# SSM run command
|
||||
#resource "aws_ssm_document" "this" {
|
||||
# name = replace(title(var.description), " ", "")
|
||||
# document_type = "Command"
|
||||
# target_type = "/AWS::EC2::Instance"
|
||||
# content = jsonencode(
|
||||
# {
|
||||
# "schemaVersion" : "2.2",
|
||||
# "description" : "Run script for ${var.description}",
|
||||
# "parameters" : {
|
||||
# },
|
||||
# "mainSteps" : [
|
||||
# {
|
||||
# "action" : "aws:runShellScript",
|
||||
# "name" : "RunShellScript",
|
||||
# "inputs" : {
|
||||
# "runCommand" : var.shell-script-path
|
||||
# }
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
# )
|
||||
#}
|
||||
|
||||
# Maintenance window that schedules the script run. The window name is
# var.description converted to title case with the spaces removed.
resource "aws_ssm_maintenance_window" "this" {
  description = var.description
  name        = replace(title(var.description), " ", "")

  schedule = var.cron-expression
  duration = var.maintenance-window-duration
  cutoff   = 1
}
|
||||
|
||||
# Registers the single EC2 instance given by var.instance-id as a target of the
# maintenance window.
resource "aws_ssm_maintenance_window_target" "this" {
  window_id     = aws_ssm_maintenance_window.this.id
  resource_type = "INSTANCE"

  description = var.description
  name        = replace(title(var.description), " ", "")

  targets {
    key    = "InstanceIds"
    values = [var.instance-id]
  }
}
|
||||
|
||||
# Run Command task for the window: runs var.shell-script-path on the instance
# through the AWS-RunShellScript document and sends the command output to the
# module's CloudWatch log group.
resource "aws_ssm_maintenance_window_task" "this" {
  window_id = aws_ssm_maintenance_window.this.id
  name      = replace(title(var.description), " ", "")

  task_type = "RUN_COMMAND"
  task_arn  = "AWS-RunShellScript"

  priority        = 1
  max_concurrency = 1
  max_errors      = 1

  # The task addresses the instance directly by ID.
  targets {
    key    = "InstanceIds"
    values = [var.instance-id]
  }

  task_invocation_parameters {
    run_command_parameters {
      # If this time is reached and the command has not already started
      # executing, it doesn't run.
      timeout_seconds = 60

      cloudwatch_config {
        cloudwatch_output_enabled = true
        cloudwatch_log_group_name = aws_cloudwatch_log_group.this.name
      }

      # The script path is passed as the only entry of "commands".
      parameter {
        name   = "commands"
        values = [var.shell-script-path]
      }
    }
  }
}
|
||||
|
||||
# Log group that receives the Run Command output. Its name is built from the
# same title-cased, space-stripped description used for the window name.
resource "aws_cloudwatch_log_group" "this" {
  name = "/aws/ssm-maintenance/${replace(title(var.description), " ", "")}"

  # infrequent access logs can only be viewed via insight
  log_group_class   = "STANDARD"
  retention_in_days = var.cloudwatch-log-retention-days
}
|
||||
@@ -0,0 +1,36 @@
|
||||
# Passed as the single "commands" entry of the AWS-RunShellScript task.
variable "shell-script-path" {
  description = "Full path to script"
  type        = string
}
|
||||
|
||||
# Used as the maintenance window's schedule.
variable "cron-expression" {
  description = "Cron expression for SSM maintenance window schedule"
  type        = string
}
|
||||
|
||||
# The one instance targeted by both the window target and the window task.
variable "instance-id" {
  description = "Id of Ec2 instance to execute the script"
  type        = string
}
|
||||
|
||||
# Free-text description. Its title-cased, space-stripped form is also used for
# the window, target and task names and for the log group name suffix.
variable "description" {
  description = "Description of command to run"
  type        = string
}
|
||||
|
||||
# NOTE(review): no resource visible in this module references
# var.schedule-name (the window name is derived from var.description) —
# confirm whether this input is still needed.
variable "schedule-name" {
  description = "Name of maintenance window. e.g. Daily0900UTC8"
  type        = string
}
|
||||
|
||||
# Duration of the maintenance window, in hours.
# The window is created with cutoff = 1 and SSM requires the cutoff to be
# shorter than the duration, hence the lower bound of 2. The upper bound of 24
# is the SSM service limit for a window's duration. The rule is enforced at
# plan time instead of surfacing as an API error during apply.
variable "maintenance-window-duration" {
  description = "Duration of maintenance window, must be >= 2"
  type        = number
  default     = 2

  validation {
    condition     = var.maintenance-window-duration >= 2 && var.maintenance-window-duration <= 24
    error_message = "The maintenance-window-duration value must be between 2 and 24 hours."
  }
}
|
||||
|
||||
# Retention period applied to the Run Command output log group.
variable "cloudwatch-log-retention-days" {
  description = "Days to retain logs on cloudwatch logs"
  type        = number
  default     = 30
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  required_version = ">= 1.3.0"

  required_providers {
    aws = {
      source = "hashicorp/aws"
      # The module sets log_group_class on aws_cloudwatch_log_group. That
      # argument is not available in the earliest 5.x provider releases, so the
      # minimum is raised above 5.0.
      # NOTE(review): 5.27.0 is believed to be the release that introduced
      # log_group_class — confirm against the provider changelog.
      version = ">= 5.27.0"
    }
  }
}
|
||||
Reference in New Issue
Block a user