Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/PRX/Infrastructure
Browse files Browse the repository at this point in the history
  • Loading branch information
farski committed Aug 29, 2024
2 parents e3d3d3b + 11d9f24 commit 4eed204
Showing 1 changed file with 112 additions and 6 deletions.
118 changes: 112 additions & 6 deletions spire/templates/shared-clickhouse/instance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,11 @@ Resources:
- logs:CreateLogStream
- logs:DescribeLogStreams
- logs:PutLogEvents
- logs:PutRetentionPolicy
Effect: Allow
Resource: !Sub arn:aws:logs:*:*:log-group:/prx/clickhouse/${EnvironmentTypeAbbreviation}/*
Resource:
- !GetAtt ClickhouseLogGroup.Arn
- !GetAtt ClickhouseErrorLogGroup.Arn
Version: "2012-10-17"
PolicyName: CloudWatchAgentPermissions
Tags:
Expand Down Expand Up @@ -185,14 +188,14 @@ Resources:
{
"auto_removal": true,
"file_path": "/var/log/clickhouse-server/clickhouse-server.log",
"log_group_name": "/prx/clickhouse/${EnvironmentTypeAbbreviation}/server.log",
"log_group_name": "${ClickhouseLogGroup}",
"log_stream_name": "{instance_id}",
"retention_in_days": 30
},
{
"auto_removal": true,
"file_path": "/var/log/clickhouse-server/clickhouse-server.err.log",
"log_group_name": "/prx/clickhouse/${EnvironmentTypeAbbreviation}/server.err.log",
"log_group_name": "${ClickhouseErrorLogGroup}",
"log_stream_name": "{instance_id}",
"retention_in_days": 30
}
Expand Down Expand Up @@ -224,9 +227,9 @@ Resources:
"measurement": ["mem_used_percent"],
"metrics_collection_interval": 60
}
}
},
"namespace": "PRX/Clickhouse"
},
"namespace": "PRX/Clickhouse"
}
}
owner: root
group: root
Expand Down Expand Up @@ -323,6 +326,109 @@ Resources:
# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-init.html
/opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource ClickhouseServer5

# CloudWatch Logs group for the main Clickhouse server log. The CloudWatch
# agent config in this template uses this resource as the log_group_name for
# /var/log/clickhouse-server/clickhouse-server.log.
ClickhouseLogGroup:
  Type: AWS::Logs::LogGroup
  DeletionPolicy: Delete
  UpdateReplacePolicy: Delete
  Properties:
    LogGroupName: !Sub /prx/clickhouse/${EnvironmentTypeAbbreviation}/clickhouse-server.log
    # Must match "retention_in_days" for this group in the CloudWatch agent
    # config (30). The instance role is allowed logs:PutRetentionPolicy on
    # this group, so the agent applies its own value; any other number here
    # leaves the stack and the live log group disagreeing.
    RetentionInDays: 30
    Tags:
      - { Key: prx:meta:tagging-version, Value: "2021-04-07" }
      - { Key: prx:cloudformation:stack-name, Value: !Ref AWS::StackName }
      - { Key: prx:cloudformation:stack-id, Value: !Ref AWS::StackId }
      - { Key: prx:cloudformation:root-stack-name, Value: !Ref RootStackName }
      - { Key: prx:cloudformation:root-stack-id, Value: !Ref RootStackId }
      - { Key: prx:ops:environment, Value: !Ref EnvironmentType }
      - { Key: prx:dev:family, Value: Dovetail }
      - { Key: prx:dev:application, Value: Clickhouse }
# CloudWatch Logs group for the Clickhouse server error log. The CloudWatch
# agent config in this template uses this resource as the log_group_name for
# /var/log/clickhouse-server/clickhouse-server.err.log, and the metric
# filters in this template read from it.
ClickhouseErrorLogGroup:
  Type: AWS::Logs::LogGroup
  DeletionPolicy: Delete
  UpdateReplacePolicy: Delete
  Properties:
    LogGroupName: !Sub /prx/clickhouse/${EnvironmentTypeAbbreviation}/clickhouse-server.err.log
    # Must match "retention_in_days" for this group in the CloudWatch agent
    # config (30). The instance role is allowed logs:PutRetentionPolicy on
    # this group, so the agent applies its own value; any other number here
    # leaves the stack and the live log group disagreeing.
    RetentionInDays: 30
    Tags:
      - { Key: prx:meta:tagging-version, Value: "2021-04-07" }
      - { Key: prx:cloudformation:stack-name, Value: !Ref AWS::StackName }
      - { Key: prx:cloudformation:stack-id, Value: !Ref AWS::StackId }
      - { Key: prx:cloudformation:root-stack-name, Value: !Ref RootStackName }
      - { Key: prx:cloudformation:root-stack-id, Value: !Ref RootStackId }
      - { Key: prx:ops:environment, Value: !Ref EnvironmentType }
      - { Key: prx:dev:family, Value: Dovetail }
      - { Key: prx:dev:application, Value: Clickhouse }
# Matches events in the Clickhouse error log group whose JSON "level" field
# equals "Error" and publishes a value of 1 per match to a per-environment
# metric in the PRX/Clickhouse namespace.
ClickhouseErrorsMetricFilter:
  Type: AWS::Logs::MetricFilter
  Properties:
    LogGroupName: !Ref ClickhouseErrorLogGroup
    FilterPattern: '{ $.level = "Error" }'
    MetricTransformations:
      - MetricNamespace: PRX/Clickhouse
        MetricName: !Sub clickhouse_errors_${EnvironmentTypeAbbreviation}
        MetricValue: "1"
# Alarm on the clickhouse_errors_<env> metric; only created when the
# IsProduction condition holds. Goes into ALARM when the 60-second sum is
# greater than zero; periods with no data are treated as not breaching.
ClickhouseErrorsAlarm:
  Type: AWS::CloudWatch::Alarm
  Condition: IsProduction
  Properties:
    AlarmName: !Sub "WARN [Shared] Clickhouse <${EnvironmentTypeAbbreviation}> LOGGING ERRORS"
    AlarmDescription: !Sub "Clickhouse ${EnvironmentType} logged some errors. Go take a look!"
    # Metric being watched
    Namespace: PRX/Clickhouse
    MetricName: !Sub clickhouse_errors_${EnvironmentTypeAbbreviation}
    Statistic: Sum
    Period: 60
    # Evaluation rule
    EvaluationPeriods: 1
    ComparisonOperator: GreaterThanThreshold
    Threshold: 0
    TreatMissingData: notBreaching
    Tags:
      - Key: prx:meta:tagging-version
        Value: "2021-04-07"
      - Key: prx:cloudformation:stack-name
        Value: !Ref AWS::StackName
      - Key: prx:cloudformation:stack-id
        Value: !Ref AWS::StackId
      - Key: prx:cloudformation:root-stack-name
        Value: !Ref RootStackName
      - Key: prx:cloudformation:root-stack-id
        Value: !Ref RootStackId
      - Key: prx:ops:environment
        Value: !Ref EnvironmentType
      - Key: prx:ops:cloudwatch-log-group-name
        Value: !Ref ClickhouseErrorLogGroup
      - Key: prx:dev:family
        Value: Dovetail
      - Key: prx:dev:application
        Value: Clickhouse
# Matches events in the Clickhouse error log group whose JSON "message" field
# matches the regex MEMORY_LIMIT_EXCEEDED and publishes a value of 1 per
# match to a per-environment metric in the PRX/Clickhouse namespace.
ClickhouseMemoryLimitExceededMetricFilter:
  Type: AWS::Logs::MetricFilter
  Properties:
    LogGroupName: !Ref ClickhouseErrorLogGroup
    FilterPattern: '{ $.message = %MEMORY_LIMIT_EXCEEDED% }'
    MetricTransformations:
      - MetricNamespace: PRX/Clickhouse
        MetricName: !Sub clickhouse_memory_limit_exceeded_${EnvironmentTypeAbbreviation}
        MetricValue: "1"
# Alarm on the clickhouse_memory_limit_exceeded_<env> metric; only created
# when the IsProduction condition holds. Goes into ALARM when the 60-second
# sum is greater than zero; periods with no data are treated as not breaching.
ClickhouseMemoryLimitExceededAlarm:
  Type: AWS::CloudWatch::Alarm
  Condition: IsProduction
  Properties:
    AlarmName: !Sub "ERROR [Shared] Clickhouse <${EnvironmentTypeAbbreviation}> MEMORY LIMIT EXCEEDED"
    AlarmDescription: !Sub >-
      Clickhouse ${EnvironmentType} failed to perform a
      background-merge operation or query that would have exceeded
      its memory. These are normally not a huge deal, but good to
      keep an eye on.
    # Metric being watched
    Namespace: PRX/Clickhouse
    MetricName: !Sub clickhouse_memory_limit_exceeded_${EnvironmentTypeAbbreviation}
    Statistic: Sum
    Period: 60
    # Evaluation rule
    EvaluationPeriods: 1
    ComparisonOperator: GreaterThanThreshold
    Threshold: 0
    TreatMissingData: notBreaching
    Tags:
      - Key: prx:meta:tagging-version
        Value: "2021-04-07"
      - Key: prx:cloudformation:stack-name
        Value: !Ref AWS::StackName
      - Key: prx:cloudformation:stack-id
        Value: !Ref AWS::StackId
      - Key: prx:cloudformation:root-stack-name
        Value: !Ref RootStackName
      - Key: prx:cloudformation:root-stack-id
        Value: !Ref RootStackId
      - Key: prx:ops:environment
        Value: !Ref EnvironmentType
      - Key: prx:ops:cloudwatch-log-group-name
        Value: !Ref ClickhouseErrorLogGroup
      - Key: prx:dev:family
        Value: Dovetail
      - Key: prx:dev:application
        Value: Clickhouse

Outputs:
HttpPort:
Description: Clickhouse HTTP Port
Expand Down

0 comments on commit 4eed204

Please sign in to comment.