Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Guide to run prometheus and grafana

## Guide for external users who want to see grafana dashboards next to their nodes
1) Go into the `<project_dir>/docker` directory
2) run: `docker compose -f docker-compose.telemetry.user.yaml up`
3) go to `localhost:3000` for grafana
4) In `<project_dir>/resources/grafana/user`, copy the JSON of any node you want to see and import it into your Grafana instance

PS: Do not forget to add telemetry config to your `rollup_config.toml`


## Guide for citrea developers to update grafana dashboard locally then update it on production
1) Go into `<project_dir>/docker` directory
2) to run prometheus and grafana run: `docker compose -f docker-compose.telemetry.yaml up`
3) go to localhost:3000 and do the updates to the dashboard

### To update prod dashboard
4) When exporting the dashboard, select export as code -> classic and disable `Export for sharing externally`, because the local data source uid is the same as the one in prod
5) To update the prod dashboard click edit then go to settings -> `JSON Model` and paste the new json.
6) Update the prod dashboard under `resources/grafana/prod`

### To Update user dashboards
7) After updating the prod dashboard invoke the python script:
```
$ python ./resources/grafana/remove_labels.py ./resources/grafana/prod/<node_type>.dashboard.json > ./resources/grafana/user/<node_type>.dashboard.json
```

8) Run the prometheus server in user mode and import the newly generated user dashboard to check all is well
43 changes: 43 additions & 0 deletions docker/docker-compose.telemetry.user.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Telemetry stack for external users: Prometheus, Grafana and cAdvisor
# attached to one shared bridge network.
services:
  prometheus:
    # Grafana's provisioned datasource reaches Prometheus under this name.
    container_name: prometheus.citrea
    image: prom/prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./telemetry-user/prometheus.yml:/etc/prometheus/prometheus.yml
    networks:
      - monitoring
    extra_hosts:
      # Lets the scrape targets in prometheus.yml address the Docker host.
      - "host.docker.internal:host-gateway"

  grafana:
    image: grafana/grafana-enterprise
    ports:
      - "3000:3000"
    environment:
      GF_SECURITY_ADMIN_PASSWORD: "password"
    volumes:
      - grafana_data_user:/var/lib/grafana
      - ./telemetry-user/grafana/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
    networks:
      - monitoring

  cadvisor:
    image: gcr.io/cadvisor/cadvisor
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      # Add only if you have your containers running on Mac
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - monitoring

networks:
  monitoring:
    driver: bridge

volumes:
  grafana_data_user: {}
7 changes: 6 additions & 1 deletion docker/docker-compose.telemetry.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
services:
prometheus:
image: prom/prometheus
container_name: prometheus.citrea
build:
context: ./prometheus
dockerfile: Dockerfile
image: local-prometheus
ports:
- 9090:9090
volumes:
Expand All @@ -18,6 +22,7 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=password
volumes:
- grafana_data:/var/lib/grafana
- ./telemetry/grafana/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
networks:
- monitoring

Expand Down
23 changes: 23 additions & 0 deletions docker/prometheus/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Stage used only as a source for the official Prometheus binaries.
FROM prom/prometheus:v3.4.0 AS source

FROM ubuntu:24.04

# Install all OS packages in a single layer and purge the package lists in the
# same layer, so no apt metadata is left behind in the image. apt-get is used
# because the `apt` front end does not offer a stable interface for scripts.
RUN apt-get update \
    && apt-get -y upgrade \
    && apt-get -y install python3 python3-pip telnet \
    && apt-get -y autoremove \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# init.py imports `yaml`; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir --break-system-packages pyyaml

COPY --from=source /bin/prometheus /bin/prometheus
COPY --from=source /bin/promtool /bin/promtool

# TSDB storage directory and the directory init.py writes prometheus.yml into.
RUN mkdir -p /mnt/task/prometheus-data /etc/prometheus

# start.sh invokes ./init.py relative to this directory.
WORKDIR /srv

COPY start.sh /srv/start.sh
COPY init.py /srv/init.py

RUN chmod +x /srv/start.sh

ENTRYPOINT ["/srv/start.sh"]
165 changes: 165 additions & 0 deletions docker/prometheus/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from copy import deepcopy

import yaml


# Settings for the local developer Prometheus instance.
# - "ENVIRONMENT" / "NET_NAME" are lower-cased and attached to every scrape
#   target as the env_name / net_name labels; they also select the short
#   instance prefix computed in replacement_builder.
# - Every "SERVICE:<job name>" key maps a scrape job to its target address;
#   main() strips the "SERVICE:" prefix to obtain the job name.
# NOTE(review): the targets presumably point at node telemetry endpoints
# listening on the Docker host — confirm the ports against the node configs.
LOCAL = {
"ENVIRONMENT": "CORE",
"NET_NAME": "DEV-NET",
"SERVICE:citrea-sequencer": "host.docker.internal:8001",
"SERVICE:citrea-full-node": "host.docker.internal:8002",
"SERVICE:citrea-prover": "host.docker.internal:8003",
"SERVICE:citrea-light-prover": "host.docker.internal:8004",
}


def main():
    """Render the Prometheus configuration from ``LOCAL`` and write it to disk.

    Every ``SERVICE:<name>`` entry of ``LOCAL`` becomes a ``(name, address)``
    pair; the rendered YAML is written to ``/etc/prometheus/prometheus.yml``.
    """
    network = LOCAL["NET_NAME"]
    environment = LOCAL["ENVIRONMENT"]

    # Collect (job name, target address) pairs from the "SERVICE:" entries.
    targets = []
    for key, address in LOCAL.items():
        if key.startswith("SERVICE:"):
            targets.append((key.split(":")[1], address))

    rendered = create_prometheus_config(network, environment, targets)

    with open("/etc/prometheus/prometheus.yml", "w") as config_file:
        config_file.write(rendered)


def create_prometheus_config(net_name: str, env_name: str, services: list[tuple[str, str]]):
    """Build the Prometheus configuration and return it as a YAML string.

    Args:
        net_name: Network name; lower-cased and attached to every target as
            the ``net_name`` label.
        env_name: Environment name; lower-cased and attached to every target
            as the ``env_name`` label.
        services: ``(job name, target address)`` pairs. Pairs that share a job
            name are merged into a single scrape job.

    Returns:
        YAML text with a ``global`` section and a ``scrape_configs`` list that
        holds the Prometheus self-scrape job followed by one job per service
        name, in the order the names first appear in ``services``.
    """
    net_name = net_name.lower()
    env_name = env_name.lower()

    global_config = {
        "scrape_interval": "15s",
        "evaluation_interval": "15s",
    }

    # Labels stamped onto every target regardless of its address.
    general_relabel_configs = [
        {
            "source_labels": ["__address__"],
            "target_label": "net_name",
            "replacement": net_name,
        },
        {
            "source_labels": ["__address__"],
            "target_label": "env_name",
            "replacement": env_name,
        },
    ]

    self_scrape_config = {
        "job_name": "prometheus",
        "static_configs": [
            {"targets": ["127.0.0.1:9090"]},
        ],
        "relabel_configs": [
            *deepcopy(general_relabel_configs),
            *replacement_builder(".*", "prometheus"),
        ],
    }

    # Group target addresses by job name. A dict keeps first-seen order, so the
    # generated file is identical from run to run (iterating a set of strings
    # is not order-stable across interpreter runs).
    service_grouped: dict[str, list[str]] = {}
    for service in services:
        service_grouped.setdefault(service[0], []).append(f"{service[1]}")

    service_configs = [
        {
            "job_name": job_name,
            "scrape_interval": "1s",
            "static_configs": [
                {"targets": targets},
            ],
            "relabel_configs": [
                *deepcopy(general_relabel_configs),
                # Ordered from the most generic to the most specific address
                # pattern. Raw strings keep the regex backslashes literal
                # without relying on Python's invalid-escape fallback.
                *replacement_builder(r"(.*)\.?:.*", "$1"),
                *replacement_builder(r"(.*)\.citrea\.?:.*", "$1"),
                *replacement_builder(rf"{env_name}-{net_name}-(.*)\.citrea\.?:.*", "$1"),
            ],
        }
        for job_name, targets in service_grouped.items()
    ]

    scrape_configs = [
        self_scrape_config,
        *service_configs,
    ]

    prometheus_config = {
        "global": global_config,
        "scrape_configs": scrape_configs,
    }

    prometheus_config_yaml = yaml.dump(prometheus_config, default_flow_style=False)
    return prometheus_config_yaml


def replacement_builder(regex: str, replacement: str):
    """Return the two relabel rules that set ``instance`` and ``service_name``.

    Both rules match ``__address__`` against ``regex``. ``service_name``
    receives ``replacement`` as is, while ``instance`` receives it prefixed
    with a short code derived from ``LOCAL["NET_NAME"]`` and
    ``LOCAL["ENVIRONMENT"]`` (for example ``dnc-`` for DEV-NET / CORE).

    Args:
        regex: Pattern applied to the target address.
        replacement: Replacement expression; may reference capture groups
            such as ``$1``.

    Returns:
        A new list with two relabel-rule dicts.
    """
    network = LOCAL["NET_NAME"].lower()
    environment = LOCAL["ENVIRONMENT"].lower()

    # First marker contained in the network name wins; "x" when none matches.
    network_codes = (("dev", "dn"), ("test", "tn"), ("main", "mn"), ("general", "g"))
    prefix_head = next((code for marker, code in network_codes if marker in network), "x")

    if "core" in environment:
        prefix_tail = "c"
    elif "web" in environment:
        prefix_tail = "w"
    elif "pop" in environment:
        # "pop" environments additionally carry a region suffix.
        region = next((name for name in ("eu", "ap") if name in environment), "xx")
        prefix_tail = f"p{region}"
    elif "common" in environment:
        prefix_tail = "c"
    else:
        prefix_tail = "x"

    short_prefix = f"{prefix_head}{prefix_tail}"

    label_values = (
        ("instance", f"{short_prefix}-{replacement}"),
        ("service_name", f"{replacement}"),
    )
    return [
        {
            "source_labels": ["__address__"],
            "regex": regex,
            "target_label": label,
            "replacement": value,
        }
        for label, value in label_values
    ]


if __name__ == "__main__":
main()
15 changes: 15 additions & 0 deletions docker/prometheus/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Container entrypoint: generate the Prometheus configuration, then run the server.
set -e

# init.py writes /etc/prometheus/prometheus.yml; the relative path relies on
# the working directory being the directory that holds init.py.
python3 ./init.py


# run with server mode
# exec replaces this shell with Prometheus, so the server becomes the
# container's main process and receives stop signals directly.
exec /bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/mnt/task/prometheus-data \
    --web.external-url= \
    --web.page-title="DEV-NET CORE Prometheus" \
    --web.enable-lifecycle \
    --web.enable-admin-api \
    --storage.tsdb.retention.time=30d
14 changes: 14 additions & 0 deletions docker/telemetry-user/grafana/datasources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# NOTE: The datasource UID is pinned to the UID used in production so that
# dashboards can be imported and exported between environments unchanged.

apiVersion: 1

datasources:
  - name: prometheus
    type: prometheus
    access: proxy
    uid: "eed8s4geh3myob"
    # prometheus.citrea is the container_name of the Prometheus service.
    url: "http://prometheus.citrea:9090"
    isDefault: true
    readOnly: false
    editable: true
21 changes: 21 additions & 0 deletions docker/telemetry-user/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Prometheus configuration for the user telemetry stack.
global:
  scrape_interval: 1s

scrape_configs:
  # Prometheus scraping itself.
  - job_name: prometheus
    static_configs:
      - targets:
          - "localhost:9090"

  # Container metrics from the cadvisor service of the same compose file.
  - job_name: cadvisor
    static_configs:
      - targets:
          - "cadvisor:8080"

  # Targets reached through the Docker host, one port per job.
  # NOTE(review): presumably the telemetry endpoints of locally running nodes;
  # confirm the ports against each node's telemetry config.
  - job_name: sequencer
    static_configs:
      - targets:
          - "host.docker.internal:8001"

  - job_name: fullnode
    static_configs:
      - targets:
          - "host.docker.internal:8002"

  - job_name: batch-prover
    static_configs:
      - targets:
          - "host.docker.internal:8003"

  - job_name: light-client
    static_configs:
      - targets:
          - "host.docker.internal:8004"
14 changes: 14 additions & 0 deletions docker/telemetry/grafana/datasources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# NOTE: The datasource UID is pinned to the UID used in production so that
# dashboards can be imported and exported between environments unchanged.

apiVersion: 1

datasources:
  - name: prometheus
    type: prometheus
    access: proxy
    uid: "eed8s4geh3myob"
    # prometheus.citrea is the container_name of the Prometheus service.
    url: "http://prometheus.citrea:9090"
    isDefault: true
    readOnly: false
    editable: true
Loading
Loading