Skip to content

Commit 1e4cd71

Browse files
committed
Report conntrack count for all namespaces
In addition to checking the container root namespace, nested namespaces inside the container are checked against the container maximum configured connection tracking count. Only the highest connection tracking count is reported, as MAAS limits the number of metrics it can process. Closes-Bug: TURTLES-1006
1 parent 203c56b commit 1e4cd71

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

playbooks/files/rax-maas/plugins/conntrack_count.py

+58-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
pass
2525

2626
import maas_common
27+
import os
28+
import subprocess
2729
import tempfile
2830

2931

@@ -63,9 +65,29 @@ def get_metrics():
6365
'nf_conntrack_max': {
6466
'path': '/proc/sys/net/netfilter/nf_conntrack_max'}}
6567

68+
# Retrieve root namespace count
6669
for data in metrics.viewvalues():
6770
data['value'] = get_value(data['path'])
6871

72+
# Retrieve conntrack count per namespace
73+
# and report the namespace with the highest count.
74+
# This is necessary to limit the number of metrics to report to MAAS,
75+
# as we can not report a metric per namespace, which by nature are
76+
# also volatile.
77+
try:
78+
namespaces = os.listdir('/var/run/netns')
79+
for ns in namespaces:
80+
ps = subprocess.check_output(['ip', 'netns', 'exec',
81+
ns, 'cat',
82+
'/proc/sys/net/netfilter/'
83+
'nf_conntrack_count'])
84+
nscount = int(ps.strip(os.linesep))
85+
86+
if nscount > metrics['nf_conntrack_count']['value']:
87+
metrics['nf_conntrack_count']['value'] = nscount
88+
except (OSError):
89+
pass
90+
6991
return metrics
7092

7193

@@ -89,19 +111,53 @@ def get_metrics_lxc_container(container_name=''):
89111
# Check if container is even running
90112
try:
91113
with tempfile.TemporaryFile() as tmpfile:
114+
# Retrieve root namespace count
92115
if cont.attach_wait(lxc.attach_run_command,
93116
['cat',
94117
'/proc/sys/net/netfilter/nf_conntrack_count',
95118
'/proc/sys/net/netfilter/nf_conntrack_max'],
96-
stdout=tmpfile) > -1:
119+
stdout=tmpfile,
120+
stderr=tempfile.TemporaryFile()) > -1:
97121

98122
tmpfile.seek(0)
99123
output = tmpfile.read()
100124
metrics = {
101125
'nf_conntrack_count': {'value': output.split('\n')[0]},
102126
'nf_conntrack_max': {'value': output.split('\n')[1]}}
103127

104-
return metrics
128+
# Retrieve conntrack count per namespace
129+
# and report the namespace with the highest count.
130+
# This is necessary to limit the number of metrics to report to MAAS,
131+
# as we can not report a metric per namespace, which by nature are
132+
# also volatile.
133+
with tempfile.TemporaryFile() as nsfile:
134+
if cont.attach_wait(lxc.attach_run_command,
135+
['ls',
136+
'-1',
137+
'/var/run/netns'],
138+
stdout=nsfile,
139+
stderr=tempfile.TemporaryFile()) > -1:
140+
nsfile.seek(0)
141+
142+
for line in nsfile.readlines():
143+
ns = line.strip(os.linesep)
144+
nscountfile = tempfile.TemporaryFile()
145+
146+
if cont.attach_wait(lxc.attach_run_command,
147+
['ip', 'netns', 'exec',
148+
ns, 'cat',
149+
'/proc/sys/net/netfilter/'
150+
'nf_conntrack_count'],
151+
stdout=nscountfile,
152+
stderr=tempfile.TemporaryFile()) > -1:
153+
154+
nscountfile.seek(0)
155+
nscount = int(nscountfile.read().strip(os.linesep))
156+
157+
if nscount > metrics['nf_conntrack_count']['value']:
158+
metrics['nf_conntrack_count']['value'] = nscount
159+
160+
return metrics
105161

106162
except maas_common.MaaSException as e:
107163
maas_common.status_err(str(e), m_name='maas_conntrack')

playbooks/templates/rax-maas/conntrack_count.yaml.j2

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ alarms :
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
2222
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_critical_threshold }}) {
23-
return new AlarmStatus(CRITICAL, "Connection count is > {{ maas_nf_conntrack_critical_threshold }}% of maximum allowed.");
23+
return new AlarmStatus(CRITICAL, "Connection tracking count is > {{ maas_nf_conntrack_critical_threshold }}% of the critical threshold. Please check all namespaces listed at /var/run/netns including the host.");
2424
}
2525
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_warning_threshold }}) {
26-
return new AlarmStatus(WARNING, "Connection count is > {{ maas_nf_conntrack_warning_threshold }}% of maximum allowed.");
26+
return new AlarmStatus(WARNING, "Connection tracking count is > {{ maas_nf_conntrack_warning_threshold }}% of the warning threshold. Please check all namespaces inside listed at /var/run/netns including the host.");
2727
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
fixes:
3+
- |
4+
* The `conntrack_count.py` plugin is now checking for network namespaces
5+
listed at `/var/run/netns` and retrieves the iptables connection
6+
tracking information for each namespace.
7+
This ensures that embedded network namespaces are alerted in case
8+
connection tracking hashes are about to exceed a configurable threshold.
9+
Due to the limited availability of MAAS metrics per alarm, only the
10+
namespace with the highest connection tracking count is reported.

0 commit comments

Comments
 (0)