Skip to content

Commit 81140d3

Browse files
authored
Merge pull request #578 from BjoernT/master
Report conntrack count for all namespaces
2 parents a161335 + 1e4cd71 commit 81140d3

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

playbooks/files/rax-maas/plugins/conntrack_count.py

+58-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
pass
2525

2626
import maas_common
27+
import os
28+
import subprocess
2729
import tempfile
2830

2931

@@ -63,9 +65,29 @@ def get_metrics():
6365
'nf_conntrack_max': {
6466
'path': '/proc/sys/net/netfilter/nf_conntrack_max'}}
6567

68+
# Retrieve root namespace count
6669
for data in metrics.viewvalues():
6770
data['value'] = get_value(data['path'])
6871

72+
# Retrieve conntrack count per namespace
73+
# and report the namespace with the highest count.
74+
# This is necessary to limit the number of metrics to report to MAAS,
75+
# as we can not report a metric per namespace, which by nature are
76+
# also volatile.
77+
try:
78+
namespaces = os.listdir('/var/run/netns')
79+
for ns in namespaces:
80+
ps = subprocess.check_output(['ip', 'netns', 'exec',
81+
ns, 'cat',
82+
'/proc/sys/net/netfilter/'
83+
'nf_conntrack_count'])
84+
nscount = int(ps.strip(os.linesep))
85+
86+
if nscount > metrics['nf_conntrack_count']['value']:
87+
metrics['nf_conntrack_count']['value'] = nscount
88+
except (OSError):
89+
pass
90+
6991
return metrics
7092

7193

@@ -89,19 +111,53 @@ def get_metrics_lxc_container(container_name=''):
89111
# Check if container is even running
90112
try:
91113
with tempfile.TemporaryFile() as tmpfile:
114+
# Retrieve root namespace count
92115
if cont.attach_wait(lxc.attach_run_command,
93116
['cat',
94117
'/proc/sys/net/netfilter/nf_conntrack_count',
95118
'/proc/sys/net/netfilter/nf_conntrack_max'],
96-
stdout=tmpfile) > -1:
119+
stdout=tmpfile,
120+
stderr=tempfile.TemporaryFile()) > -1:
97121

98122
tmpfile.seek(0)
99123
output = tmpfile.read()
100124
metrics = {
101125
'nf_conntrack_count': {'value': output.split('\n')[0]},
102126
'nf_conntrack_max': {'value': output.split('\n')[1]}}
103127

104-
return metrics
128+
# Retrieve conntrack count per namespace
129+
# and report the namespace with the highest count.
130+
# This is necessary to limit the number of metrics to report to MAAS,
131+
# as we can not report a metric per namespace, which by nature are
132+
# also volatile.
133+
with tempfile.TemporaryFile() as nsfile:
134+
if cont.attach_wait(lxc.attach_run_command,
135+
['ls',
136+
'-1',
137+
'/var/run/netns'],
138+
stdout=nsfile,
139+
stderr=tempfile.TemporaryFile()) > -1:
140+
nsfile.seek(0)
141+
142+
for line in nsfile.readlines():
143+
ns = line.strip(os.linesep)
144+
nscountfile = tempfile.TemporaryFile()
145+
146+
if cont.attach_wait(lxc.attach_run_command,
147+
['ip', 'netns', 'exec',
148+
ns, 'cat',
149+
'/proc/sys/net/netfilter/'
150+
'nf_conntrack_count'],
151+
stdout=nscountfile,
152+
stderr=tempfile.TemporaryFile()) > -1:
153+
154+
nscountfile.seek(0)
155+
nscount = int(nscountfile.read().strip(os.linesep))
156+
157+
if nscount > metrics['nf_conntrack_count']['value']:
158+
metrics['nf_conntrack_count']['value'] = nscount
159+
160+
return metrics
105161

106162
except maas_common.MaaSException as e:
107163
maas_common.status_err(str(e), m_name='maas_conntrack')

playbooks/templates/rax-maas/conntrack_count.yaml.j2

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ alarms :
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
2222
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_critical_threshold }}) {
23-
return new AlarmStatus(CRITICAL, "Connection count is > {{ maas_nf_conntrack_critical_threshold }}% of maximum allowed.");
23+
return new AlarmStatus(CRITICAL, "Connection tracking count is > {{ maas_nf_conntrack_critical_threshold }}% of the critical threshold. Please check all namespaces listed at /var/run/netns including the host.");
2424
}
2525
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_warning_threshold }}) {
26-
return new AlarmStatus(WARNING, "Connection count is > {{ maas_nf_conntrack_warning_threshold }}% of maximum allowed.");
26+
return new AlarmStatus(WARNING, "Connection tracking count is > {{ maas_nf_conntrack_warning_threshold }}% of the warning threshold. Please check all namespaces inside listed at /var/run/netns including the host.");
2727
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
fixes:
3+
- |
4+
* The `conntrack_count.py` plugin is now checking for network namespaces
5+
listed at `/var/run/netns` and retrieves the iptables connection
6+
tracking information for each namespace.
7+
This ensures that embedded network namespaces are alerted in case
8+
connection tracking hashes are about to exceed a configurable threshold.
9+
Due to the limited availability of MAAS metrics per alarm, only the
10+
namespace with the highest connection tracking count is reported.

0 commit comments

Comments
 (0)