Skip to content

Commit d0ec8a2

Browse files
poelzi
authored and yonghong-song committed
Backport tcpstates to kernels < 4.15
The tracepoint inet_sock_set_state only exists in kernels 4.15 and later. Backported the bpf tracepoint to use kprobes on older kernels.
1 parent 2b5fcc6 commit d0ec8a2

File tree

1 file changed

+110
-6
lines changed

1 file changed

+110
-6
lines changed

tools/tcpstates.py

Lines changed: 110 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
debug = 0
6060

6161
# define BPF program
62-
bpf_text = """
62+
bpf_header = """
6363
#include <uapi/linux/ptrace.h>
6464
#define KBUILD_MODNAME "foo"
6565
#include <linux/tcp.h>
@@ -101,7 +101,8 @@
101101
u32 pid;
102102
char task[TASK_COMM_LEN];
103103
};
104-
104+
"""
105+
bpf_text_tracepoint = """
105106
TRACEPOINT_PROBE(sock, inet_sock_set_state)
106107
{
107108
if (args->protocol != IPPROTO_TCP)
@@ -166,10 +167,113 @@
166167
}
167168
"""
168169

169-
if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")):
170-
print("ERROR: tracepoint sock:inet_sock_set_state missing "
171-
"(added in Linux 4.16). Exiting")
172-
exit()
170+
bpf_text_kprobe = """
171+
int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
172+
{
173+
// check this is TCP
174+
u8 protocol = 0;
175+
176+
// Following comments add by Joe Yin:
177+
// Unfortunately,it can not work since Linux 4.10,
178+
// because the sk_wmem_queued is not following the bitfield of sk_protocol.
179+
// And the following member is sk_gso_max_segs.
180+
// So, we can use this:
181+
// bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3);
182+
// In order to diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime,
183+
// sk_lingertime is closed to the gso_max_segs_offset,and
184+
// the offset between the two members is 4
185+
186+
int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
187+
int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
188+
189+
if (sk_lingertime_offset - gso_max_segs_offset == 4)
190+
// 4.10+ with little endian
191+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
192+
bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_gso_max_segs) - 3);
193+
else
194+
// pre-4.10 with little endian
195+
bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_wmem_queued) - 3);
196+
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
197+
// 4.10+ with big endian
198+
bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_gso_max_segs) - 1);
199+
else
200+
// pre-4.10 with big endian
201+
bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_wmem_queued) - 1);
202+
#else
203+
# error "Fix your compiler's __BYTE_ORDER__?!"
204+
#endif
205+
206+
if (protocol != IPPROTO_TCP)
207+
return 0;
208+
209+
u32 pid = bpf_get_current_pid_tgid() >> 32;
210+
// sk is used as a UUID
211+
212+
// lport is either used in a filter here, or later
213+
u16 lport = sk->__sk_common.skc_num;
214+
FILTER_LPORT
215+
216+
// dport is either used in a filter here, or later
217+
u16 dport = sk->__sk_common.skc_dport;
218+
FILTER_DPORT
219+
220+
// calculate delta
221+
u64 *tsp, delta_us;
222+
tsp = last.lookup(&sk);
223+
if (tsp == 0)
224+
delta_us = 0;
225+
else
226+
delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
227+
228+
u16 family = sk->__sk_common.skc_family;
229+
230+
if (family == AF_INET) {
231+
struct ipv4_data_t data4 = {
232+
.span_us = delta_us,
233+
.oldstate = sk->__sk_common.skc_state,
234+
.newstate = state };
235+
data4.skaddr = (u64)sk;
236+
data4.ts_us = bpf_ktime_get_ns() / 1000;
237+
data4.saddr = sk->__sk_common.skc_rcv_saddr;
238+
data4.daddr = sk->__sk_common.skc_daddr;
239+
// a workaround until data4 compiles with separate lport/dport
240+
data4.ports = dport + ((0ULL + lport) << 16);
241+
data4.pid = pid;
242+
243+
bpf_get_current_comm(&data4.task, sizeof(data4.task));
244+
ipv4_events.perf_submit(ctx, &data4, sizeof(data4));
245+
246+
} else /* 6 */ {
247+
struct ipv6_data_t data6 = {
248+
.span_us = delta_us,
249+
.oldstate = sk->__sk_common.skc_state,
250+
.newstate = state };
251+
data6.skaddr = (u64)sk;
252+
data6.ts_us = bpf_ktime_get_ns() / 1000;
253+
bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
254+
sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
255+
bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
256+
sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
257+
// a workaround until data6 compiles with separate lport/dport
258+
data6.ports = dport + ((0ULL + lport) << 16);
259+
data6.pid = pid;
260+
bpf_get_current_comm(&data6.task, sizeof(data6.task));
261+
ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
262+
}
263+
264+
u64 ts = bpf_ktime_get_ns();
265+
last.update(&sk, &ts);
266+
267+
return 0;
268+
269+
};
270+
"""
271+
272+
bpf_text = bpf_header
273+
if (BPF.tracepoint_exists("sock", "inet_sock_set_state")):
274+
bpf_text += bpf_text_tracepoint
275+
else:
276+
bpf_text += bpf_text_kprobe
173277

174278
# code substitutions
175279
if args.remoteport:

0 commit comments

Comments
 (0)