|
59 | 59 | debug = 0 |
60 | 60 |
|
61 | 61 | # define BPF program |
62 | | -bpf_text = """ |
| 62 | +bpf_header = """ |
63 | 63 | #include <uapi/linux/ptrace.h> |
64 | 64 | #define KBUILD_MODNAME "foo" |
65 | 65 | #include <linux/tcp.h> |
|
101 | 101 | u32 pid; |
102 | 102 | char task[TASK_COMM_LEN]; |
103 | 103 | }; |
104 | | -
|
| 104 | +""" |
| 105 | +bpf_text_tracepoint = """ |
105 | 106 | TRACEPOINT_PROBE(sock, inet_sock_set_state) |
106 | 107 | { |
107 | 108 | if (args->protocol != IPPROTO_TCP) |
|
166 | 167 | } |
167 | 168 | """ |
168 | 169 |
|
169 | | -if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")): |
170 | | - print("ERROR: tracepoint sock:inet_sock_set_state missing " |
171 | | - "(added in Linux 4.16). Exiting") |
172 | | - exit() |
| 170 | +bpf_text_kprobe = """ |
| 171 | +int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) |
| 172 | +{ |
| 173 | + // check this is TCP |
| 174 | + u8 protocol = 0; |
| 175 | +
|
| 176 | + // Note added by Joe Yin: |
| 177 | + // Reading sk_protocol relative to sk_wmem_queued does not work since Linux 4.10, |
| 178 | + // because sk_wmem_queued no longer follows the sk_protocol bitfield. |
| 179 | + // The member that follows the bitfield is now sk_gso_max_segs. |
| 180 | + // So on 4.10+ we can use this instead: |
| 181 | + // bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_gso_max_segs) - 3); |
| 182 | + // To distinguish pre-4.10 from 4.10+, introduce the variables gso_max_segs_offset and sk_lingertime_offset: |
| 183 | + // sk_lingertime is close to sk_gso_max_segs, and |
| 184 | + // on 4.10+ the offset between the two members is 4. |
| 185 | +
|
| 186 | + int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs); |
| 187 | + int sk_lingertime_offset = offsetof(struct sock, sk_lingertime); |
| 188 | +
|
| 189 | + if (sk_lingertime_offset - gso_max_segs_offset == 4) |
| 190 | + // 4.10+ with little endian |
| 191 | +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
| 192 | + bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_gso_max_segs) - 3); |
| 193 | +else |
| 194 | + // pre-4.10 with little endian |
| 195 | + bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_wmem_queued) - 3); |
| 196 | +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
| 197 | + // 4.10+ with big endian |
| 198 | + bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_gso_max_segs) - 1); |
| 199 | +else |
| 200 | + // pre-4.10 with big endian |
| 201 | + bpf_probe_read(&protocol, 1, (void *)((u64)&sk->sk_wmem_queued) - 1); |
| 202 | +#else |
| 203 | +# error "Fix your compiler's __BYTE_ORDER__?!" |
| 204 | +#endif |
| 205 | +
|
| 206 | + if (protocol != IPPROTO_TCP) |
| 207 | + return 0; |
| 208 | +
|
| 209 | + u32 pid = bpf_get_current_pid_tgid() >> 32; |
| 210 | + // sk is used as a UUID |
| 211 | +
|
| 212 | + // lport is either used in a filter here, or later |
| 213 | + u16 lport = sk->__sk_common.skc_num; |
| 214 | + FILTER_LPORT |
| 215 | +
|
| 216 | + // dport is either used in a filter here, or later |
| 217 | + u16 dport = sk->__sk_common.skc_dport; |
| 218 | + FILTER_DPORT |
| 219 | +
|
| 220 | + // calculate delta |
| 221 | + u64 *tsp, delta_us; |
| 222 | + tsp = last.lookup(&sk); |
| 223 | + if (tsp == 0) |
| 224 | + delta_us = 0; |
| 225 | + else |
| 226 | + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; |
| 227 | +
|
| 228 | + u16 family = sk->__sk_common.skc_family; |
| 229 | +
|
| 230 | + if (family == AF_INET) { |
| 231 | + struct ipv4_data_t data4 = { |
| 232 | + .span_us = delta_us, |
| 233 | + .oldstate = sk->__sk_common.skc_state, |
| 234 | + .newstate = state }; |
| 235 | + data4.skaddr = (u64)sk; |
| 236 | + data4.ts_us = bpf_ktime_get_ns() / 1000; |
| 237 | + data4.saddr = sk->__sk_common.skc_rcv_saddr; |
| 238 | + data4.daddr = sk->__sk_common.skc_daddr; |
| 239 | + // a workaround until data4 compiles with separate lport/dport |
| 240 | + data4.ports = dport + ((0ULL + lport) << 16); |
| 241 | + data4.pid = pid; |
| 242 | +
|
| 243 | + bpf_get_current_comm(&data4.task, sizeof(data4.task)); |
| 244 | + ipv4_events.perf_submit(ctx, &data4, sizeof(data4)); |
| 245 | +
|
| 246 | + } else /* 6 */ { |
| 247 | + struct ipv6_data_t data6 = { |
| 248 | + .span_us = delta_us, |
| 249 | + .oldstate = sk->__sk_common.skc_state, |
| 250 | + .newstate = state }; |
| 251 | + data6.skaddr = (u64)sk; |
| 252 | + data6.ts_us = bpf_ktime_get_ns() / 1000; |
| 253 | + bpf_probe_read(&data6.saddr, sizeof(data6.saddr), |
| 254 | + sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); |
| 255 | + bpf_probe_read(&data6.daddr, sizeof(data6.daddr), |
| 256 | + sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); |
| 257 | + // a workaround until data6 compiles with separate lport/dport |
| 258 | + data6.ports = dport + ((0ULL + lport) << 16); |
| 259 | + data6.pid = pid; |
| 260 | + bpf_get_current_comm(&data6.task, sizeof(data6.task)); |
| 261 | + ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); |
| 262 | + } |
| 263 | +
|
| 264 | + u64 ts = bpf_ktime_get_ns(); |
| 265 | + last.update(&sk, &ts); |
| 266 | +
|
| 267 | + return 0; |
| 268 | +
|
| 269 | +}; |
| 270 | +""" |
| 271 | + |
| 272 | +bpf_text = bpf_header |
| 273 | +if (BPF.tracepoint_exists("sock", "inet_sock_set_state")): |
| 274 | + bpf_text += bpf_text_tracepoint |
| 275 | +else: |
| 276 | + bpf_text += bpf_text_kprobe |
173 | 277 |
|
174 | 278 | # code substitutions |
175 | 279 | if args.remoteport: |
|
0 commit comments