Skip to content

Commit 89a2c97

Browse files
committed
Report ddinjector telemetry via RAR.
1 parent 681899c commit 89a2c97

File tree

14 files changed

+1015
-2
lines changed

14 files changed

+1015
-2
lines changed

.gitlab/test/e2e_install_packages/windows.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,5 @@ new-e2e-installer-windows:
267267
- EXTRA_PARAMS: --run "TestAgentMSIInstallsAPMInject/TestEnableDisable$"
268268
- EXTRA_PARAMS: --run "TestAgentMSIInstallsAPMInject/TestInstallFromMSIWithIIS$"
269269
- EXTRA_PARAMS: --run "TestAgentMSIInstallsAPMInject/TestInstallFromMSIWithJava$"
270+
- EXTRA_PARAMS: --run "TestInjectorStats/TestQueryStatsViaSystemProbe$"
271+
- EXTRA_PARAMS: --run "TestInjectorStats/TestQueryStatsAfterInjection$"
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0.
3+
// This product includes software developed at Datadog (https://www.datadoghq.com/).
4+
// Copyright 2026-present Datadog, Inc.
5+
6+
//go:build windows
7+
8+
package modules
9+
10+
import (
11+
"time"
12+
13+
"go.uber.org/atomic"
14+
15+
"github.com/DataDog/datadog-agent/comp/core/sysprobeconfig"
16+
"github.com/DataDog/datadog-agent/comp/core/telemetry"
17+
"github.com/DataDog/datadog-agent/pkg/system-probe/api/module"
18+
"github.com/DataDog/datadog-agent/pkg/system-probe/config"
19+
sysconfigtypes "github.com/DataDog/datadog-agent/pkg/system-probe/config/types"
20+
"github.com/DataDog/datadog-agent/pkg/util/log"
21+
"github.com/DataDog/datadog-agent/pkg/windowsdriver/ddinjector"
22+
"github.com/prometheus/client_golang/prometheus"
23+
)
24+
25+
func init() { registerModule(Injector) }
26+
27+
var _ module.Module = &injectorModule{}
28+
29+
// Injector Factory
30+
var Injector = &module.Factory{
31+
Name: config.InjectorModule,
32+
ConfigNamespaces: []string{},
33+
Fn: func(_ *sysconfigtypes.Config, deps module.FactoryDependencies) (module.Module, error) {
34+
log.Infof("Creating Windows Injector module")
35+
36+
m := &injectorModule{
37+
telemetry: deps.Telemetry,
38+
sysProbeConfig: deps.SysprobeConfig,
39+
}
40+
41+
m.initializeMetrics()
42+
43+
return m, nil
44+
},
45+
}
46+
47+
type injectorModule struct {
48+
lastCheck atomic.Int64
49+
telemetry telemetry.Component
50+
counters ddinjector.InjectorCounters
51+
sysProbeConfig sysprobeconfig.Component
52+
}
53+
54+
// Register registers the endpoint for this module
55+
func (m *injectorModule) Register(httpMux *module.Router) error {
56+
if m.sysProbeConfig.GetBool("injector.enable_telemetry") {
57+
m.telemetry.RegisterCollector(m)
58+
}
59+
60+
return nil
61+
}
62+
63+
// Close cleans up module resources
64+
func (m *injectorModule) Close() {
65+
if m.sysProbeConfig.GetBool("injector.enable_telemetry") {
66+
m.telemetry.UnregisterCollector(m)
67+
}
68+
}
69+
70+
// GetStats prints the injector stats as part of /debug/stats.
71+
func (m *injectorModule) GetStats() map[string]interface{} {
72+
// Sanity check in case the metrics have not been initialized.
73+
if m.counters.ProcessesAddedToInjectionTracker == nil {
74+
return map[string]interface{}{
75+
"last_check_timestamp": m.lastCheck.Load(),
76+
}
77+
}
78+
79+
return map[string]interface{}{
80+
"last_check_timestamp": m.lastCheck.Load(),
81+
"processes_added_to_injection_tracker": m.counters.ProcessesAddedToInjectionTracker.Get(),
82+
"processes_removed_from_injection_tracker": m.counters.ProcessesRemovedFromInjectionTracker.Get(),
83+
"processes_skipped_subsystem": m.counters.ProcessesSkippedSubsystem.Get(),
84+
"processes_skipped_container": m.counters.ProcessesSkippedContainer.Get(),
85+
"processes_skipped_protected": m.counters.ProcessesSkippedProtected.Get(),
86+
"processes_skipped_system": m.counters.ProcessesSkippedSystem.Get(),
87+
"processes_skipped_excluded": m.counters.ProcessesSkippedExcluded.Get(),
88+
"injection_attempts": m.counters.InjectionAttempts.Get(),
89+
"injection_attempt_failures": m.counters.InjectionAttemptFailures.Get(),
90+
"injection_max_time_us": m.counters.InjectionMaxTimeUs.Get(),
91+
"injection_successes": m.counters.InjectionSuccesses.Get(),
92+
"injection_failures": m.counters.InjectionFailures.Get(),
93+
"pe_caching_failures": m.counters.PeCachingFailures.Get(),
94+
"import_directory_restoration_failures": m.counters.ImportDirectoryRestorationFailures.Get(),
95+
"pe_memory_allocation_failures": m.counters.PeMemoryAllocationFailures.Get(),
96+
"pe_injection_context_allocated": m.counters.PeInjectionContextAllocated.Get(),
97+
"pe_injection_context_cleanedup": m.counters.PeInjectionContextCleanedup.Get(),
98+
}
99+
}
100+
101+
////////////////////////////////////////////////////
102+
// RAR/prometheus/telemetry related implementations
103+
104+
func (m *injectorModule) initializeMetrics() {
105+
const subsystem = "injector"
106+
107+
m.counters.ProcessesAddedToInjectionTracker = m.telemetry.NewSimpleGauge(
108+
subsystem, "processes_added_to_injection_tracker",
109+
"Number of processes added to injection tracker")
110+
111+
m.counters.ProcessesRemovedFromInjectionTracker = m.telemetry.NewSimpleGauge(
112+
subsystem, "processes_removed_from_injection_tracker",
113+
"Number of processes removed from injection tracker")
114+
115+
m.counters.ProcessesSkippedSubsystem = m.telemetry.NewSimpleGauge(
116+
subsystem, "processes_skipped_subsystem",
117+
"Number of skipped subsystem processes")
118+
119+
m.counters.ProcessesSkippedContainer = m.telemetry.NewSimpleGauge(
120+
subsystem, "processes_skipped_container",
121+
"Number of skipped container processes")
122+
123+
m.counters.ProcessesSkippedProtected = m.telemetry.NewSimpleGauge(
124+
subsystem, "processes_skipped_protected",
125+
"Number of skipped protected processes")
126+
127+
m.counters.ProcessesSkippedSystem = m.telemetry.NewSimpleGauge(
128+
subsystem, "processes_skipped_system",
129+
"Number of skipped system processes")
130+
131+
m.counters.ProcessesSkippedExcluded = m.telemetry.NewSimpleGauge(
132+
subsystem, "processes_skipped_excluded",
133+
"Number of skipped processes due to exclusion")
134+
135+
m.counters.InjectionAttempts = m.telemetry.NewSimpleGauge(
136+
subsystem, "injection_attempts",
137+
"Number of injection attempts")
138+
139+
m.counters.InjectionAttemptFailures = m.telemetry.NewSimpleGauge(
140+
subsystem, "injection_attempt_failures",
141+
"Number of injection attempt failures")
142+
143+
m.counters.InjectionMaxTimeUs = m.telemetry.NewSimpleGauge(
144+
subsystem, "injection_max_time_us",
145+
"Maximum injection time in microseconds")
146+
147+
m.counters.InjectionSuccesses = m.telemetry.NewSimpleGauge(
148+
subsystem, "injection_successes",
149+
"Number of successful injections")
150+
151+
m.counters.InjectionFailures = m.telemetry.NewSimpleGauge(
152+
subsystem, "injection_failures",
153+
"Number of failed injections")
154+
155+
m.counters.PeCachingFailures = m.telemetry.NewSimpleGauge(
156+
subsystem, "pe_caching_failures",
157+
"Number of PE caching failures")
158+
159+
m.counters.ImportDirectoryRestorationFailures = m.telemetry.NewSimpleGauge(
160+
subsystem, "import_directory_restoration_failures",
161+
"Number of import directory restoration failures")
162+
163+
m.counters.PeMemoryAllocationFailures = m.telemetry.NewSimpleGauge(
164+
subsystem, "pe_memory_allocation_failures",
165+
"Number of PE memory allocation failures")
166+
167+
m.counters.PeInjectionContextAllocated = m.telemetry.NewSimpleGauge(
168+
subsystem, "pe_injection_context_allocated",
169+
"Number of PE injection contexts allocated")
170+
171+
m.counters.PeInjectionContextCleanedup = m.telemetry.NewSimpleGauge(
172+
subsystem, "pe_injection_context_cleanedup",
173+
"Number of PE injection contexts cleaned up")
174+
}
175+
176+
// Describe implements prometheus.Collector - no-op for dynamic metrics
177+
func (m *injectorModule) Describe(ch chan<- *prometheus.Desc) {
178+
}
179+
180+
// Collect implements prometheus.Collector. Fetches stats from the injector.
181+
func (m *injectorModule) Collect(ch chan<- prometheus.Metric) {
182+
if m.telemetry == nil {
183+
return
184+
}
185+
186+
// Query the driver for current counters
187+
injector, err := ddinjector.NewInjector()
188+
if err != nil {
189+
log.Debugf("unable to open Windows Injector: %v", err)
190+
return
191+
}
192+
defer injector.Close()
193+
194+
m.lastCheck.Store(time.Now().Unix())
195+
196+
err = injector.GetCounters(&m.counters)
197+
if err != nil {
198+
log.Debugf("error getting Injector counters: %v", err)
199+
return
200+
}
201+
}

comp/core/remoteagent/impl-systemprobe/remoteagent.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,27 @@ type remoteagentImpl struct {
7979

8080
func (r *remoteagentImpl) GetTelemetry(_ context.Context, _ *pbcore.GetTelemetryRequest) (*pbcore.GetTelemetryResponse, error) {
8181
prometheusText, err := r.telemetry.GatherText(false, telemetry.StaticMetricFilter(
82-
// Add here the metric names that should be included in the telemetry response.
83-
// This is useful to avoid sending too many metrics to the Core Agent.
82+
// Add here the metric names that should be included in the telemetry response.
83+
// This is useful to avoid sending too many metrics to the Core Agent.
84+
85+
// Windows Injector metrics
86+
"injector_processes_added_to_injection_tracker",
87+
"injector_processes_removed_from_injection_tracker",
88+
"injector_processes_skipped_subsystem",
89+
"injector_processes_skipped_container",
90+
"injector_processes_skipped_protected",
91+
"injector_processes_skipped_system",
92+
"injector_processes_skipped_excluded",
93+
"injector_injection_attempts",
94+
"injector_injection_attempt_failures",
95+
"injector_injection_max_time_us",
96+
"injector_injection_successes",
97+
"injector_injection_failures",
98+
"injector_pe_caching_failures",
99+
"injector_import_directory_restoration_failures",
100+
"injector_pe_memory_allocation_failures",
101+
"injector_pe_injection_context_allocated",
102+
"injector_pe_injection_context_cleanedup",
84103
))
85104
if err != nil {
86105
return nil, err

out.txt

104 KB
Binary file not shown.

pkg/config/setup/system_probe.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Setup) {
373373
cfg.BindEnvAndSetDefault("gpu_monitoring.cgroup_reapply_interval", 30*time.Second)
374374
cfg.BindEnvAndSetDefault("gpu_monitoring.cgroup_reapply_infinitely", false)
375375

376+
// Windows Injector telemetry
377+
cfg.BindEnvAndSetDefault("injector.enable_telemetry", false)
378+
376379
// gpu - stream config
377380
cfg.BindEnvAndSetDefault("gpu_monitoring.streams.max_kernel_launches", 1000)
378381
cfg.BindEnvAndSetDefault("gpu_monitoring.streams.max_mem_alloc_events", 1000)

pkg/system-probe/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ const (
4242
GPUMonitoringModule types.ModuleName = "gpu"
4343
SoftwareInventoryModule types.ModuleName = "software_inventory"
4444
PrivilegedLogsModule types.ModuleName = "privileged_logs"
45+
InjectorModule types.ModuleName = "injector"
4546
)
4647

4748
// New creates a config object for system-probe. It assumes no configuration has been loaded at this point.
@@ -197,6 +198,10 @@ func load() (*types.Config, error) {
197198
if swEnabled {
198199
c.EnabledModules[SoftwareInventoryModule] = struct{}{}
199200
}
201+
202+
if cfg.GetBool("injector.enable_telemetry") {
203+
c.EnabledModules[InjectorModule] = struct{}{}
204+
}
200205
}
201206

202207
// Enable discovery by default if system-probe has any modules enabled,

0 commit comments

Comments
 (0)