Skip to content

Commit

Permalink
usermetric: add initial user-facing metrics
Browse files Browse the repository at this point in the history
This commit adds a new usermetric package and wires
up metrics across the tailscale client.

Updates tailscale/corp#22075

Co-authored-by: Anton Tolchanov <[email protected]>
Signed-off-by: Kristoffer Dalby <[email protected]>
  • Loading branch information
kradalby and knyar committed Aug 27, 2024
1 parent 06c31f4 commit a2c42d3
Show file tree
Hide file tree
Showing 17 changed files with 368 additions and 22 deletions.
6 changes: 6 additions & 0 deletions client/tailscale/localclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ func (lc *LocalClient) DaemonMetrics(ctx context.Context) ([]byte, error) {
return lc.get200(ctx, "/localapi/v0/metrics")
}

// UserMetrics requests the LocalAPI usermetrics endpoint and returns
// the user metrics in the Prometheus text exposition format.
func (lc *LocalClient) UserMetrics(ctx context.Context) ([]byte, error) {
	const endpoint = "/localapi/v0/usermetrics"
	return lc.get200(ctx, endpoint)
}

// IncrementCounter increments the value of a Tailscale daemon's counter
// metric by the given delta. If the metric has yet to exist, a new counter
// metric is created and initialized to delta.
Expand Down
6 changes: 6 additions & 0 deletions client/web/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,12 @@ func (s *Server) serve(w http.ResponseWriter, r *http.Request) {
}
}

if r.URL.Path == "/metrics" {
r.URL.Path = "/api/local/v0/usermetrics"
s.proxyRequestToLocalAPI(w, r)
return
}

if strings.HasPrefix(r.URL.Path, "/api/") {
switch {
case r.URL.Path == "/api/auth" && r.Method == httpm.GET:
Expand Down
1 change: 1 addition & 0 deletions cmd/derper/depaware.txt
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/util/syspolicy from tailscale.com/ipn
tailscale.com/util/syspolicy/internal from tailscale.com/util/syspolicy/setting
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy
tailscale.com/util/usermetric from tailscale.com/health
tailscale.com/util/vizerror from tailscale.com/tailcfg+
W 💣 tailscale.com/util/winutil from tailscale.com/hostinfo+
W 💣 tailscale.com/util/winutil/winenv from tailscale.com/hostinfo+
Expand Down
2 changes: 2 additions & 0 deletions cmd/k8s-operator/depaware.txt
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/tstime from tailscale.com/cmd/k8s-operator+
tailscale.com/tstime/mono from tailscale.com/net/tstun+
tailscale.com/tstime/rate from tailscale.com/derp+
tailscale.com/tsweb/varz from tailscale.com/util/usermetric
tailscale.com/types/appctype from tailscale.com/ipn/ipnlocal
tailscale.com/types/dnstype from tailscale.com/ipn/ipnlocal+
tailscale.com/types/empty from tailscale.com/ipn+
Expand Down Expand Up @@ -812,6 +813,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/util/testenv from tailscale.com/control/controlclient+
tailscale.com/util/truncate from tailscale.com/logtail
tailscale.com/util/uniq from tailscale.com/ipn/ipnlocal+
tailscale.com/util/usermetric from tailscale.com/health+
tailscale.com/util/vizerror from tailscale.com/tailcfg+
💣 tailscale.com/util/winutil from tailscale.com/clientupdate+
W 💣 tailscale.com/util/winutil/authenticode from tailscale.com/clientupdate+
Expand Down
4 changes: 3 additions & 1 deletion cmd/tailscale/depaware.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/ipn from tailscale.com/client/tailscale+
tailscale.com/ipn/ipnstate from tailscale.com/client/tailscale+
tailscale.com/licenses from tailscale.com/client/web+
tailscale.com/metrics from tailscale.com/derp
tailscale.com/metrics from tailscale.com/derp+
tailscale.com/net/captivedetection from tailscale.com/net/netcheck
tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback
tailscale.com/net/dnscache from tailscale.com/control/controlhttp+
Expand Down Expand Up @@ -132,6 +132,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/tstime from tailscale.com/control/controlhttp+
tailscale.com/tstime/mono from tailscale.com/tstime/rate
tailscale.com/tstime/rate from tailscale.com/cmd/tailscale/cli+
tailscale.com/tsweb/varz from tailscale.com/util/usermetric
tailscale.com/types/dnstype from tailscale.com/tailcfg
tailscale.com/types/empty from tailscale.com/ipn
tailscale.com/types/ipproto from tailscale.com/net/flowtrack+
Expand Down Expand Up @@ -173,6 +174,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy
tailscale.com/util/testenv from tailscale.com/cmd/tailscale/cli
tailscale.com/util/truncate from tailscale.com/cmd/tailscale/cli
tailscale.com/util/usermetric from tailscale.com/health
tailscale.com/util/vizerror from tailscale.com/tailcfg+
💣 tailscale.com/util/winutil from tailscale.com/clientupdate+
W 💣 tailscale.com/util/winutil/authenticode from tailscale.com/clientupdate
Expand Down
3 changes: 2 additions & 1 deletion cmd/tailscaled/depaware.txt
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/tstime from tailscale.com/control/controlclient+
tailscale.com/tstime/mono from tailscale.com/net/tstun+
tailscale.com/tstime/rate from tailscale.com/derp+
tailscale.com/tsweb/varz from tailscale.com/cmd/tailscaled
tailscale.com/tsweb/varz from tailscale.com/cmd/tailscaled+
tailscale.com/types/appctype from tailscale.com/ipn/ipnlocal
tailscale.com/types/dnstype from tailscale.com/ipn/ipnlocal+
tailscale.com/types/empty from tailscale.com/ipn+
Expand Down Expand Up @@ -403,6 +403,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+
tailscale.com/util/truncate from tailscale.com/logtail
tailscale.com/util/uniq from tailscale.com/ipn/ipnlocal+
tailscale.com/util/usermetric from tailscale.com/health+
tailscale.com/util/vizerror from tailscale.com/tailcfg+
💣 tailscale.com/util/winutil from tailscale.com/clientupdate+
W 💣 tailscale.com/util/winutil/authenticode from tailscale.com/clientupdate+
Expand Down
25 changes: 25 additions & 0 deletions health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package health
import (
"context"
"errors"
"expvar"
"fmt"
"maps"
"net/http"
Expand All @@ -25,6 +26,7 @@ import (
"tailscale.com/util/mak"
"tailscale.com/util/multierr"
"tailscale.com/util/set"
"tailscale.com/util/usermetric"
"tailscale.com/version"
)

Expand Down Expand Up @@ -1202,6 +1204,18 @@ func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats {
}

func (t *Tracker) doOnceInit() {
metricHealthMessage.Set(metricHealthMessageLabel{
Type: "warning",
}, expvar.Func(func() any {
if t.nil() {
return 0
}
t.mu.Lock()
defer t.mu.Unlock()
t.updateBuiltinWarnablesLocked()
return int64(len(t.stringsLocked()))
}))

for i := range t.MagicSockReceiveFuncs {
f := &t.MagicSockReceiveFuncs[i]
f.name = (ReceiveFunc(i)).String()
Expand Down Expand Up @@ -1232,3 +1246,14 @@ func (t *Tracker) checkReceiveFuncsLocked() {
f.missing = true
}
}

// metricHealthMessageLabel is the label set used as the key type of the
// metricHealthMessage multi-label map.
type metricHealthMessageLabel struct {
// TODO: break down by warnable.severity as well?
// Type is the kind of health message being counted. The only value set by
// the code visible in this file is "warning".
Type string
}

// metricHealthMessage is a user-facing gauge, published under the name
// "tailscaled_health_messages" and keyed by metricHealthMessageLabel, that
// reports the number of health messages per type.
var metricHealthMessage = usermetric.NewMultiLabelMap[metricHealthMessageLabel](
"tailscaled_health_messages",
"gauge",
"Number of health messages broken down by type.",
)
13 changes: 13 additions & 0 deletions ipn/ipnlocal/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ import (
"tailscale.com/util/systemd"
"tailscale.com/util/testenv"
"tailscale.com/util/uniq"
"tailscale.com/util/usermetric"
"tailscale.com/version"
"tailscale.com/version/distro"
"tailscale.com/wgengine"
Expand All @@ -117,6 +118,9 @@ import (
"tailscale.com/wgengine/wgcfg/nmcfg"
)

// metricAdvertisedRoutes is a user-facing gauge, published under the name
// "tailscaled_advertised_routes", reporting how many network routes this
// node advertises.
var metricAdvertisedRoutes = usermetric.NewGauge(
"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)")

var controlDebugFlags = getControlDebugFlags()

func getControlDebugFlags() []string {
Expand Down Expand Up @@ -4646,6 +4650,15 @@ func (b *LocalBackend) applyPrefsToHostinfoLocked(hi *tailcfg.Hostinfo, prefs ip
hi.ShieldsUp = prefs.ShieldsUp()
hi.AllowsUpdate = envknob.AllowsRemoteUpdate() || prefs.AutoUpdate().Apply.EqualBool(true)

// Count the advertised routes, excluding exit node routes
// (prefixes with a zero bit length).
var routes int64
for _, route := range hi.RoutableIPs {
if route.Bits() != 0 {
routes++
}
}
metricAdvertisedRoutes.Set(float64(routes))

var sshHostKeys []string
if prefs.RunSSH() && envknob.CanSSHD() {
// TODO(bradfitz): this is called with b.mu held. Not ideal.
Expand Down
15 changes: 15 additions & 0 deletions ipn/localapi/localapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ import (
"tailscale.com/util/osuser"
"tailscale.com/util/progresstracking"
"tailscale.com/util/rands"
"tailscale.com/util/testenv"
"tailscale.com/util/usermetric"
"tailscale.com/version"
"tailscale.com/wgengine/magicsock"
)
Expand Down Expand Up @@ -141,6 +143,7 @@ var handler = map[string]localAPIHandler{
"update/install": (*Handler).serveUpdateInstall,
"update/progress": (*Handler).serveUpdateProgress,
"upload-client-metrics": (*Handler).serveUploadClientMetrics,
"usermetrics": (*Handler).serveUserMetrics,
"watch-ipn-bus": (*Handler).serveWatchIPNBus,
"whois": (*Handler).serveWhoIs,
}
Expand Down Expand Up @@ -571,6 +574,18 @@ func (h *Handler) serveMetrics(w http.ResponseWriter, r *http.Request) {
clientmetric.WritePrometheusExpositionFormat(w)
}

// debugUsermetricsEndpoint is registered against the TS_DEBUG_USER_METRICS
// envknob. serveUserMetrics calls it to decide whether the usermetrics
// endpoint may be served outside of tests.
//
// TODO(kradalby): Remove this once we have landed on a final set of
// metrics to export to clients and consider the metrics stable.
var debugUsermetricsEndpoint = envknob.RegisterBool("TS_DEBUG_USER_METRICS")

// serveUserMetrics handles the "usermetrics" LocalAPI endpoint by delegating
// to usermetric.Handler. The endpoint is gated: unless the process is running
// under test or the TS_DEBUG_USER_METRICS envknob is enabled, it replies with
// 403 Forbidden instead.
func (h *Handler) serveUserMetrics(w http.ResponseWriter, r *http.Request) {
	if enabled := testenv.InTest() || debugUsermetricsEndpoint(); enabled {
		usermetric.Handler(w, r)
		return
	}
	http.Error(w, "usermetrics debug flag not enabled", http.StatusForbidden)
}

func (h *Handler) serveDebug(w http.ResponseWriter, r *http.Request) {
if !h.PermitWrite {
http.Error(w, "debug access denied", http.StatusForbidden)
Expand Down
32 changes: 27 additions & 5 deletions metrics/multilabelmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func NewMultiLabelMap[T comparable](name string, promType, helpText string) *Mul
Help: helpText,
}
var zero T
_ = labelString(zero) // panic early if T is invalid
_ = LabelString(zero) // panic early if T is invalid
expvar.Publish(name, m)
return m
}
Expand All @@ -50,8 +50,10 @@ type labelsAndValue[T comparable] struct {
val expvar.Var
}

// labelString returns a Prometheus-formatted label string for the given key.
func labelString(k any) string {
// LabelString returns a Prometheus-formatted label string for the given key.
// k must be a struct type with scalar fields, as required by MultiLabelMap;
// LabelString panics if k is not a struct.
func LabelString(k any) string {
rv := reflect.ValueOf(k)
t := rv.Type()
if t.Kind() != reflect.Struct {
Expand Down Expand Up @@ -150,7 +152,7 @@ func (v *MultiLabelMap[T]) Init() *MultiLabelMap[T] {
//
// v.mu must be held.
func (v *MultiLabelMap[T]) addKeyLocked(key T, val expvar.Var) {
ls := labelString(key)
ls := LabelString(key)

ent := labelsAndValue[T]{key, ls, val}
// Using insertion sort to place key into the already-sorted v.keys.
Expand Down Expand Up @@ -209,6 +211,26 @@ func (v *MultiLabelMap[T]) Set(key T, val expvar.Var) {
v.m.Store(key, val)
}

// SetInt stores val in the *[expvar.Int] held under the given map key,
// creating the entry first if it doesn't exist yet.
// It is a no-op if key already holds a value of a different type.
func (v *MultiLabelMap[T]) SetInt(key T, val int64) {
	intVar, isInt := v.getOrFill(key, newInt).(*expvar.Int)
	if !isInt {
		// An existing entry of another type is left untouched.
		return
	}
	intVar.Set(val)
}

// SetFloat stores val in the *[expvar.Float] held under the given map key,
// creating the entry first if it doesn't exist yet.
// It is a no-op if key already holds a value of a different type.
func (v *MultiLabelMap[T]) SetFloat(key T, val float64) {
	floatVar, isFloat := v.getOrFill(key, newFloat).(*expvar.Float)
	if !isFloat {
		// An existing entry of another type is left untouched.
		return
	}
	floatVar.Set(val)
}

// Add adds delta to the *[expvar.Int] value stored under the given map key,
// creating it if it doesn't exist yet.
// It does nothing if key exists but is of the wrong type.
Expand All @@ -234,7 +256,7 @@ func (v *MultiLabelMap[T]) AddFloat(key T, delta float64) {
// This is not optimized for highly concurrent usage; it's presumed to only be
// used rarely, at startup.
func (v *MultiLabelMap[T]) Delete(key T) {
ls := labelString(key)
ls := LabelString(key)

v.mu.Lock()
defer v.mu.Unlock()
Expand Down
14 changes: 12 additions & 2 deletions metrics/multilabelmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package metrics

import (
"bytes"
"expvar"
"fmt"
"io"
"testing"
Expand All @@ -22,6 +23,12 @@ func TestMultilabelMap(t *testing.T) {
m.Add(L2{"b", "b"}, 3)
m.Add(L2{"a", "a"}, 1)

m.SetFloat(L2{"sf", "sf"}, 3.5)
m.SetFloat(L2{"sf", "sf"}, 5.5)
m.Set(L2{"sfunc", "sfunc"}, expvar.Func(func() any { return 3 }))
m.SetInt(L2{"si", "si"}, 3)
m.SetInt(L2{"si", "si"}, 5)

cur := func() string {
var buf bytes.Buffer
m.Do(func(kv KeyValue[L2]) {
Expand All @@ -33,7 +40,7 @@ func TestMultilabelMap(t *testing.T) {
return buf.String()
}

if g, w := cur(), "a/a=1,a/b=2,b/b=3,b/c=4"; g != w {
if g, w := cur(), "a/a=1,a/b=2,b/b=3,b/c=4,sf/sf=5.5,sfunc/sfunc=3,si/si=5"; g != w {
t.Errorf("got %q; want %q", g, w)
}

Expand All @@ -43,14 +50,17 @@ func TestMultilabelMap(t *testing.T) {
metricname{foo="a",bar="b"} 2
metricname{foo="b",bar="b"} 3
metricname{foo="b",bar="c"} 4
metricname{foo="sf",bar="sf"} 5.5
metricname{foo="sfunc",bar="sfunc"} 3
metricname{foo="si",bar="si"} 5
`
if got := buf.String(); got != want {
t.Errorf("promtheus output = %q; want %q", got, want)
}

m.Delete(L2{"b", "b"})

if g, w := cur(), "a/a=1,a/b=2,b/c=4"; g != w {
if g, w := cur(), "a/a=1,a/b=2,b/c=4,sf/sf=5.5,sfunc/sfunc=3,si/si=5"; g != w {
t.Errorf("got %q; want %q", g, w)
}

Expand Down
Loading

0 comments on commit a2c42d3

Please sign in to comment.