From ff41eb20046985c7ff9ae2304393a4e054e72e70 Mon Sep 17 00:00:00 2001 From: Matthew Green Date: Tue, 13 Aug 2024 16:53:09 +1000 Subject: [PATCH] add similarity function (#51) * add similarity function and test --- .../server/testcases/functions.in.yaml | 8 ++ .../server/testcases/functions.out.yaml | 7 ++ vql/functions/similarity.go | 97 +++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 vql/functions/similarity.go diff --git a/artifacts/testdata/server/testcases/functions.in.yaml b/artifacts/testdata/server/testcases/functions.in.yaml index c5275ff115e..0a21a56b79f 100644 --- a/artifacts/testdata/server/testcases/functions.in.yaml +++ b/artifacts/testdata/server/testcases/functions.in.yaml @@ -127,3 +127,11 @@ Queries: filter(list=List, regex="."), filter(list=List, regex=".+") FROM scope() + + # Test similarity function + - LET teststoredquery = SELECT 1 as field1, 2 as field2 FROM scope() + - SELECT similarity(set1=dict(field1=1,field2=4,field2=3),set2=dict(field2=4,field1=1,field2=3)), + similarity(set1=dict(field1=1,field2=4,field2=3),set2=dict(field1=1)), + similarity(set1=teststoredquery[0],set2=teststoredquery[0]), + similarity(set1="yolo!",set2=dict(field1=1)) + FROM scope() \ No newline at end of file diff --git a/artifacts/testdata/server/testcases/functions.out.yaml b/artifacts/testdata/server/testcases/functions.out.yaml index 14844bfc6b8..be9ea901e76 100644 --- a/artifacts/testdata/server/testcases/functions.out.yaml +++ b/artifacts/testdata/server/testcases/functions.out.yaml @@ -146,4 +146,11 @@ LET rows <= SELECT * FROM scope()[]SELECT len(list=["a", "b"]), len(list="hello" 0 ] } +]LET teststoredquery = SELECT 1 as field1, 2 as field2 FROM scope()[]SELECT similarity(set1=dict(field1=1,field2=4,field2=3),set2=dict(field2=4,field1=1,field2=3)), similarity(set1=dict(field1=1,field2=4,field2=3),set2=dict(field1=1)), similarity(set1=teststoredquery[0],set2=teststoredquery[0]), similarity(set1="yolo!",set2=dict(field1=1)) FROM 
scope()[
+ {
+  "similarity(set1=dict(field1=1, field2=4, field2=3), set2=dict(field2=4, field1=1, field2=3))": 1,
+  "similarity(set1=dict(field1=1, field2=4, field2=3), set2=dict(field1=1))": 0.5,
+  "similarity(set1=teststoredquery[0], set2=teststoredquery[0])": 1,
+  "similarity(set1=\"yolo!\", set2=dict(field1=1))": false
+ }
 ]
\ No newline at end of file
diff --git a/vql/functions/similarity.go b/vql/functions/similarity.go
new file mode 100644
index 00000000000..fcfc94281a9
--- /dev/null
+++ b/vql/functions/similarity.go
@@ -0,0 +1,121 @@
+/*
+Velociraptor - Dig Deeper
+Copyright (C) 2019-2024 Rapid7 Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+package functions
+
+import (
+	"context"
+
+	"github.com/Velocidex/ordereddict"
+	vql_subsystem "www.velocidex.com/golang/velociraptor/vql"
+	"www.velocidex.com/golang/vfilter"
+	"www.velocidex.com/golang/vfilter/arg_parser"
+)
+
+// similarityArgs holds the arguments of the similarity() VQL function.
+// Both fields are declared as vfilter.Any; Call type-asserts them to
+// *ordereddict.Dict and logs which argument is not a dict.
+type similarityArgs struct {
+	Set1 vfilter.Any `vfilter:"required,field=set1,doc=The first set to compare."`
+	Set2 vfilter.Any `vfilter:"required,field=set2,doc=The second set to compare."`
+}
+
+// SimilarityFunction implements the similarity() VQL function.
+type SimilarityFunction struct{}
+
+// Call compares two dicts and returns the fraction of keys, out of the
+// union of both key sets, that are present in both dicts with values
+// that are equal according to scope.Eq.
+//
+// The result is a float64 between 0 and 1, where 1 means the dicts
+// are identical. Two empty dicts are reported as identical. If the
+// arguments cannot be parsed, or either argument is not a dict, the
+// problem is logged to the scope and false is returned.
+func (self *SimilarityFunction) Call(
+	ctx context.Context,
+	scope vfilter.Scope,
+	args *ordereddict.Dict) vfilter.Any {
+
+	defer vql_subsystem.RegisterMonitor("similarity", args)()
+
+	arg := &similarityArgs{}
+	err := arg_parser.ExtractArgsWithContext(ctx, scope, args, arg)
+	if err != nil {
+		scope.Log("similarity: %s", err.Error())
+		return false
+	}
+
+	setA, okA := arg.Set1.(*ordereddict.Dict)
+	setB, okB := arg.Set2.(*ordereddict.Dict)
+	if !okA {
+		scope.Log("similarity: set1 parameter invalid")
+	}
+	if !okB {
+		scope.Log("similarity: set2 parameter invalid")
+	}
+	if !okA || !okB {
+		return false
+	}
+
+	// Fast path: dicts that compare equal need no per-key comparison.
+	if scope.Eq(setA, setB) {
+		return 1.0
+	}
+
+	// Walk the union of both key sets: every key of setA, then the
+	// keys that only exist in setB. A key counts as a difference when
+	// it is missing from either dict or its values are not equal.
+	total := 0
+	differences := 0
+	for _, key := range setA.Keys() {
+		total++
+		valueA, inA := setA.Get(key)
+		valueB, inB := setB.Get(key)
+		if !inA || !inB || !scope.Eq(valueA, valueB) {
+			differences++
+		}
+	}
+	for _, key := range setB.Keys() {
+		if _, inA := setA.Get(key); !inA {
+			total++
+			differences++
+		}
+	}
+
+	// Both dicts are empty. Without this guard the division below is
+	// 0/0, which is NaN under IEEE 754 and not a usable score.
+	if total == 0 {
+		return 1.0
+	}
+
+	return 1.0 - float64(differences)/float64(total)
+}
+
+// Info returns the function's name, doc string and argument type,
+// registering the argument struct in type_map.
+func (self SimilarityFunction) Info(scope vfilter.Scope, type_map *vfilter.TypeMap) *vfilter.FunctionInfo {
+	return &vfilter.FunctionInfo{
+		Name:    "similarity",
+		Doc:     "Compare two Dicts for similarity.",
+		ArgType: type_map.AddType(scope, &similarityArgs{}),
+	}
+}
+
+// init registers the similarity() function with the VQL subsystem.
+func init() {
+	vql_subsystem.RegisterFunction(&SimilarityFunction{})
+}