You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
@@ -509,7 +511,7 @@ async function pushResultsToDB({
509
511
}
510
512
}
511
513
512
-
consttests=[
514
+
consttests: TestOptions[]=[
513
515
{
514
516
type: 'query',
515
517
databaseName: 'netflix',
@@ -750,6 +752,106 @@ const tests = [
750
752
},
751
753
]),
752
754
},
755
+
{
756
+
type: 'aggregation',
757
+
databaseName: 'sample_airbnb',
758
+
collectionName: 'listingsAndReviews',
759
+
// TODO(COMPASS-7763): GPT-4 generates better results for this input.
760
+
// When we've swapped over we can increase the accuracy for this test.
761
+
// For now it will be giving low accuracy. gpt-3.5-turbo usually tries to
762
+
// use $expr in a $project stage which is not valid syntax.
763
+
minAccuracyForTest: 0,
764
+
userInput:
765
+
'what percentage of listings have a "Washer" in their amenities? Only consider listings with more than 2 beds. Return is as a string named "washerPercentage" like "75%", rounded to the nearest whole number.',
766
+
assertResult: anyOf([
767
+
isDeepStrictEqualTo([
768
+
{
769
+
_id: null,
770
+
tvPercentage: '67%',
771
+
},
772
+
]),
773
+
isDeepStrictEqualTo([
774
+
{
775
+
tvPercentage: '67%',
776
+
},
777
+
]),
778
+
]),
779
+
},
780
+
781
+
{
782
+
type: 'query',
783
+
databaseName: 'NYC',
784
+
collectionName: 'parking_2015',
785
+
// TODO(COMPASS-7763): GPT-4 generates better results for this input.
786
+
// When we've swapped over we can increase the accuracy for this test.
787
+
// For now it will be giving low accuracy.
788
+
minAccuracyForTest: 0.5,
789
+
userInput:
790
+
'Write a query that does the following: "find all of the parking incidents that occurred on an ave (match all ways to write ave). Give me an array of all of the plate ids involved, in an object with their summons number and vehicle make and body type. Put the vehicle make and body type into lower case. No _id, sorted by the summons number lowest first.',
0 commit comments