@@ -140,6 +140,7 @@ def __init__(
140
140
_api_key : str = "" ,
141
141
_app_key : str = "" ,
142
142
_override_url : str = "" ,
143
+ _default_project_id : str = "" ,
143
144
) -> None :
144
145
super (BaseLLMObsWriter , self ).__init__ (interval = interval )
145
146
self ._lock = forksafe .RLock ()
@@ -150,6 +151,7 @@ def __init__(
150
151
self ._site : str = _site or config ._dd_site
151
152
self ._app_key : str = _app_key
152
153
self ._override_url : str = _override_url or os .environ .get ("DD_LLMOBS_OVERRIDE_ORIGIN" , "" )
154
+ self ._default_project_id : str = _default_project_id
153
155
154
156
self ._agentless : bool = is_agentless
155
157
self ._intake : str = self ._override_url or (
@@ -362,23 +364,28 @@ def dataset_delete(self, dataset_id: str) -> None:
362
364
raise ValueError (f"Failed to delete dataset { id } : { resp .get_json ()} " )
363
365
return None
364
366
365
def dataset_create(
    self,
    dataset_name: str,
    project_name: Optional[str],
    description: str,
) -> Dataset:
    """Create a new, empty dataset inside a project and return a handle to it.

    Args:
        dataset_name: Name sent as the new dataset's ``name`` attribute.
        project_name: Name of the owning project, resolved to an ID through
            ``project_create_or_get``. ``None`` or ``""`` selects the
            writer's default project ID.
        description: Description sent as the dataset's ``description``
            attribute.

    Returns:
        A ``Dataset`` with no records, carrying the ID and current version
        reported by the backend and bound to this client.

    Raises:
        ValueError: If no project ID could be resolved, if the backend does
            not answer with HTTP 200, or if the response carries an empty
            dataset ID.
    """
    project_id = self.project_create_or_get(project_name)
    # The default project ID is returned unvalidated when no project name is
    # given; an empty ID would yield a malformed ".../v1//datasets" path, so
    # fail early with the same message used for freshly created projects.
    if not project_id:
        raise ValueError(
            f"project ID is required for dataset & experiments features (project name: {project_name})"
        )
    logger.debug(
        "creating dataset %s with project ID %s for %s", dataset_name, project_id, project_name
    )

    path = f"/api/unstable/llm-obs/v1/{project_id}/datasets"
    body: JSONType = {
        "data": {
            "type": "datasets",
            "attributes": {"name": dataset_name, "description": description},
        }
    }
    resp = self.request("POST", path, body)
    if resp.status != 200:
        raise ValueError(f"Failed to create dataset {dataset_name}: {resp.status} {resp.get_json()}")

    response_data = resp.get_json()
    dataset_id = response_data["data"]["id"]
    if dataset_id is None or dataset_id == "":
        raise ValueError(f"unexpected dataset state, invalid ID (is None: {dataset_id is None})")

    curr_version = response_data["data"]["attributes"]["current_version"]
    # A freshly created dataset has no records yet, hence the empty list.
    return Dataset(dataset_name, dataset_id, [], description, curr_version, _dne_client=self)
382
389
383
390
@staticmethod
384
391
def _get_record_json (record : Union [UpdatableDatasetRecord , DatasetRecordRaw ], is_update : bool ) -> JSONType :
@@ -436,16 +443,19 @@ def dataset_batch_update(
436
443
new_record_ids : List [str ] = [r ["id" ] for r in data ] if data else []
437
444
return new_version , new_record_ids
438
445
439
- def dataset_get_with_records (self , name : str ) -> Dataset :
440
- path = f"/api/unstable/llm-obs/v1/datasets?filter[name]={ quote (name )} "
446
+ def dataset_get_with_records (self , dataset_name : str , project_name : Optional [str ] = None ) -> Dataset :
447
+ project_id = self .project_create_or_get (project_name )
448
+ logger .debug ("getting records with project ID %s for %s" , project_id , project_name )
449
+
450
+ path = f"/api/unstable/llm-obs/v1/{ project_id } /datasets?filter[name]={ quote (dataset_name )} "
441
451
resp = self .request ("GET" , path )
442
452
if resp .status != 200 :
443
- raise ValueError (f"Failed to pull dataset { name } : { resp .status } " )
453
+ raise ValueError (f"Failed to pull dataset { dataset_name } from project { project_name } : { resp .status } " )
444
454
445
455
response_data = resp .get_json ()
446
456
data = response_data ["data" ]
447
457
if not data :
448
- raise ValueError (f"Dataset '{ name } ' not found" )
458
+ raise ValueError (f"Dataset '{ dataset_name } ' not found in project { project_name } " )
449
459
450
460
curr_version = data [0 ]["attributes" ]["current_version" ]
451
461
dataset_description = data [0 ]["attributes" ].get ("description" , "" )
@@ -460,7 +470,8 @@ def dataset_get_with_records(self, name: str) -> Dataset:
460
470
resp = self .request ("GET" , list_path , timeout = self .LIST_RECORDS_TIMEOUT )
461
471
if resp .status != 200 :
462
472
raise ValueError (
463
- f"Failed to pull { page_num } th page of dataset records { name } : { resp .status } { resp .get_json ()} "
473
+ f"Failed to pull { page_num } th page of dataset records { dataset_name } : "
474
+ f"{ resp .status } { resp .get_json ()} "
464
475
)
465
476
records_data = resp .get_json ()
466
477
@@ -481,7 +492,7 @@ def dataset_get_with_records(self, name: str) -> Dataset:
481
492
list_path = f"{ list_base_path } ?page[cursor]={ next_cursor } "
482
493
logger .debug ("next list records request path %s" , list_path )
483
494
page_num += 1
484
- return Dataset (name , dataset_id , class_records , dataset_description , curr_version , _dne_client = self )
495
+ return Dataset (dataset_name , dataset_id , class_records , dataset_description , curr_version , _dne_client = self )
485
496
486
497
def dataset_bulk_upload (self , dataset_id : str , records : List [DatasetRecord ]):
487
498
with tempfile .NamedTemporaryFile (suffix = ".csv" ) as tmp :
@@ -534,7 +545,10 @@ def dataset_bulk_upload(self, dataset_id: str, records: List[DatasetRecord]):
534
545
raise ValueError (f"Failed to upload dataset from file: { resp .status } { resp .get_json ()} " )
535
546
logger .debug ("successfully uploaded with code %d" , resp .status )
536
547
537
- def project_create_or_get (self , name : str ) -> str :
548
+ def project_create_or_get (self , name : Optional [str ] = None ) -> str :
549
+ if name is None or name == "" :
550
+ return self ._default_project_id
551
+
538
552
path = "/api/unstable/llm-obs/v1/projects"
539
553
resp = self .request (
540
554
"POST" ,
@@ -544,7 +558,12 @@ def project_create_or_get(self, name: str) -> str:
544
558
if resp .status != 200 :
545
559
raise ValueError (f"Failed to create project { name } : { resp .status } { resp .get_json ()} " )
546
560
response_data = resp .get_json ()
547
- return response_data ["data" ]["id" ]
561
+ project_id = response_data ["data" ]["id" ]
562
+
563
+ if project_id is None or project_id == "" :
564
+ raise ValueError (f"project ID is required for dataset & experiments features (project name: { name } )" )
565
+
566
+ return project_id
548
567
549
568
def experiment_create (
550
569
self ,
0 commit comments