@@ -394,13 +394,12 @@ def get_schema(self, table_name, column_indices=None):
394
394
column_indices = column_indices ,
395
395
)["columns" ]
396
396
397
def search_schema(self, table_name, filters, sort_order="original"):
    """Send a ``search_schema`` RPC for ``table_name`` and return its result.

    Args:
        table_name: Name of the registered table to search.
        filters: List of column filters forwarded unchanged to the RPC.
            An empty list is used by the tests to request every column.
        sort_order: Ordering forwarded unchanged to the RPC. The tests in
            this file exercise ``"original"``, ``"ascending"`` and
            ``"descending"``.

    Returns:
        Whatever ``self.do_json_rpc`` returns for the request. The tests in
        this file read a ``"matches"`` entry from it and compare it with a
        list of column indices.
    """
    return self.do_json_rpc(
        table_name,
        "search_schema",
        filters=filters,
        sort_order=sort_order,
    )
405
404
406
405
def get_state (self , table_name ):
@@ -907,11 +906,11 @@ def _match_types_filter(data_types):
907
906
def test_search_schema (dxf : DataExplorerFixture ):
908
907
# Test search_schema RPC for pandas and polars
909
908
910
- # Make a few thousand column names we can search for
909
+ # Make a smaller set of column names for easier testing
911
910
column_names = [
912
911
f"{ prefix } _{ i } "
913
- for prefix in ["aaa " , "bbb " , "ccc " , "ddd " ]
914
- for i in range ({"aaa " : 1000 , "bbb " : 100 , "ccc " : 50 , "ddd " : 10 }[prefix ])
912
+ for prefix in ["apple " , "banana " , "cherry " , "date " ]
913
+ for i in range ({"apple " : 10 , "banana " : 5 , "cherry " : 3 , "date " : 2 }[prefix ])
915
914
]
916
915
917
916
data_examples = {
@@ -939,54 +938,94 @@ def test_search_schema(dxf: DataExplorerFixture):
939
938
dxf .register_table ("test_df" , test_df )
940
939
dxf .register_table ("dfp" , dfp )
941
940
942
- aaa_filter = _text_search_filter ("aaa" )
943
- bbb_filter = _text_search_filter ("bbb" )
944
- ccc_filter = _text_search_filter ("ccc" )
945
- ddd_filter = _text_search_filter ("ddd" )
941
+ apple_filter = _text_search_filter ("apple" )
942
+ banana_filter = _text_search_filter ("banana" )
946
943
947
944
for name in ["test_df" , "dfp" ]:
948
- full_schema = dxf .get_schema (name , list (range (len (column_names ))))
945
+ # Test filtering by text
946
+ result = dxf .search_schema (name , [apple_filter ])
947
+ expected_apple_indices = [i for i , col in enumerate (column_names ) if "apple" in col ]
948
+ assert result ["matches" ] == expected_apple_indices
949
+
950
+ result = dxf .search_schema (name , [banana_filter ])
951
+ expected_banana_indices = [i for i , col in enumerate (column_names ) if "banana" in col ]
952
+ assert result ["matches" ] == expected_banana_indices
953
+
954
+ # Test filtering by data type
955
+ string_filter = _match_types_filter ([ColumnDisplayType .String ])
956
+ result = dxf .search_schema (name , [string_filter ])
957
+ # String columns should be at indices 1, 6, 11, 16 (every 5th starting from 1)
958
+ expected_string_indices = [i for i in range (len (column_names )) if i % 5 == 1 ]
959
+ assert result ["matches" ] == expected_string_indices
960
+
961
+ # Test combining filters
962
+ result = dxf .search_schema (name , [apple_filter , string_filter ])
963
+ # Apple columns that are also strings
964
+ expected_combined = [i for i in expected_apple_indices if i % 5 == 1 ]
965
+ assert result ["matches" ] == expected_combined
966
+
967
+ # Test sorting
968
+ result = dxf .search_schema (name , [], "original" )
969
+ expected_all_indices = list (range (len (column_names )))
970
+ assert result ["matches" ] == expected_all_indices
971
+
972
+ result = dxf .search_schema (name , [], "ascending" )
973
+ # Should be sorted by column name alphabetically
974
+ expected_sorted = sorted (range (len (column_names )), key = lambda i : column_names [i ])
975
+ assert result ["matches" ] == expected_sorted
976
+
977
+ result = dxf .search_schema (name , [], "descending" )
978
+ # Should be sorted by column name reverse alphabetically
979
+ expected_reverse_sorted = sorted (
980
+ range (len (column_names )), key = lambda i : column_names [i ], reverse = True
981
+ )
982
+ assert result ["matches" ] == expected_reverse_sorted
949
983
950
- # (search_term, start_index, max_results, ex_total, ex_matches)
951
- cases = [
952
- ([aaa_filter ], 0 , 100 , 1000 , full_schema [:100 ]),
953
- (
954
- [aaa_filter , _match_types_filter ([ColumnDisplayType .String ])],
955
- 0 ,
956
- 100 ,
957
- 200 ,
958
- full_schema [:500 ][1 ::5 ],
959
- ),
960
- (
961
- [
962
- aaa_filter ,
963
- _match_types_filter ([ColumnDisplayType .Boolean , ColumnDisplayType .Number ]),
964
- ],
965
- 0 ,
966
- 120 ,
967
- 600 ,
968
- [x for i , x in enumerate (full_schema [:200 ]) if i % 5 in (0 , 2 , 3 )],
969
- ),
970
- ([aaa_filter ], 100 , 100 , 1000 , full_schema [100 :200 ]),
971
- ([aaa_filter ], 950 , 100 , 1000 , full_schema [950 :1000 ]),
972
- ([aaa_filter ], 1000 , 100 , 1000 , []),
973
- ([bbb_filter ], 0 , 10 , 100 , full_schema [1000 :1010 ]),
974
- ([ccc_filter ], 0 , 10 , 50 , full_schema [1100 :1110 ]),
975
- ([ddd_filter ], 0 , 10 , 10 , full_schema [1150 :1160 ]),
976
- ]
977
984
978
- for (
979
- filters ,
980
- start_index ,
981
- max_results ,
982
- ex_total ,
983
- ex_matches ,
984
- ) in cases :
985
- result = dxf .search_schema (name , filters , start_index , max_results )
986
-
987
- assert result ["total_num_matches" ] == ex_total
988
- matches = result ["matches" ]["columns" ]
989
- assert matches == ex_matches
985
def test_search_schema_sort_by_name(dxf: DataExplorerFixture):
    """Check the ``search_schema`` sort orders on mixed-case column names.

    The same data is registered as a pandas and a polars table, and every
    assertion is run against both. Expected results are lists of column
    indices, ordered with Python's default (case-sensitive) string
    comparison of the column names.
    """
    # Deliberately mixed-case names: with case-sensitive ordering the
    # capitalized names sort ahead of the lowercase ones.
    column_names = ["Zebra", "apple", "BANANA", "Cherry", "date", "Elephant", "fig"]
    data = {name: [1, 2, 3, 4, 5] for name in column_names}

    dxf.register_table("sort_test_df", pd.DataFrame(data))
    dxf.register_table("sort_test_dfp", pl.DataFrame(data))

    def by_name(indices, *, reverse=False):
        # Order column indices by the name of the column they refer to.
        return sorted(indices, key=column_names.__getitem__, reverse=reverse)

    all_indices = list(range(len(column_names)))

    # The expectation below treats the text search as case-insensitive, so
    # "a" matches "Zebra", "apple", "BANANA", "date" and "Elephant".
    filter_with_a = _text_search_filter("a")
    filtered_indices = [i for i, col in enumerate(column_names) if "a" in col.lower()]

    for name in ["sort_test_df", "sort_test_dfp"]:
        # Original order is simply the column order of the table.
        result = dxf.search_schema(name, [], "original")
        assert result["matches"] == all_indices

        # Ascending sort (case-sensitive alphabetical).
        result = dxf.search_schema(name, [], "ascending")
        assert result["matches"] == by_name(all_indices)

        # Descending sort.
        result = dxf.search_schema(name, [], "descending")
        assert result["matches"] == by_name(all_indices, reverse=True)

        # Sorting must also apply to a filtered subset of the columns.
        result = dxf.search_schema(name, [filter_with_a], "ascending")
        assert result["matches"] == by_name(filtered_indices)

        result = dxf.search_schema(name, [filter_with_a], "descending")
        assert result["matches"] == by_name(filtered_indices, reverse=True)
990
1029
991
1030
992
1031
def test_pandas_get_data_values (dxf : DataExplorerFixture ):
0 commit comments