@@ -2784,6 +2784,52 @@ def test_extend_enumerations(tmp_path):
27842784 assert (readback_df [c ] == written_df [c ]).all ()
27852785
27862786
def test_write_dictionary_to_non_enum_column(tmp_path):
    """Write categorical data into columns declared with plain Arrow types.

    The SOMA schema declares every attribute with a plain (non-dictionary)
    Arrow type, while the table handed to ``write`` is built from pandas
    categorical columns. The test passes when the rows read back from the
    stored dataframe compare equal to the rows of the written table.
    """
    uri = str(tmp_path)

    # (column name, pandas nullable dtype alias, Arrow type of the SOMA column)
    integer_columns = [
        ("int64", "Int64", pa.int64()),
        ("uint64", "UInt64", pa.uint64()),
        ("int32", "Int32", pa.int32()),
        ("uint32", "UInt32", pa.uint32()),
        ("int16", "Int16", pa.int16()),
        ("uint16", "UInt16", pa.uint16()),
        ("int8", "Int8", pa.int8()),
        ("uint8", "UInt8", pa.uint8()),
    ]
    float_columns = [
        ("float32", "Float32", pa.float32()),
        ("float64", "Float64", pa.float64()),
    ]

    # Every categorical column ends with a null so missing values are
    # exercised alongside repeated category values.
    columns = {
        "soma_joinid": pd.Series([0, 1, 2, 3, 4, 5], dtype=np.int64),
        "str": pd.Series(["A", "B", "A", "B", "B", None], dtype="category"),
        "byte": pd.Series([b"A", b"B", b"A", b"B", b"B", None], dtype="category"),
        "bool": pd.Series([True, False, True, False, False, None], dtype="category"),
    }
    for column_name, pandas_dtype, _ in integer_columns:
        nullable_ints = pd.Series([0, 1, 2, 0, 1, None], dtype=pandas_dtype)
        columns[column_name] = nullable_ints.astype("category")
    for column_name, pandas_dtype, _ in float_columns:
        nullable_floats = pd.Series([0, 1.1, 2.1, 0, 1.1, None], dtype=pandas_dtype)
        columns[column_name] = nullable_floats.astype("category")
    written_df = pd.DataFrame(columns)

    # The schema mirrors the dataframe column order but uses the plain value
    # type for each column instead of a dictionary type.
    fields = [
        pa.field("soma_joinid", pa.int64()),
        pa.field("str", pa.large_string(), nullable=True),
        pa.field("byte", pa.large_binary(), nullable=True),
        pa.field("bool", pa.bool_(), nullable=True),
    ]
    fields.extend(
        pa.field(column_name, arrow_type, nullable=True)
        for column_name, _, arrow_type in integer_columns + float_columns
    )
    schema = pa.schema(fields)

    with soma.DataFrame.create(uri, schema=schema, domain=[[0, 9]]) as sdf:
        tbl = pa.Table.from_pandas(written_df, preserve_index=False)
        sdf.write(tbl)

    with soma.open(uri) as sdf:
        readback_tbl = sdf.read().concat()
        assert tbl.to_pylist() == readback_tbl.to_pylist()
2831+
2832+
27872833def test_multiple_writes_with_str_enums (tmp_path ):
27882834 uri = tmp_path .as_posix ()
27892835
0 commit comments