From a3d2b4e357257d99c100225f4c10e4bf291082e9 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 20 Jul 2024 02:20:49 +0200 Subject: [PATCH] XLSX: support documents whose XML elements have a prefix Fixes https://github.com/duckdb/duckdb_spatial/issues/362 --- autotest/ogr/data/xlsx/with_xml_prefix.xlsx | Bin 0 -> 2457 bytes autotest/ogr/ogr_xlsx.py | 15 +++++++ ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp | 45 ++++++++++++++------ 3 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 autotest/ogr/data/xlsx/with_xml_prefix.xlsx diff --git a/autotest/ogr/data/xlsx/with_xml_prefix.xlsx b/autotest/ogr/data/xlsx/with_xml_prefix.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4d23ef8ddf26261784663b877b684c626a460d78 GIT binary patch literal 2457 zcmWIWW@Zs#U|`^2C|~h9;;~b|)q5cCE+YejFoO(3v~zx5Norn6d`M+MYH_SyMQ%=L z2qy#cB%KLqi+{~ZE3M#WU}Sm0%)kI9)=sqbJ7OTvx;|;A>&FyL0VmIG5gCn5+!5z2 zHeRd$WZ`Wg&FcP$t75MP>mtWRQa3-}`yF-?nx@SwrzLF9Tzh^~0!Y(WOikx=j-IfkGnEqww z$>%(FCEr&(O){>IIQ^A1`pq0!y7^$MRDFKWi?}^>S>zxUg4h-wtkShw2{vx z;I!YL_nV^snQ>e>FSU;$02p>$z|dk~*u?uK0?dFCY{0OKFG|fR)&~dI0$@;8GtNd2 zE|7ji*mtOXjNb{=uK^5uZn%CukSXvm|A%Qv?|I+6Lk0q@4^~HPR!^R^ic5n}+AK#< z+~Ax==cEdq+kd{~#D4ME%M{1-YSHQCm7hJ|fB(Jw=H2O$F`7pv&TT4Hbev#(r*~a= zfoECh;dQxvCag-k5EpgVNyN|WH=x36pj@46%sxP;EcB$(u{d43) z&VkjMe)04EDR0`kc%kj=(@uJS8TXqy%qe-Hsd?ack2&AvnUz|bxE@{5vs7Dkp#D${+F9gyrL`qM=$%MoH+F$lNj5^ zDaFEn8$QhJTOj>I>v#Q>mAR$8>Z{b7&1#m|_GulNawq$^Ldwk3SKDHQr_B~e1RW<@ z(D5OBU!0MeS^`agmk5O-JJ6hntiUDTfuZ=72^f+X=75bega_wY>?WQKI-Pe|L152s zZGrDBx7$~3N?K(0x54W#)1wCxM>mM&p-d?Y&Ej=0)wj(~-}U@v-a54n;@3SpkEBb4pEZ3e(B5Tuy(EZxd#j_AeD=Yt zM!8$Y#m!S6iCiu@f6AslDN4h+(ye2?By&EK>w$?I4Fq35U%)NAQ_Vo1RW%|wwJP-q z`>oXqUQ>1-TzXDQ|4j8E*Ka3;6n{IuEIq-0L0qw`eD00d)eQNIS*|Rxi~HZ$|LMHl zBEDrMu}{C3PQAFyo40QdD@W+D`}Hpqe+Axff4TbMq2kgQ#<=&>)8i2lVucnVoREM- zDh%|QW~UheFp)ubN zJ=bX|3CE{3=QSpVK6-RA>nl@$HzSiAGonmHt&b#t0m8rl3=xJcjUX1*+KClXJE4UH z#9&;7HN;>B1_6e@jy6E^VX*}?3siJtH4EWL)Pft?ku`9$pixEGahPQxvg4|NSrc3` z0u2HcXn0(PSuP^GY&*mtTiA*k%_1M>iMUMDaB literal 0 HcmV?d00001 diff --git a/autotest/ogr/ogr_xlsx.py b/autotest/ogr/ogr_xlsx.py index 197563a1c964..632df94bcfab 100755 --- a/autotest/ogr/ogr_xlsx.py +++ b/autotest/ogr/ogr_xlsx.py @@ -650,3 +650,18 @@ def test_ogr_xlsx_write_sheet_without_row(): assert ds.GetLayer(2).GetFeatureCount() == 1 ds = None gdal.Unlink(tmpfilename) + + +############################################################################### +# Test reading a XLSX file with XML element prefixes + + +def test_ogr_xlsx_read_xml_prefix(): + + ds = ogr.Open("data/xlsx/with_xml_prefix.xlsx") + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetFieldDefn(0).GetName() == "Col1" + assert lyr.GetLayerDefn().GetFieldDefn(1).GetName() == "Col2" + f = lyr.GetNextFeature() + assert f["Col1"] == "foo" + assert f["Col2"] == "bar" diff --git a/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp b/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp index 7222ca2ef468..9590b7b5f20b 100644 --- a/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp +++ b/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp @@ -418,6 +418,18 @@ int OGRXLSXDataSource::Create(const char *pszFilename, return TRUE; } +/************************************************************************/ +/* GetUnprefixed() */ +/************************************************************************/ + +static const char *GetUnprefixed(const char *pszStr) +{ + const char *pszColumn = strchr(pszStr, ':'); + if (pszColumn) + return pszColumn + 1; + return pszStr; +} + /************************************************************************/ /* startElementCbk() */ /************************************************************************/ @@ -434,6 +446,8 @@ void OGRXLSXDataSource::startElementCbk(const char *pszNameIn, if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; switch (stateStack[nStackDepth].eVal) { @@ -474,6 +488,8 @@ void OGRXLSXDataSource::endElementCbk(const char *pszNameIn) if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; nDepth--; @@ -1346,6 +1362,8 @@ void OGRXLSXDataSource::startElementSSCbk(const char *pszNameIn, if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; switch (stateStack[nStackDepth].eVal) { @@ -1381,11 +1399,14 @@ static void XMLCALL endElementSSCbk(void *pUserData, const char *pszNameIn) ((OGRXLSXDataSource *)pUserData)->endElementSSCbk(pszNameIn); } -void OGRXLSXDataSource::endElementSSCbk(CPL_UNUSED const char *pszNameIn) +void OGRXLSXDataSource::endElementSSCbk(const char * /*pszNameIn*/) { if (bStopParsing) return; + // If we were to use pszNameIn, then we need: + // pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; nDepth--; @@ -1529,6 +1550,8 @@ void OGRXLSXDataSource::startElementWBRelsCbk(const char *pszNameIn, if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; if (strcmp(pszNameIn, "Relationship") == 0) { @@ -1593,18 +1616,6 @@ void OGRXLSXDataSource::AnalyseWorkbookRels(VSILFILE *fpWorkbookRels) VSIFCloseL(fpWorkbookRels); } -/************************************************************************/ -/* GetUnprefixed() */ -/************************************************************************/ - -static const char *GetUnprefixed(const char *pszStr) -{ - const char *pszColumn = strchr(pszStr, ':'); - if (pszColumn) - return pszColumn + 1; - return pszStr; -} - /************************************************************************/ /* startElementWBCbk() */ /************************************************************************/ @@ -1621,8 +1632,10 @@ void OGRXLSXDataSource::startElementWBCbk(const char *pszNameIn, if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; - if (strcmp(GetUnprefixed(pszNameIn), "sheet") == 0) + if (strcmp(pszNameIn, "sheet") == 0) { const char *pszSheetName = GetAttributeValue(ppszAttr, "name", nullptr); const char *pszId = GetAttributeValue(ppszAttr, "r:id", nullptr); @@ -1725,6 +1738,8 @@ void OGRXLSXDataSource::startElementStylesCbk(const char *pszNameIn, if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; if (strcmp(pszNameIn, "numFmt") == 0) { @@ -1810,6 +1825,8 @@ void OGRXLSXDataSource::endElementStylesCbk(const char *pszNameIn) if (bStopParsing) return; + pszNameIn = GetUnprefixed(pszNameIn); + nWithoutEventCounter = 0; if (strcmp(pszNameIn, "cellXfs") == 0) {