From f2cb2c9ceb833d331fe2f78d9d3c9a5bcaeff7dd Mon Sep 17 00:00:00 2001 From: Zoltan Borok-Nagy Date: Tue, 7 Mar 2023 18:43:53 +0100 Subject: [PATCH] IMPALA-11964: Make sure Impala returns error for Iceberg tables with equality deletes Impala only supports position deletes currently. It should raise an error when equality deletes are encountered. We already had a check for this when the query was planned by Iceberg. But when we were using cached metadata the check was missing. This means that Impala could return bogus results in the presence of equality delete files. This patch adds a check for the latter case as well. Tables with equality delete files are still loadable by Impala, and users can still query snapshots of such tables as long as those snapshots don't have equality deletes. Testing: * added e2e tests Change-Id: I14d7116692c0e47d0799be650dc323811e2ee0fb Reviewed-on: http://gerrit.cloudera.org:8080/19601 Reviewed-by: Impala Public Jenkins Tested-by: Impala Public Jenkins --- common/thrift/CatalogObjects.thrift | 3 +- .../apache/impala/catalog/FeIcebergTable.java | 12 ++- .../catalog/IcebergContentFileStore.java | 45 ++++++--- .../impala/planner/IcebergScanPlanner.java | 15 ++- testdata/data/README | 6 ++ ...-b12a-4c5f-a66e-a8e9375daeba-00001.parquet | Bin 0 -> 662 bytes ...-527b-4911-8c6e-88d416adac57-00001.parquet | Bin 0 -> 1581 bytes ...adf173-0c84-4378-a9d0-5d7f47183978-m0.avro | Bin 0 -> 3933 bytes ...bef400-daea-478a-858a-2baf2438f644-m0.avro | Bin 0 -> 3617 bytes ...-0eadf173-0c84-4378-a9d0-5d7f47183978.avro | Bin 0 -> 2314 bytes ...-8cbef400-daea-478a-858a-2baf2438f644.avro | Bin 0 -> 2160 bytes .../metadata/v1.metadata.json | 66 +++++++++++++ .../metadata/v2.metadata.json | 93 ++++++++++++++++++ .../metadata/version-hint.text | 1 + .../functional/functional_schema_template.sql | 15 +++ .../functional/schema_constraints.csv | 1 + .../queries/QueryTest/iceberg-negative.test | 21 ++++ .../queries/QueryTest/iceberg-query.test | 10 ++ 18 files changed, 269 insertions(+), 19 
deletions(-) create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/data/00000-0-fb178c51-b12a-4c5f-a66e-a8e9375daeba-00001.parquet create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/data/00191-4-6e780302-527b-4911-8c6e-88d416adac57-00001.parquet create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/0eadf173-0c84-4378-a9d0-5d7f47183978-m0.avro create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/8cbef400-daea-478a-858a-2baf2438f644-m0.avro create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-5725822353600261755-1-0eadf173-0c84-4378-a9d0-5d7f47183978.avro create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-6816997371555012807-1-8cbef400-daea-478a-858a-2baf2438f644.avro create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v1.metadata.json create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v2.metadata.json create mode 100644 testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/version-hint.text diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift index 7dc214d3a..467191450 100644 --- a/common/thrift/CatalogObjects.thrift +++ b/common/thrift/CatalogObjects.thrift @@ -617,7 +617,8 @@ struct TIcebergPartitionStats { struct TIcebergContentFileStore { 1: optional map path_hash_to_data_file_without_deletes 2: optional map path_hash_to_data_file_with_deletes - 3: optional map path_hash_to_delete_file + 3: optional map path_hash_to_position_delete_file + 7: optional map path_hash_to_equality_delete_file 4: optional bool has_avro 5: optional bool has_orc 6: optional bool has_parquet diff --git 
a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java index fbda3fd06..ca9e2e05c 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java @@ -712,11 +712,15 @@ public interface FeIcebergTable extends FeFsTable { fileStore.addDataFileWithDeletes(pathHashAndFd.first, pathHashAndFd.second); } for (DeleteFile deleteFile : icebergFiles.deleteFiles) { - Preconditions.checkState( - deleteFile.content().equals(FileContent.EQUALITY_DELETES) || - deleteFile.content().equals(FileContent.POSITION_DELETES)); pathHashAndFd = getPathHashAndFd(deleteFile, table, hdfsFileDescMap); - fileStore.addDeleteFileDescriptor(pathHashAndFd.first, pathHashAndFd.second); + if (deleteFile.content().equals(FileContent.POSITION_DELETES)) { + fileStore.addPositionDeleteFile(pathHashAndFd.first, pathHashAndFd.second); + } else if (deleteFile.content().equals(FileContent.EQUALITY_DELETES)) { + fileStore.addEqualityDeleteFile(pathHashAndFd.first, pathHashAndFd.second); + } else { + Preconditions.checkState(false, + "Delete file with unknown kind: " + deleteFile.path().toString()); + } } return fileStore; } diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java index d869665d5..8f169ae74 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java @@ -95,7 +95,8 @@ public class IcebergContentFileStore { // Separate map-list containers for the different content files. 
private MapListContainer dataFilesWithoutDeletes_ = new MapListContainer(); private MapListContainer dataFilesWithDeletes_ = new MapListContainer(); - private MapListContainer deleteFiles_ = new MapListContainer(); + private MapListContainer positionDeleteFiles_ = new MapListContainer(); + private MapListContainer equalityDeleteFiles_ = new MapListContainer(); // Caches file descriptors loaded during time-travel queries. private final ConcurrentMap oldFileDescMap_ = @@ -120,8 +121,14 @@ public class IcebergContentFileStore { } } - public void addDeleteFileDescriptor(String pathHash, FileDescriptor desc) { - if (deleteFiles_.add(pathHash, desc)) { + public void addPositionDeleteFile(String pathHash, FileDescriptor desc) { + if (positionDeleteFiles_.add(pathHash, desc)) { + updateFileFormats(desc); + } + } + + public void addEqualityDeleteFile(String pathHash, FileDescriptor desc) { + if (equalityDeleteFiles_.add(pathHash, desc)) { updateFileFormats(desc); } } @@ -139,7 +146,9 @@ public class IcebergContentFileStore { } public FileDescriptor getDeleteFileDescriptor(String pathHash) { - return deleteFiles_.get(pathHash); + FileDescriptor ret = positionDeleteFiles_.get(pathHash); + if (ret != null) return ret; + return equalityDeleteFiles_.get(pathHash); } public FileDescriptor getOldFileDescriptor(String pathHash) { @@ -154,19 +163,27 @@ public class IcebergContentFileStore { return dataFilesWithDeletes_.getList(); } - public List getDeleteFiles() { return deleteFiles_.getList(); } + public List getPositionDeleteFiles() { + return positionDeleteFiles_.getList(); + } + + public List getEqualityDeleteFiles() { + return equalityDeleteFiles_.getList(); + } public long getNumFiles() { return dataFilesWithoutDeletes_.getNumFiles() + dataFilesWithDeletes_.getNumFiles() + - deleteFiles_.getNumFiles(); + positionDeleteFiles_.getNumFiles() + + equalityDeleteFiles_.getNumFiles(); } public Iterable getAllFiles() { return Iterables.concat( dataFilesWithoutDeletes_.getList(), 
dataFilesWithDeletes_.getList(), - deleteFiles_.getList()); + positionDeleteFiles_.getList(), + equalityDeleteFiles_.getList()); } public Iterable getAllDataFiles() { @@ -194,7 +211,8 @@ public class IcebergContentFileStore { TIcebergContentFileStore ret = new TIcebergContentFileStore(); ret.setPath_hash_to_data_file_without_deletes(dataFilesWithoutDeletes_.toThrift()); ret.setPath_hash_to_data_file_with_deletes(dataFilesWithDeletes_.toThrift()); - ret.setPath_hash_to_delete_file(deleteFiles_.toThrift()); + ret.setPath_hash_to_position_delete_file(positionDeleteFiles_.toThrift()); + ret.setPath_hash_to_equality_delete_file(equalityDeleteFiles_.toThrift()); ret.setHas_avro(hasAvro_); ret.setHas_orc(hasOrc_); ret.setHas_parquet(hasParquet_); @@ -215,9 +233,14 @@ public class IcebergContentFileStore { tFileStore.getPath_hash_to_data_file_with_deletes(), networkAddresses, hostIndex); } - if (tFileStore.isSetPath_hash_to_delete_file()) { - ret.deleteFiles_ = MapListContainer.fromThrift( - tFileStore.getPath_hash_to_delete_file(), + if (tFileStore.isSetPath_hash_to_position_delete_file()) { + ret.positionDeleteFiles_ = MapListContainer.fromThrift( + tFileStore.getPath_hash_to_position_delete_file(), + networkAddresses, hostIndex); + } + if (tFileStore.isSetPath_hash_to_equality_delete_file()) { + ret.equalityDeleteFiles_ = MapListContainer.fromThrift( + tFileStore.getPath_hash_to_equality_delete_file(), networkAddresses, hostIndex); } ret.hasAvro_ = tFileStore.isSetHas_avro() ? 
tFileStore.isHas_avro() : false; diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java index dea0ebcc2..29accb498 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java @@ -158,11 +158,20 @@ public class IcebergScanPlanner { tblRef_.getTimeTravelSpec() != null; } - private void setFileDescriptorsBasedOnFileStore() { + private void setFileDescriptorsBasedOnFileStore() throws ImpalaException { IcebergContentFileStore fileStore = getIceTable().getContentFileStore(); + if (!fileStore.getEqualityDeleteFiles().isEmpty()) { + // TODO(IMPALA-11388): Add support for equality deletes. + FileDescriptor firstEqualityDeleteFile = fileStore.getEqualityDeleteFiles().get(0); + throw new ImpalaRuntimeException(String.format( + "Iceberg table %s has EQUALITY delete file which is currently " + + "not supported by Impala, for example: %s", + getIceTable().getFullName(), + firstEqualityDeleteFile.getAbsolutePath(getIceTable().getLocation()))); + } dataFilesWithoutDeletes_ = fileStore.getDataFilesWithoutDeletes(); dataFilesWithDeletes_ = fileStore.getDataFilesWithDeletes(); - deleteFiles_ = new HashSet<>(fileStore.getDeleteFiles()); + deleteFiles_ = new HashSet<>(fileStore.getPositionDeleteFiles()); updateDeleteStatistics(); } @@ -342,7 +351,7 @@ public class IcebergScanPlanner { if (delFile.content() == FileContent.EQUALITY_DELETES) { throw new ImpalaRuntimeException(String.format( "Iceberg table %s has EQUALITY delete file which is currently " + - "not supported by Impala: %s", getIceTable().getFullName(), + "not supported by Impala, for example: %s", getIceTable().getFullName(), delFile.path())); } Pair delFileDesc = getFileDescriptor(delFile); diff --git a/testdata/data/README b/testdata/data/README index b52ba0c4e..ac22c7cf0 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ 
-747,6 +747,12 @@ to make these tables correspond to an Iceberg table in a HadoopCatalog instead o HiveCatalog. The table has a positional delete file. +iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality: +Since Hive/Spark is unable to write equality delete files we've copied the contents of +'iceberg_v2_delete_positional' and manually edited the metadata to have equality delete +files in it. Only modified the metadata files, the actual delete files are still +positional. + iceberg_test/iceberg_migrated_alter_test Generated and migrated by Hive CREATE TABLE iceberg_migrated_alter_test (int_col int, string_col string, double_col double) stored as parquet; diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/data/00000-0-fb178c51-b12a-4c5f-a66e-a8e9375daeba-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/data/00000-0-fb178c51-b12a-4c5f-a66e-a8e9375daeba-00001.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d27d6b3f66dfa0768c573715d1580fa63aedf4e8 GIT binary patch literal 662 zcmaJ)>9R?>yX~?Rgd*ap6!ap(PA0S4NOs#MS-~v@ z(UW-6ga5#b2Y-V{5B283g9j0h{s!@7ckO!W9ESJa_r7nwH#7Tp4gw(1gFX29{o5Pe z0RvM5OhRtH#%;D_-0$}X!{j;f>R)Q_>A}a(=Qj|+&JDN=FaNyQy^N>7edY42d$(4| z#`^6(Akn_A*_KIX;+R?H$|3~KW($}K+oEz&aF_`>BE)vsbRHqnyLY7&ai%~gps#6` zmL?B00yPZvM3iEUv~&4OwAUteDlIDhXa1;|A2i^EeWJ|pO!kYeR>N7> zQmJB<4;zI%R3l-%qo=%lcC2`p7iB(?CHMIp+DsGPZTftWsw^pZ_t7b@DuK_(VpR2G z=JQ;gOwwE-Fc4Xx{C{HE(Rj$u{5dDQ;8eGE2@5Tx74PvGvF;y>{A8la%~9??Re6yf zjoqLTgpJUx$CETm+)hVDO}X9NiK6YclC4liav<6=+KM}B5Jh1Sg`wQpTDN_Cr0Si% K;+NRRANd!+RE6>Y literal 0 HcmV?d00001 diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/data/00191-4-6e780302-527b-4911-8c6e-88d416adac57-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/data/00191-4-6e780302-527b-4911-8c6e-88d416adac57-00001.parquet new file mode 100644 index 
0000000000000000000000000000000000000000..2fb34f0b4065dec6b3ae1f332cf09c3c357d7549 GIT binary patch literal 1581 zcmeHI&2G~`5MFOglt6(Nm{nHt!3PorDUN>d+VVp5#>y@w%4^K7l+s>uYlkc zP!1L1loLWc1VV_1=mT)*nX;Rff&}zH%K?d%tX<8lX1|&5oAGe{fek8r!0-`o0tOYK z*UIP3eS}aPM)|5~rdhz!ILnJw$8s&pEO=fRPuYaW*)->75^$eSc1F@{#w^UT@hD&g zOS2v6liwvh-g8DFPkF&d<19~#Bzw$KGZfcMOHn=K5MeCvFzH(t%E~;=wo)Y{YC0hFzKqA@_Jg9(?;6-joZp(1%L{t?` z5hl6LRPrrDYu3%bQJ1@284>1Nz2|^1v zr+Ddi=qGNC9AfaqMMO1nP(3~h-`$OJ9>jc?iME8LI{rb+b>zPNa&&ZE(_~3i;gj{M z-*(?Dtn-^|E5boA)oAN~H~eNW{k4|;S(4=0H6Ubu1!iJ8U5v`fOon`a749MOTqPJa zx02m4OPRjMC%G80dbjPgEn|CYeYm}G|DoRWr%4*>-Hy#1&vreB4T3NT`(4fkea{}a doiJpP*RiGk;?+7jZD`g$SSc_@AqUjb|Mrau4x literal 0 HcmV?d00001 diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/0eadf173-0c84-4378-a9d0-5d7f47183978-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/0eadf173-0c84-4378-a9d0-5d7f47183978-m0.avro new file mode 100644 index 0000000000000000000000000000000000000000..5fa4ae11d339aa942687de7cd915e1694951584a GIT binary patch literal 3933 zcmd5aVPshQmw<75SgWrwPI_5S?c@4b3+Klo&4 z_g#8gh+8F_@svJypBeHz=f>11I1@!^^o)$AxK3#nN4%6i&t!29`%%o3ur#KRpEV+- zq_QgEXs#m8WFu$Bp)s{CdJEZ%=H*OC9}b!ekBzKK5?Cf8J2sy5jF3mPN~AHx{kcV@ z$<{UTPb;1=?q^jR@L~&XuBI)dlKN4c@K(=SL<$FFE~CIiCIO@NzEQ&v3I-^?Umuo4 zL`2RwK}%C&?V;HovG;OO#xfQe33h1#B5V<$j#`!8v5S zjlHgd#=wz~G*NURD30nIaXo1|8JI|_H1kn>p|z>87Fk*p6w5i5JcVj*_gCxx#J0!& zscm~rG)tv_!coVXx`&$@nx{0W7S+{6sRdzs7nsgO9LJ0%M|C?&X>MF#OHeHSkmiVW z9U+NBA_e>*LJAzLpSU~$kJfh(%UkuSPVn_WH36s`Ya6I0t8OiDoik2@%CTMpD#hm> zFo`8q2t9&emnAd{$pJmsjGgWz-ttvauMt4UMgW&eynQ-0bIs8^%0F_=akc_bA*GZ9 z2kWk|o@ut;Ni&*VJ6NsjeqBnY&Fl zbM!{BQjAxCg;V^NEirHm@DT~pSySr4^ge80gPV@-Rs+ttB*F#YzbrdzwoG@ zXqO!tEgaf!2396g7_#CL-edJ>k-HzDl?}e>G%iH}XcZxF`KliY=tP^KLy(0b6J){0 zMiYaYV~a-=Ux0ViZy+W@&Te3D@5rBv-`=?Y$LG&){+hiv{p$DbPdB>V7e9RaFVu;P;V( zeao^ZwmC3IeC%1S<(NZf9GHWNZJQn&antj{fjt7gvfi&7NyZilLFJHd(WA{7!`nWS+*UN6R?5NXDr``B3q5%(0%Zvhh?6IM32DU@Z0UJgp 
m9YHkI;}ds03@H!jax}`fw|6@8&pd+vv4?v5T5mpq7_1b5aQhOd1l9tWL{DZJhIYC5SC3MA+1(cyWP%sW4n#JooF`7 zi8~S!7dUg|2fzVw;lK~!26v87)s8(qlgzNpCW^rOKY-{p*O@lWTh7UsgP4 zJjkmooyP%+IwumlOnE;vqrcU4! z!Ke;!^7JveNYsoxIwHFHSh23A_UoA4(tcx6X4I%_#q%SGO==s0oS70NR*#61;U7}a z5*oG42-h(?2^LCM-)6rad)HY)|M^u@;FHD)K}?CNij?>23MmPxNt>vLntRyQ+E|jL4uZ(7xHjpgftATi9wd1X&Y|ON?e!Ei0gi>D zsiq4-b9CRBn@uyxz(iVQd4T2%ug#3L$kMW;IL?XU8BB9~e|7%PTz3?lyY9HDW@+@# zIQm#K_wc5H<~dEP#dI}Q8b!F{D{N;fP7_Ac<64d~TG&@837W;9(gLxrDYsMK+dCqG3KAg%Q&yVE22?r<*}aNjb!sp@kqr5FM|*;ht;DI zoY&AaZ-_h_Z2_G(TtBF{D6==(3Oijd(e-Ww$=frssfyxmK|(e2w^1|C6pEE$yagnG zJ2}(!z6wYt+d)Nwj4O&;!VMBuquPu`j-HYX%3gczI1G=_t)d~UUkl&s<;XWzn`XC!HT-{MRByD)2@Mws9W)Co z6B#U7iG=f5A6n$`#~5WpZYGVTC;_c90xsY7B>|IY8*~J=uw;TQ#Ml^Ox8~U55XBSV zJ^dYsiHNfYsO_D1|M}rE`%ZoM=G(u0@$#o{PORVWTh`ZK{r$(!zyA6CFZVi~Z><3= zYpQ=iOFk2oz!L<%2h;u+9d^hgKBeXjRuZ$xW0@XGqG=?$I`&QD;2~< zCrC2>h!HNxc&1oZr&|H{4B!c*itBv*Y{CK`nVZD9P)*jmxJhp9kUV3QC`qO%SBv>> z8nA&BiWTQ86-=-j^k&-E71Sn;?@}jAwO1*)>m2lMOTzt3X6fX&>gKJvsgjwkV)ir~v zNX5?tOtMI-xrWRaRz>Kps0dV9!wMFszr>=&F79*Y2QojcUDvfZ1iP8eNF8Pf<+uhe z`;BJ`ue*Of=$lpe3Usp#<)xtK1m7vH(5lRrbx-AVNsBqzOEGlVm$c}aGtg*pCTJtW zskmnEY8B3etpzw0)$Ckj1lldiq204Y9>*5CP&LGmrZB^8oc#o?uUTL8%5>m(^%bA9 zNNo$iMTRr0fot5clraN=3#CWiO>;D_kbsz#ZWieo#aW7D(&R}FAkG&(Xv?^ZCu9Zm zd`(u(dH*a@5MSV=EQCB()=1|x?;DW%RZo5FVUBmwjeT?&IBI+1f_ zJ0DT^e&P+cA{ zd=d72z1)p}N;&cI)rDB@>%C_GVAC&$$wzO8n5$VgVG6W&b-iHBLf3I@3Jh#_=z;AG z@NaVr;ilX7!fUQ;C(edW@TJ#kwcCIH_3_Qoi+4|+ueDpPi@4JIKAAZTXxJO}Z71+t u+wBiM8@8xp59ly-hdr;qg)3EN|8KY2oz91&XaD?p{N%$^omuf)l<_kIpbq)~ literal 0 HcmV?d00001 diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-6816997371555012807-1-8cbef400-daea-478a-858a-2baf2438f644.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-6816997371555012807-1-8cbef400-daea-478a-858a-2baf2438f644.avro new file mode 100644 index 0000000000000000000000000000000000000000..c90a29cc0392f64259e161326193615f8c74089f GIT binary patch 
literal 2160 zcmbVNO>fgc5KSY5_yLF`TKSwbPMWkPQV$gcsiG2ylv7n%6MG#mvUh{MZU|KnD);^X z4*Uuv!~r4r7jT6WoH%i1)?ZmCZCbck^6u<=GjDd@J{&$>yLJnXBe5PkJ`LgR?Gr;z z$JFQ>5p_i58V!R(sQyFX%%iciJr+=0_81M^*ytagEJch#`WTS|#WG@ig#WH^5NBUF zAerFg$>TjTVbUk}2P9=LQJc&)`aRQ}HI}gh%11hSAT;a)At4|tK@zhUlrT<)Q%U0@ z-5j{B0goY)Oyy(8W9rz*Tqe%AEVCZqCfVC1NlaZL1(`%lF6P_OU>(6FP0!~l=wNrV z&6o&}NS~5I+CDM2U69Zq?QNF^bSd-pmPQfNX9?vFwRsW_X*AEfBFP|+B3Trf_>@;j z$pm5ou1i(rX;U+m+C7acWT?-T`1ZvFF9|7v3tH@iUg-kWrbd;fV#gfDu`gt$A=Se2 z2-OwogUo7J!D4PKu_&?A`yAVjNG@vEjuMApH=`*j!Ze}Gu7*pWvsmJFx330$FAraV zuD2n(6z&zlchf7hD)VLCBQaUhq9S`Hh6+1Ri?%)kr52ZhmNHz3tM{%@;ZoQ_fD2Le z&Xq=>+#(y=LxU$lV4w@-Lkv*_Q{2Yc57GL{^+m6YdZtxe@rp$%TL8vmoLL22;0}cd zC~#cJJn|q)(7ZeXe44qLuVxfyDF{fJCpm^7S@fVg+FhKJrO?%yjDqv%GE(5@aJtlk zB#=g5S&zAeus74k*%2;vZYZg1 zr<>ouemgsT|M};=-!FHj59;sguj(y)DP(g35%mSWP%Y+Ad=W;rT5kE^3Ng0v)rCNe z)LyxN+_AG^veDZiU~<|DsRZR+Ez22FuWOo37brBlTNX5}9)3DQ@H*WM%iHXBo1wX` e68vJ-YW4cr>pwp}eERj~<66B|yN)YM$^HfBPunE` literal 0 HcmV?d00001 diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v1.metadata.json new file mode 100644 index 000000000..b658df4e0 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v1.metadata.json @@ -0,0 +1,66 @@ +{ + "format-version" : 2, + "table-uuid" : "3deb545a-5a19-48f1-ad07-a4d80c677e3e", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality", + "last-sequence-number" : 1, + "last-updated-ms" : 1649071501670, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "long" + }, { + "id" : 2, + "name" : "data", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + 
"last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "owner" : "tamasmate", + "write.delete.mode" : "merge-on-read" + }, + "current-snapshot-id" : 6816997371555012807, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 6816997371555012807, + "timestamp-ms" : 1649071501670, + "summary" : { + "operation" : "append", + "spark.app.id" : "local-1649071493099", + "added-data-files" : "1", + "added-records" : "3", + "added-files-size" : "662", + "changed-partition-count" : "1", + "total-records" : "3", + "total-files-size" : "662", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-6816997371555012807-1-8cbef400-daea-478a-858a-2baf2438f644.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1649071501670, + "snapshot-id" : 6816997371555012807 + } ], + "metadata-log" : [ ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v2.metadata.json new file mode 100644 index 000000000..bed83e8b8 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v2.metadata.json @@ -0,0 +1,93 @@ +{ + "format-version" : 2, + "table-uuid" : "3deb545a-5a19-48f1-ad07-a4d80c677e3e", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality", + "last-sequence-number" : 2, + "last-updated-ms" : 1649071557501, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "long" + }, { + "id" : 2, + "name" : "data", + "required" : false, + "type" : 
"string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "owner" : "tamasmate", + "write.delete.mode" : "merge-on-read" + }, + "current-snapshot-id" : 5725822353600261755, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 6816997371555012807, + "timestamp-ms" : 1649071501670, + "summary" : { + "operation" : "append", + "spark.app.id" : "local-1649071493099", + "added-data-files" : "1", + "added-records" : "3", + "added-files-size" : "662", + "changed-partition-count" : "1", + "total-records" : "3", + "total-files-size" : "662", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-6816997371555012807-1-8cbef400-daea-478a-858a-2baf2438f644.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 5725822353600261755, + "parent-snapshot-id" : 6816997371555012807, + "timestamp-ms" : 1649071557501, + "summary" : { + "operation" : "overwrite", + "spark.app.id" : "local-1649071493099", + "added-delete-files" : "1", + "added-files-size" : "1598", + "added-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "3", + "total-files-size" : "2260", + "total-data-files" : "1", + "total-delete-files" : "1", + "total-position-deletes" : "1", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/snap-5725822353600261755-1-0eadf173-0c84-4378-a9d0-5d7f47183978.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1649071501670, + "snapshot-id" : 6816997371555012807 + }, { + "timestamp-ms" : 1649071557501, + "snapshot-id" : 
5725822353600261755 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1649071501670, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/v1.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/version-hint.text b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/version-hint.text new file mode 100644 index 000000000..d8263ee98 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality/metadata/version-hint.text @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 68d5453dd..ada17381a 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3466,6 +3466,21 @@ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/i ---- DATASET functional ---- BASE_TABLE_NAME +iceberg_v2_delete_equality +---- CREATE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} +STORED AS ICEBERG +TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_v2_delete_equality', + 'format-version'='2', 'write.update.mode'='merge-on-read'); +---- DEPENDENT_LOAD +`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality /test-warehouse/iceberg_test/hadoop_catalog/ice +==== +---- DATASET +functional +---- BASE_TABLE_NAME iceberg_multiple_storage_locations ---- CREATE CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} diff --git a/testdata/datasets/functional/schema_constraints.csv 
b/testdata/datasets/functional/schema_constraints.csv index 6cf3b16a0..4c23e23dd 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -82,6 +82,7 @@ table_name:iceberg_timestamp_part, constraint:restrict_to, table_format:parquet/ table_name:iceberg_timestamptz_part, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_uppercase_col, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_v2_delete_positional, constraint:restrict_to, table_format:parquet/none/none +table_name:iceberg_v2_delete_equality, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_v2_no_deletes, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_v2_no_deletes_orc, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_v2_positional_update_all_rows, constraint:restrict_to, table_format:parquet/none/none diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test index 2231c8e15..a4c1836f4 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test @@ -662,3 +662,24 @@ select * from functional_parquet.iceberg_alltypes_part for system_time as of '20 ---- CATCH IllegalArgumentException: Cannot find a snapshot older than 2000-01-01 01:02:03 ==== +---- QUERY +# Querying a table with equality deletes is not allowed. +# We don't use time-travel, so we plan the query from cached metadata. +select * from functional_parquet.iceberg_v2_delete_equality; +---- CATCH +ImpalaRuntimeException: Iceberg table functional_parquet.iceberg_v2_delete_equality has EQUALITY delete file which is currently not supported by Impala +==== +---- QUERY +# Querying a table with equality deletes is not allowed. 
+# Use time-travel based on snapshot id. +select * from functional_parquet.iceberg_v2_delete_equality for system_version as of 5725822353600261755; +---- CATCH +ImpalaRuntimeException: Iceberg table functional_parquet.iceberg_v2_delete_equality has EQUALITY delete file which is currently not supported by Impala +==== +---- QUERY +# Querying a table with equality deletes is not allowed. +# Use time-travel based on timestamp. +select * from functional_parquet.iceberg_v2_delete_equality for system_time as of now(); +---- CATCH +ImpalaRuntimeException: Iceberg table functional_parquet.iceberg_v2_delete_equality has EQUALITY delete file which is currently not supported by Impala +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test index a6b28cfc8..b90f02c0b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test @@ -1160,3 +1160,13 @@ where i.action in ('view') and j.id=1 and j.id=i.id; ---- TYPES int ==== +---- QUERY +# We can query a snapshot if it doesn't have equality deletes. +select * from iceberg_v2_delete_equality for system_version as of 6816997371555012807; +---- RESULTS +1,'a' +2,'b' +3,'c' +---- TYPES +BIGINT,STRING +====