Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#4731] feat(paimon-spark-connector): Support delete, update and mergeInto paimon table in paimon spark connector #5864

Open
wants to merge 55 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
ed8e5b7
basic ddl
Dec 1, 2024
e4a11d1
basic ddl
Dec 1, 2024
a16e949
basic dml
Dec 1, 2024
e28d323
support partition
Dec 1, 2024
6de61c2
basic dml
Dec 1, 2024
17690df
basic schema ddl
Dec 1, 2024
dae7ccf
Merge branch 'support-paimon-connector-ddl' of github.com:caican00/gr…
Dec 2, 2024
7c6013c
fix
Dec 2, 2024
0915488
fix
Dec 2, 2024
4190fd9
Merge branch 'support-paimon-connector-ddl' into support-paimon-conne…
Dec 2, 2024
01bf325
Merge branch 'main' of github.com:apache/gravitino into support-paimo…
Dec 2, 2024
a83a406
fix
Dec 2, 2024
8846dc9
support partition management
Dec 2, 2024
c114d1c
support partition management
Dec 2, 2024
af0b32a
fix
Dec 3, 2024
79403e2
fix
Dec 3, 2024
1aa58cd
fix
Dec 3, 2024
33c6956
Merge branch 'support-paimon-connector-dml' of github.com:caican00/gr…
Dec 3, 2024
3512934
support hive backend
Dec 4, 2024
4499371
support hive backend
Dec 4, 2024
84f48f0
support hive backend
Dec 4, 2024
0960a27
support jdbc backend
Dec 4, 2024
c665d8b
support metadata columns
Dec 4, 2024
d0d0e6d
support metadata columns
Dec 4, 2024
7f7bbb7
support functions
Dec 4, 2024
a4ba49b
support functions
Dec 4, 2024
3e2d893
support time travel
Dec 4, 2024
10f6a38
support row level operations
Dec 4, 2024
8e540c2
support row level operations
Dec 4, 2024
b8235c9
support functions
Dec 5, 2024
70b8900
support functions
Dec 5, 2024
15732fa
support time travel
Dec 5, 2024
2a1833f
support extensions
Dec 5, 2024
659338f
fix
Dec 2, 2024
069b748
fix
Dec 6, 2024
d354e2c
Merge branch 'support-paimon-connector-ddl' of github.com:caican00/gr…
Dec 9, 2024
658f687
support paimon connector dml together
Dec 9, 2024
03f1a2d
Merge branch 'main' into support-paimon-connector-ddl
caican00 Dec 9, 2024
f0bfd60
fix
Dec 9, 2024
22de28e
fix
Dec 9, 2024
3195e8c
Merge branch 'support-paimon-connector-ddl' of github.com:caican00/gr…
Dec 9, 2024
e337024
fix
Dec 9, 2024
ce0d591
fix
Dec 9, 2024
c77b2f4
Merge branch 'support-paimon-connector-ddl' of github.com:caican00/gr…
Dec 9, 2024
1ce5e4e
fix
Dec 9, 2024
a5413d1
Merge branch 'support-paimon-connector-partition-management' of githu…
Dec 9, 2024
4f92ddd
Merge branch 'support-paimon-connector-partition-management' of githu…
Dec 9, 2024
eb9eab7
Merge branch 'support-paimon-connector-hive-backend' of github.com:ca…
Dec 9, 2024
8fa01ad
Merge branch 'support-paimon-connector-jdbc-backend' of github.com:ca…
Dec 9, 2024
54b41fc
Merge branch 'support-paimon-connector-function' of github.com:caican…
Dec 9, 2024
de7f9ae
Merge branch 'support-paimon-connector-time-travel' of github.com:cai…
Dec 9, 2024
977a6b9
fix
Dec 9, 2024
f8220e5
fix
Dec 9, 2024
b723ad1
Merge branch 'support-paimon-connector-ddl' of github.com:caican00/gr…
Dec 15, 2024
735aa2b
fix
Dec 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.gravitino.catalog.lakehouse.paimon;

/**
 * String constants for the property keys used with Paimon catalogs, their storage backends and
 * Paimon tables.
 *
 * <p>This class only holds constants and is not meant to be instantiated.
 */
public class PaimonConstants {

  // Paimon catalog properties constants
  public static final String CATALOG_BACKEND = "catalog-backend";
  public static final String METASTORE = "metastore";
  public static final String URI = "uri";
  public static final String WAREHOUSE = "warehouse";
  public static final String CATALOG_BACKEND_NAME = "catalog-backend-name";

  // JDBC credentials: the GRAVITINO_* constants use dash-separated keys, the PAIMON_* constants
  // use dot-separated keys.
  public static final String GRAVITINO_JDBC_USER = "jdbc-user";
  public static final String PAIMON_JDBC_USER = "jdbc.user";

  public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password";
  public static final String PAIMON_JDBC_PASSWORD = "jdbc.password";

  public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver";

  // S3 properties needed by Paimon
  public static final String S3_ENDPOINT = "s3.endpoint";
  public static final String S3_ACCESS_KEY = "s3.access-key";
  public static final String S3_SECRET_KEY = "s3.secret-key";

  // OSS related properties
  public static final String OSS_ENDPOINT = "fs.oss.endpoint";
  public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId";
  public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret";

  // Paimon table properties constants
  public static final String COMMENT = "comment";
  public static final String OWNER = "owner";
  public static final String BUCKET_KEY = "bucket-key";
  public static final String MERGE_ENGINE = "merge-engine";
  public static final String SEQUENCE_FIELD = "sequence.field";
  public static final String ROWKIND_FIELD = "rowkind.field";
  public static final String PRIMARY_KEY = "primary-key";
  public static final String PARTITION = "partition";

  // Constants holder: prevent instantiation.
  private PaimonConstants() {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.gravitino.catalog.lakehouse.paimon;

import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import org.apache.gravitino.storage.OSSProperties;
import org.apache.gravitino.storage.S3Properties;

/**
 * Static helpers that translate Gravitino catalog properties into the property keys passed to
 * Paimon.
 */
public class PaimonPropertiesUtils {

  // Maps each supported Gravitino property key to the key that is passed to Paimon. Some keys are
  // forwarded unchanged (for example 'catalog-backend', 'uri' and 'warehouse'), while others are
  // renamed (for example 'jdbc-user' becomes 'jdbc.user'). Keys that are not present in this map
  // are not forwarded by toPaimonCatalogProperties.
  public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON;

  static {
    Map<String, String> map = new HashMap<>();
    map.put(PaimonConstants.CATALOG_BACKEND, PaimonConstants.CATALOG_BACKEND);
    map.put(PaimonConstants.GRAVITINO_JDBC_DRIVER, PaimonConstants.GRAVITINO_JDBC_DRIVER);
    map.put(PaimonConstants.GRAVITINO_JDBC_USER, PaimonConstants.PAIMON_JDBC_USER);
    map.put(PaimonConstants.GRAVITINO_JDBC_PASSWORD, PaimonConstants.PAIMON_JDBC_PASSWORD);
    map.put(PaimonConstants.URI, PaimonConstants.URI);
    map.put(PaimonConstants.WAREHOUSE, PaimonConstants.WAREHOUSE);
    map.put(PaimonConstants.CATALOG_BACKEND_NAME, PaimonConstants.CATALOG_BACKEND_NAME);
    // S3
    map.put(S3Properties.GRAVITINO_S3_ENDPOINT, PaimonConstants.S3_ENDPOINT);
    map.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, PaimonConstants.S3_ACCESS_KEY);
    map.put(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, PaimonConstants.S3_SECRET_KEY);
    // OSS
    map.put(OSSProperties.GRAVITINO_OSS_ENDPOINT, PaimonConstants.OSS_ENDPOINT);
    map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, PaimonConstants.OSS_ACCESS_KEY);
    map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, PaimonConstants.OSS_SECRET_KEY);
    GRAVITINO_CONFIG_TO_PAIMON = Collections.unmodifiableMap(map);
  }

  // Static utility class: prevent instantiation.
  private PaimonPropertiesUtils() {}

  /**
   * Converts Gravitino properties to Paimon catalog properties, the common transform logic shared
   * by Spark connector, Gravitino Paimon catalog.
   *
   * <p>Only keys present in {@link #GRAVITINO_CONFIG_TO_PAIMON} are copied, under their mapped
   * Paimon key; every other entry is dropped. The input map is not modified.
   *
   * @param gravitinoProperties a map of Gravitino configuration properties.
   * @return a new mutable map containing Paimon catalog properties.
   */
  public static Map<String, String> toPaimonCatalogProperties(
      Map<String, String> gravitinoProperties) {
    Map<String, String> paimonProperties = new HashMap<>();
    gravitinoProperties.forEach(
        (key, value) -> {
          // The mapping never holds null values, so a null result means "key not mapped".
          String paimonKey = GRAVITINO_CONFIG_TO_PAIMON.get(key);
          if (paimonKey != null) {
            paimonProperties.put(paimonKey, value);
          }
        });
    return paimonProperties;
  }

  /**
   * Get catalog backend name from Gravitino catalog properties.
   *
   * <p>The value of 'catalog-backend-name' is returned as-is when it is set. Otherwise the value
   * of 'catalog-backend' is returned, converted to lower case.
   *
   * @param catalogProperties a map of Gravitino catalog properties.
   * @return catalog backend name.
   * @throws UnsupportedOperationException if neither 'catalog-backend-name' nor 'catalog-backend'
   *     is present in {@code catalogProperties}.
   */
  public static String getCatalogBackendName(Map<String, String> catalogProperties) {
    String backendName = catalogProperties.get(PaimonConstants.CATALOG_BACKEND_NAME);
    if (backendName != null) {
      return backendName;
    }

    String catalogBackend = catalogProperties.get(PaimonConstants.CATALOG_BACKEND);
    // The exception is only thrown when catalogBackend is null, so the message always ends in
    // "null"; the text is kept as-is for callers that may match on it.
    return Optional.ofNullable(catalogBackend)
        .map(s -> s.toLowerCase(Locale.ROOT))
        .orElseThrow(
            () ->
                new UnsupportedOperationException(
                    String.format("Unsupported catalog backend: %s", catalogBackend)));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,22 @@
*/
public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata {

@VisibleForTesting public static final String GRAVITINO_CATALOG_BACKEND = "catalog-backend";
public static final String PAIMON_METASTORE = "metastore";
public static final String WAREHOUSE = "warehouse";
public static final String URI = "uri";
public static final String GRAVITINO_JDBC_USER = "jdbc-user";
public static final String PAIMON_JDBC_USER = "jdbc.user";
public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password";
public static final String PAIMON_JDBC_PASSWORD = "jdbc.password";
public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver";
@VisibleForTesting
public static final String GRAVITINO_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND;

public static final String PAIMON_METASTORE = PaimonConstants.METASTORE;
public static final String WAREHOUSE = PaimonConstants.WAREHOUSE;
public static final String URI = PaimonConstants.URI;
public static final String GRAVITINO_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER;
public static final String PAIMON_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER;
public static final String GRAVITINO_JDBC_PASSWORD = PaimonConstants.GRAVITINO_JDBC_PASSWORD;
public static final String PAIMON_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD;
public static final String GRAVITINO_JDBC_DRIVER = PaimonConstants.GRAVITINO_JDBC_DRIVER;

// S3 properties needed by Paimon
public static final String S3_ENDPOINT = "s3.endpoint";
public static final String S3_ACCESS_KEY = "s3.access-key";
public static final String S3_SECRET_KEY = "s3.secret-key";
public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT;
public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY;
public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY;

public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON =
ImmutableMap.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

import static org.apache.gravitino.meta.AuditInfo.EMPTY;

import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import lombok.ToString;
Expand Down Expand Up @@ -79,15 +81,18 @@ protected PaimonSchema internalBuild() {
PaimonSchema paimonSchema = new PaimonSchema();
paimonSchema.name = name;

Map<String, String> propertiesWithComment =
Maps.newHashMap(Optional.ofNullable(properties).orElse(new HashMap<>()));
if (comment != null) {
paimonSchema.comment = comment;
propertiesWithComment.put(PaimonSchemaPropertiesMetadata.COMMENT, comment);
} else if (properties != null) {
paimonSchema.comment = properties.get(PaimonSchemaPropertiesMetadata.COMMENT);
} else {
paimonSchema.comment = null;
}

paimonSchema.properties = properties;
paimonSchema.properties = propertiesWithComment;
paimonSchema.auditInfo = auditInfo;
return paimonSchema;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
*/
public class PaimonSchemaPropertiesMetadata extends BasePropertiesMetadata {

public static final String COMMENT = "comment";
public static final String COMMENT = PaimonConstants.COMMENT;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we have this constant copied into PaimonSchemaPropertiesMetadata?


private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@
*/
public class PaimonTablePropertiesMetadata extends BasePropertiesMetadata {

public static final String COMMENT = "comment";
public static final String OWNER = "owner";
public static final String BUCKET_KEY = "bucket-key";
public static final String MERGE_ENGINE = "merge-engine";
public static final String SEQUENCE_FIELD = "sequence.field";
public static final String ROWKIND_FIELD = "rowkind.field";
public static final String PRIMARY_KEY = "primary-key";
public static final String PARTITION = "partition";
public static final String COMMENT = PaimonConstants.COMMENT;
public static final String OWNER = PaimonConstants.OWNER;
public static final String BUCKET_KEY = PaimonConstants.BUCKET_KEY;
public static final String MERGE_ENGINE = PaimonConstants.MERGE_ENGINE;
public static final String SEQUENCE_FIELD = PaimonConstants.SEQUENCE_FIELD;
public static final String ROWKIND_FIELD = PaimonConstants.ROWKIND_FIELD;
public static final String PRIMARY_KEY = PaimonConstants.PRIMARY_KEY;
public static final String PARTITION = PaimonConstants.PARTITION;
Comment on lines +38 to +45
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above. Why do we replicate these constants?


private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.gravitino.Config;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants;
import org.apache.gravitino.config.ConfigBuilder;
import org.apache.gravitino.config.ConfigConstants;
import org.apache.gravitino.config.ConfigEntry;
import org.apache.gravitino.connector.PropertyEntry;

public class PaimonOSSFileSystemConfig extends Config {
// OSS related properties
public static final String OSS_ENDPOINT = "fs.oss.endpoint";
public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId";
public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret";
public static final String OSS_ENDPOINT = PaimonConstants.OSS_ENDPOINT;
public static final String OSS_ACCESS_KEY = PaimonConstants.OSS_ACCESS_KEY;
public static final String OSS_SECRET_KEY = PaimonConstants.OSS_SECRET_KEY;

public PaimonOSSFileSystemConfig(Map<String, String> properties) {
super(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.gravitino.Config;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants;
import org.apache.gravitino.config.ConfigBuilder;
import org.apache.gravitino.config.ConfigConstants;
import org.apache.gravitino.config.ConfigEntry;
import org.apache.gravitino.connector.PropertyEntry;

public class PaimonS3FileSystemConfig extends Config {
// S3 related properties
public static final String S3_ENDPOINT = "s3.endpoint";
public static final String S3_ACCESS_KEY = "s3.access-key";
public static final String S3_SECRET_KEY = "s3.secret-key";
public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT;
public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY;
public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY;

public PaimonS3FileSystemConfig(Map<String, String> properties) {
super(false);
Expand Down
Loading
Loading