Skip to content

Commit

Permalink
Add Flink-Hive use case in Jupyter notebook (apache#91)
Browse files Browse the repository at this point in the history
This commit adds a Flink-Hive use case to the Jupyter notebook
  • Loading branch information
TungYuChiang committed Nov 5, 2024
1 parent 440ffd2 commit 833c4e0
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 3 deletions.
4 changes: 4 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ services:
entrypoint: /bin/bash /tmp/gravitino/init.sh
environment:
- HADOOP_CLASSPATH=/tmp/gravitino/packages/hadoop-2.7.3/etc/hadoop:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/common/lib/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/common/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/hdfs:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/hdfs/lib/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/hdfs/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/yarn/lib/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/yarn/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/mapreduce/lib/*:/tmp/gravitino/packages/hadoop-2.7.3/share/hadoop/mapreduce/*:/tmp/gravitino/packages/contrib/capacity-scheduler/*.jar
- NB_USER=my-username
- GRANT_SUDO=yes
- CHOWN_HOME=yes
user: root
depends_on:
hive :
condition: service_healthy
Expand Down
33 changes: 33 additions & 0 deletions init/jupyter/gravitino-flink-hive-example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,39 @@
"## Setting Up PyFlink with Hive and Gravitino Connectors"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b13b0b0b-6aca-4cbb-8771-a10f4c79a017",
"metadata": {},
"outputs": [],
"source": [
"!sudo apt-get update && sudo apt-get install -y openjdk-17-jdk"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c66b8f51-9e9e-41b6-b815-50c96a6d6226",
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import os\n",
"\n",
"# The JDK directory name ends with the host CPU architecture (for example\n",
"# -amd64 or -arm64), so look it up instead of hard-coding a single one.\n",
"jdk_dirs = sorted(glob.glob(\"/usr/lib/jvm/java-17-openjdk-*\"))\n",
"if not jdk_dirs:\n",
"    raise RuntimeError(\"OpenJDK 17 was not found under /usr/lib/jvm\")\n",
"\n",
"os.environ[\"JAVA_HOME\"] = jdk_dirs[0]\n",
"os.environ[\"PATH\"] = f\"{os.environ['JAVA_HOME']}/bin:\" + os.environ[\"PATH\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3c975dc-afa1-4057-9990-6d4b8c06749b",
"metadata": {},
"outputs": [],
"source": [
"!python3 -m pip install apache-flink"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down
19 changes: 16 additions & 3 deletions init/jupyter/jupyter-dependency.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,25 @@ fi

FLINK_HIVE_CONNECTOR_JAR="https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-hive-2.3.10_2.12/1.20.0/flink-sql-connector-hive-2.3.10_2.12-1.20.0.jar"
FLINK_HIVE_CONNECTOR_MD5="${FLINK_HIVE_CONNECTOR_JAR}.md5"
download_and_verify "${FLINK_HIVE_CONNECTOR_JAR}" "${FLINK_HIVE_CONNECTOR_MD5}" "${script_dir}/packages"
download_and_verify "${FLINK_HIVE_CONNECTOR_JAR}" "${FLINK_HIVE_CONNECTOR_MD5}" "${jupyter_dir}"

GRAVITINO_FLINK_JAR="https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-flink-1.18_2.12/0.6.1-incubating/gravitino-flink-1.18_2.12-0.6.1-incubating.jar"
GRAVITINO_FLINK_MD5="${GRAVITINO_FLINK_JAR}.md5"
download_and_verify "${GRAVITINO_FLINK_JAR}" "${GRAVITINO_FLINK_MD5}" "${script_dir}/packages"
download_and_verify "${GRAVITINO_FLINK_JAR}" "${GRAVITINO_FLINK_MD5}" "${jupyter_dir}"

GRAVITINO_FLINK_CONNECTOR_RUNTIME_JAR="https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-flink-connector-runtime-1.18_2.12/0.6.1-incubating/gravitino-flink-connector-runtime-1.18_2.12-0.6.1-incubating.jar"
GRAVITINO_FLINK_CONNECTOR_RUNTIME_MD5="${GRAVITINO_FLINK_CONNECTOR_RUNTIME_JAR}.md5"
download_and_verify "${GRAVITINO_FLINK_CONNECTOR_RUNTIME_JAR}" "${GRAVITINO_FLINK_CONNECTOR_RUNTIME_MD5}" "${script_dir}/packages"
download_and_verify "${GRAVITINO_FLINK_CONNECTOR_RUNTIME_JAR}" "${GRAVITINO_FLINK_CONNECTOR_RUNTIME_MD5}" "${jupyter_dir}"


# Download the Hadoop ${HADOOP_VERSION} release tarball, unpack it into
# ${jupyter_dir}/packages, and remove the tarball afterwards.
# Every step aborts the script with a message on failure so that a broken or
# partial archive is never reported as a successful install.
HADOOP_VERSION="2.7.3"
HADOOP_URL="https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"
HADOOP_PACKAGES_DIR="${jupyter_dir}/packages"
HADOOP_ARCHIVE="${HADOOP_PACKAGES_DIR}/hadoop-${HADOOP_VERSION}.tar.gz"

# curl -o does not create missing parent directories on its own.
mkdir -p "${HADOOP_PACKAGES_DIR}" || { echo "Failed to create ${HADOOP_PACKAGES_DIR}"; exit 1; }

echo "Downloading Hadoop ${HADOOP_VERSION}..."

# -f makes curl fail on HTTP errors instead of saving the error page as the archive.
curl -fLo "${HADOOP_ARCHIVE}" "${HADOOP_URL}" || { rm -f "${HADOOP_ARCHIVE}"; echo "Failed to download Hadoop ${HADOOP_VERSION}"; exit 1; }
echo "Extracting Hadoop ${HADOOP_VERSION}..."

# Check the extraction result: a truncated or corrupt tarball must not be
# followed by the success message below.
tar -xzf "${HADOOP_ARCHIVE}" -C "${HADOOP_PACKAGES_DIR}" || { rm -f "${HADOOP_ARCHIVE}"; echo "Failed to extract Hadoop ${HADOOP_VERSION}"; exit 1; }
rm -f "${HADOOP_ARCHIVE}"

echo "Hadoop ${HADOOP_VERSION} downloaded and extracted to ${jupyter_dir}/packages/hadoop-${HADOOP_VERSION}"

0 comments on commit 833c4e0

Please sign in to comment.