From ac68bba0d336c3eca0cd32d5a85b0a31a02a054b Mon Sep 17 00:00:00 2001 From: badrinathpatchikolla <57659308+badrinathpatchikolla@users.noreply.github.com> Date: Fri, 20 May 2022 11:54:59 +0530 Subject: [PATCH 1/9] Added Spark 3.2 Changes --- README.md | 2 +- build.sbt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1288b69..e7fe3ae 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ libraryDependencies += "com.github.music-of-the-ainur" %% "http-almaren" % "1.2. ``` ``` -spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.12:0.9.3-$SPARK_VERSION,com.github.music-of-the-ainur:http-almaren_2.12:1.2.4-$SPARK_VERSION" +spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.12:0.9.4-$SPARK_VERSION,com.github.music-of-the-ainur:http-almaren_2.12:1.2.4-$SPARK_VERSION" ``` ## Table of Contents diff --git a/build.sbt b/build.sbt index 42f3754..c173d0a 100644 --- a/build.sbt +++ b/build.sbt @@ -5,7 +5,7 @@ lazy val scala212 = "2.12.15" ThisBuild / scalaVersion := scala212 -val sparkVersion = "3.1.3" +val sparkVersion = "3.2.1" val majorVersionReg = "([0-9]+\\.[0-9]+).{0,}".r val majorVersionReg(majorVersion) = sparkVersion @@ -15,7 +15,7 @@ scalacOptions ++= Seq("-deprecation", "-feature") libraryDependencies ++= Seq( "org.apache.spark" %% "spark-core" % sparkVersion % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", - "com.github.music-of-the-ainur" %% "almaren-framework" % s"0.9.3-${majorVersion}" % "provided", + "com.github.music-of-the-ainur" %% "almaren-framework" % s"0.9.4-${majorVersion}" % "provided", "com.lihaoyi" %% "requests" % "0.7.0", "com.typesafe.scala-logging" %% "scala-logging" % "3.9.0", "org.scalatest" %% "scalatest" % "3.0.5" % "test" From b0c4bdfaf6845522aac6a45e0dc19fb030004851 Mon Sep 17 00:00:00 2001 From: badrinathpatchikolla <57659308+badrinathpatchikolla@users.noreply.github.com> Date: Fri, 20 May 2022 12:13:51 +0530 Subject: [PATCH 2/9] Fixed Alias --- README.md | 6 +++--- .../music/of/the/ainur/almaren/http/Test.scala | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e7fe3ae..c4d5177 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ val df = almaren.builder to_json(named_struct('data',named_struct('name',firstName + " " + lastName))) as __DATA__ FROM DATA""") .http(method = "POST", threadPoolSize = 10, batchSize = 10000) - .deserializer("JSON","__BODY__",httpOutpustSchema) + .deserializer("JSON","__BODY__",httpOutpustSchema).alias("TABLE") .sql("""SELECT T.origin, D.firstName, @@ -90,7 +90,7 @@ val df = almaren.builder T.url, T.__ERROR__ as error, T.__ELAPSED_TIME__ as request_time - FROM __TABLE__ T JOIN DATA D ON d.id = t.__ID__""") + FROM TABLE T JOIN DATA D ON d.id = t.__ID__""") .batch df.show(false) @@ -115,7 +115,7 @@ Output: T.url, T.__ERROR__ as error, T.__ELAPSED_TIME__ as request_time - FROM __TABLE__ T JOIN DATA D ON d.id = t.__ID__} + FROM TABLE T JOIN DATA D ON d.id = t.__ID__} +-----------+---------+--------+---+-----------+---------------------------------------------+-----+------------+ |origin |firstName|lastName|age|status_code|url |error|request_time| diff --git a/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala b/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala index fbfe46a..59c1c39 100644 --- a/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala +++ b/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala @@ -73,8 +73,8 @@ class Test extends FunSuite with BeforeAndAfter { } tempDf - .deserializer("JSON", "__BODY__", Some(schema)) - .sql("select __ID__,data,__STATUS_CODE__ as status_code,__ELAPSED_TIME__ as elapsed_time from __TABLE__") + .deserializer("JSON", "__BODY__", Some(schema)).alias("TABLE") + .sql("select __ID__,data,__STATUS_CODE__ as status_code,__ELAPSED_TIME__ as elapsed_time from TABLE").alias("TABLE1") .dsl( """__ID__$__ID__:StringType |elapsed_time$elapsed_time:LongType @@ -83,7 +83,7 @@ class Test extends FunSuite with BeforeAndAfter { |data.age$age:LongType |data.salary$salary:DoubleType |status_code$status_code:IntegerType""".stripMargin - ) + ).alias("TABLE2") .sql( """select T.__ID__ as id , full_name , @@ -91,7 +91,7 @@ class Test extends FunSuite with BeforeAndAfter { age, salary, status_code - from __TABLE__ T join PERSON_DATA P on T.__ID__ = P.__ID__""") + from TABLE2 T join PERSON_DATA P on T.__ID__ = P.__ID__""") .batch } @@ -105,17 +105,17 @@ class Test extends FunSuite with BeforeAndAfter { method = "POST", batchSize = 3, batchDelimiter = (rows: Seq[Row]) => s"""[${rows.map(row => row.getAs[String](Alias.DataCol)).mkString(",")}]""") - .deserializer("JSON", "__BODY__", Some("`data` ARRAY>")) - .sql("select explode(arrays_zip(__ID__, data)) as vars , __STATUS_CODE__ as status_code,__ELAPSED_TIME__ as elapsed_time from __TABLE__") - .sql("select vars.__ID__ as __ID__ ,vars.data as data ,status_code, elapsed_time from __TABLE__ ") + .deserializer("JSON", "__BODY__", Some("`data` ARRAY>")).alias("TABLE") + .sql("select explode(arrays_zip(__ID__, data)) as vars , __STATUS_CODE__ as status_code,__ELAPSED_TIME__ as elapsed_time from TABLE").alias("TABLE1") + .sql("select vars.__ID__ as __ID__ ,vars.data as data ,status_code, elapsed_time from TABLE1 ").alias("TABLE2") .dsl( """__ID__$__ID__:StringType |data.country$country:StringType |data.first_name$first_name:StringType |data.last_name$last_name:StringType |status_code$status_code:IntegerType - |elapsed_time$elapsed_time:LongType""".stripMargin) - .sql("select __TABLE__.__ID__ as id ,first_name,last_name,country,status_code from __TABLE__ inner join BATCH_DATA on __TABLE__.__ID__ = BATCH_DATA.__ID__ ") + |elapsed_time$elapsed_time:LongType""".stripMargin).alias("TABLE3") + .sql("select TABLE3.__ID__ as id ,first_name,last_name,country,status_code from TABLE3 inner join BATCH_DATA on TABLE3.__ID__ = BATCH_DATA.__ID__ ") .batch val getBatchDf = spark.read.parquet("src/test/resources/data/httpBatch.parquet") From 99e769f06305ca6f1ec59bb94a0c04ae2867b1b8 Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Mon, 17 Oct 2022 11:18:01 +0530 Subject: [PATCH 3/9] Upgraded to spark-3.3 --- .bsp/sbt.json | 1 - .../workflows/http.almaren-githubactions.yml | 47 +++++++++++++++++++ .travis.yml | 37 --------------- build.sbt | 14 +++--- .../of/the/ainur/almaren/http/Test.scala | 3 +- 5 files changed, 57 insertions(+), 45 deletions(-) delete mode 100644 .bsp/sbt.json create mode 100644 .github/workflows/http.almaren-githubactions.yml delete mode 100644 .travis.yml diff --git a/.bsp/sbt.json b/.bsp/sbt.json deleted file mode 100644 index 35c64c6..0000000 --- a/.bsp/sbt.json +++ /dev/null @@ -1 +0,0 @@ -{"name":"sbt","version":"1.6.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java","-Xms100m","-Xmx100m","-classpath","/home/badri/.local/share/JetBrains/IdeaIC2021.3/Scala/launcher/sbt-launch.jar","-Dsbt.script=/usr/bin/sbt","xsbt.boot.Boot","-bsp"]} \ No newline at end of file diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml new file mode 100644 index 0000000..4da3641 --- /dev/null +++ b/.github/workflows/http.almaren-githubactions.yml @@ -0,0 +1,47 @@ +name: HTTP.Almaren +on: [push, pull_request] + +jobs: + Build: + runs-on: ubuntu-20.04 + services: + postgres: + image: postgres:13.4 + env: + POSTGRES_PASSWORD: postgres + POSTGRES_HOST_AUTH_METHOD: trust + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name : Check out repository code + uses: actions/checkout@v2 + - name: Setup JDK + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + cache: sbt + - name: Setup web environment + run: | + cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) + cpanm Mojolicious + cpanm JSON::Parse + perl src/test/resources/script/mock_api.pl daemon -m dev -l http://\*:3000 & + - name: Build and test scala version + run: | + PGPASSWORD="postgres" psql -c 'create database almaren;' -U postgres -h localhost + PGPASSWORD="postgres" psql -c "ALTER USER postgres PASSWORD 'foo' ;" -U postgres -h localhost + PGPASSWORD="postgres" psql -c 'create role runner;' -U postgres -h localhost + PGPASSWORD="postgres" psql -c 'ALTER ROLE "runner" WITH LOGIN SUPERUSER INHERIT CREATEDB CREATEROLE REPLICATION;' -U postgres -h localhost + sbt ++2.12.15 test + sbt ++2.13.9 test + rm -rf "$HOME/.ivy2/local" || true + find $HOME/Library/Caches/Coursier/v1 -name "ivydata-*.properties" -delete || true + find $HOME/.ivy2/cache -name "ivydata-*.properties" -delete || true + find $HOME/.cache/coursier/v1 -name "ivydata-*.properties" -delete || true + find $HOME/.sbt -name "*.lock" -delete || true \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 406127c..0000000 --- a/.travis.yml +++ /dev/null @@ -1,37 +0,0 @@ -language: scala - -jdk: openjdk8 - -scala: - - 2.12.15 - -cache: - directories: - - $HOME/.cache/coursier - - $HOME/.ivy2/cache - - $HOME/.sbt - -before_cache: - - sudo apt-get -y install perl - - rm -fv $HOME/.ivy2/.sbt.ivy.lock - - find $HOME/.ivy2/cache -name "ivydata-*.properties" -print -delete - - find $HOME/.sbt -name "*.lock" -print -delete - -services: - - postgresql - - -before_script: - - cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) - - cpanm Mojolicious - - cpanm JSON::Parse - - perl src/test/resources/script/mock_api.pl daemon -m dev -l http://\*:3000 & - -after_script: - - killall -9 perl - -script: - - sbt +test - - - diff --git a/build.sbt b/build.sbt index c173d0a..ec2eb83 100644 --- a/build.sbt +++ b/build.sbt @@ -2,10 +2,12 @@ ThisBuild / name := "http.almaren" ThisBuild / organization := "com.github.music-of-the-ainur" lazy val scala212 = "2.12.15" +lazy val scala213 = "2.13.9" -ThisBuild / scalaVersion := scala212 +crossScalaVersions := Seq(scala212,scala213) +ThisBuild / scalaVersion := scala213 -val sparkVersion = "3.2.1" +val sparkVersion = "3.3.0" val majorVersionReg = "([0-9]+\\.[0-9]+).{0,}".r val majorVersionReg(majorVersion) = sparkVersion @@ -15,10 +17,10 @@ scalacOptions ++= Seq("-deprecation", "-feature") libraryDependencies ++= Seq( "org.apache.spark" %% "spark-core" % sparkVersion % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", - "com.github.music-of-the-ainur" %% "almaren-framework" % s"0.9.4-${majorVersion}" % "provided", - "com.lihaoyi" %% "requests" % "0.7.0", - "com.typesafe.scala-logging" %% "scala-logging" % "3.9.0", - "org.scalatest" %% "scalatest" % "3.0.5" % "test" + "com.github.music-of-the-ainur" %% "almaren-framework" % s"0.9.8-${majorVersion}" % "provided", + "com.lihaoyi" %% "requests" % "0.7.1", + "com.typesafe.scala-logging" %% "scala-logging" % "3.9.5", + "org.scalatest" %% "scalatest" % "3.2.14" % "test" ) enablePlugins(GitVersioning) diff --git a/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala b/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala index 59c1c39..dfc29f3 100644 --- a/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala +++ b/src/test/scala/com/github/music/of/the/ainur/almaren/http/Test.scala @@ -7,8 +7,9 @@ import com.github.music.of.the.ainur.almaren.Almaren import com.github.music.of.the.ainur.almaren.builder.Core.Implicit import com.github.music.of.the.ainur.almaren.http.HTTPConn.HTTPImplicit import org.apache.spark.sql.Row +import org.scalatest.funsuite.AnyFunSuite -class Test extends FunSuite with BeforeAndAfter { +class Test extends AnyFunSuite with BeforeAndAfter { val almaren = Almaren("http-almaren") val spark: SparkSession = almaren.spark From 27631a9d7b881830e2bdaca2ece783805525d21f Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Mon, 17 Oct 2022 11:20:05 +0530 Subject: [PATCH 4/9] Upgraded to spark-3.3 --- .github/workflows/http.almaren-githubactions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml index 4da3641..a831daf 100644 --- a/.github/workflows/http.almaren-githubactions.yml +++ b/.github/workflows/http.almaren-githubactions.yml @@ -28,6 +28,7 @@ jobs: cache: sbt - name: Setup web environment run: | + curl -L https://cpanmin.us | perl - App::cpanminus cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) cpanm Mojolicious cpanm JSON::Parse From 802fd55bebe694e0ce812ab9075ff24f3ce07de1 Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Mon, 17 Oct 2022 11:21:43 +0530 Subject: [PATCH 5/9] Upgraded to spark-3.3 --- .github/workflows/http.almaren-githubactions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml index a831daf..be63013 100644 --- a/.github/workflows/http.almaren-githubactions.yml +++ b/.github/workflows/http.almaren-githubactions.yml @@ -29,6 +29,7 @@ jobs: - name: Setup web environment run: | curl -L https://cpanmin.us | perl - App::cpanminus + cpanm -l CPAN Crypt::Eksblowfish::Bcrypt cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) cpanm Mojolicious cpanm JSON::Parse From 471b665de76356925b9263678dad058520c8222c Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Mon, 17 Oct 2022 12:06:37 +0530 Subject: [PATCH 6/9] Upgraded to spark-3.3 --- .github/workflows/http.almaren-githubactions.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml index be63013..548acff 100644 --- a/.github/workflows/http.almaren-githubactions.yml +++ b/.github/workflows/http.almaren-githubactions.yml @@ -46,4 +46,5 @@ jobs: find $HOME/Library/Caches/Coursier/v1 -name "ivydata-*.properties" -delete || true find $HOME/.ivy2/cache -name "ivydata-*.properties" -delete || true find $HOME/.cache/coursier/v1 -name "ivydata-*.properties" -delete || true - find $HOME/.sbt -name "*.lock" -delete || true \ No newline at end of file + find $HOME/.sbt -name "*.lock" -delete || true + killall -9 perl \ No newline at end of file From 9f2650a295aa237178d764ce90c8c58bb2b94258 Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Mon, 17 Oct 2022 12:23:07 +0530 Subject: [PATCH 7/9] Upgraded to spark-3.3 --- .github/workflows/http.almaren-githubactions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml index 548acff..adf4c1e 100644 --- a/.github/workflows/http.almaren-githubactions.yml +++ b/.github/workflows/http.almaren-githubactions.yml @@ -29,7 +29,7 @@ jobs: - name: Setup web environment run: | curl -L https://cpanmin.us | perl - App::cpanminus - cpanm -l CPAN Crypt::Eksblowfish::Bcrypt + cpanm -nq LWP::Protocol::https cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) cpanm Mojolicious cpanm JSON::Parse From 2a8d5268c553b8a900ec0c48f6acc1c43f5356f3 Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Tue, 18 Oct 2022 15:05:27 +0530 Subject: [PATCH 8/9] Updated README file --- .../workflows/http.almaren-githubactions.yml | 1 - README.md | 34 ++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml index adf4c1e..587dd34 100644 --- a/.github/workflows/http.almaren-githubactions.yml +++ b/.github/workflows/http.almaren-githubactions.yml @@ -29,7 +29,6 @@ jobs: - name: Setup web environment run: | curl -L https://cpanmin.us | perl - App::cpanminus - cpanm -nq LWP::Protocol::https cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) cpanm Mojolicious cpanm JSON::Parse diff --git a/README.md b/README.md index c4d5177..2600f53 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,24 @@ # HTTP Connector -[![Build Status](https://travis-ci.com/modakanalytics/http.almaren.svg?token=TEB3zRDqVUuChez9334q&branch=master)](https://travis-ci.com/modakanalytics/http.almaren) +[![Build Status](https://github.com/music-of-the-ainur/http.almaren/actions/workflows/http.almaren-githubactions.yml/badge.svg)](https://github.com/music-of-the-ainur/http.almaren/actions/workflows/http.almaren-githubactions.yml) + +To add http.almaren dependency to your sbt build: ``` -libraryDependencies += "com.github.music-of-the-ainur" %% "http-almaren" % "1.2.4-$SPARK_VERSION" +libraryDependencies += "com.github.music-of-the-ainur" %% "http-almaren" % "1.2.5-3.3" ``` +To run in spark-shell: +For scala-version(2.12): ``` -spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.12:0.9.4-$SPARK_VERSION,com.github.music-of-the-ainur:http-almaren_2.12:1.2.4-$SPARK_VERSION" +spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.12:0.9.8-3.3,com.github.music-of-the-ainur:http-almaren_2.12:1.2.5-3.3" +``` +For scala-version(2.13): +``` +spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.13:0.9.8-3.3,com.github.music-of-the-ainur:http-almaren_2.13:1.2.5-3.3" ``` -## Table of Contents +## Table of Contents +- [Maven / Ivy Package Usage](#maven--ivy-package-usage) - [Methods](#methods) * [HTTP](#http) + [Example](#example) @@ -31,6 +40,23 @@ spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almare - [Batch Delimiter](#batch-delimiter) - [Examples](#examples) + +#### Maven / Ivy Package Usage +The connector is also available from the +[Maven Central](https://mvnrepository.com/artifact/com.github.music-of-the-ainur) +repository. It can be used using the `--packages` option or the +`spark.jars.packages` configuration property. Use the following value + +| version | Connector Artifact | +|----------------------------|-------------------------------------------------------------| +| Spark 3.3.x and scala 2.13 | `com.github.music-of-the-ainur:http-almaren_2.13:1.2.5-3.3` | +| Spark 3.3.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.5-3.3` | +| Spark 3.2.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.5-3.2` | +| Spark 3.1.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.5-3.1` | +| Spark 2.4.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.5-2.4` | +| Spark 2.4.x and scala 2.11 | `com.github.music-of-the-ainur:http-almaren_2.11:1.2.5-2.4` | + + ## Methods ### HTTP From 0ad1d846dd8a4658d8a10bfef48d2713707e8fc6 Mon Sep 17 00:00:00 2001 From: sharath-sg2706 Date: Tue, 18 Oct 2022 15:08:11 +0530 Subject: [PATCH 9/9] Updated README file --- .github/workflows/http.almaren-githubactions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/http.almaren-githubactions.yml b/.github/workflows/http.almaren-githubactions.yml index 587dd34..04c3161 100644 --- a/.github/workflows/http.almaren-githubactions.yml +++ b/.github/workflows/http.almaren-githubactions.yml @@ -28,7 +28,7 @@ jobs: cache: sbt - name: Setup web environment run: | - curl -L https://cpanmin.us | perl - App::cpanminus + curl -L http://cpanmin.us | perl - --sudo App::cpanminus cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib) cpanm Mojolicious cpanm JSON::Parse