From 9b13f55a908eaba184e7a4a51558cf0e89083f68 Mon Sep 17 00:00:00 2001 From: badrinathpatchikolla Date: Tue, 4 Oct 2022 15:33:56 +0530 Subject: [PATCH 1/2] Added Test Cases for Deserializer Sample Options #61 --- .github/workflows/almaren-framework.yml | 40 +++++++++++++++++++ README.md | 2 +- .../music/of/the/ainur/almaren/Test.scala | 18 +++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/almaren-framework.yml diff --git a/.github/workflows/almaren-framework.yml b/.github/workflows/almaren-framework.yml new file mode 100644 index 00000000..7037615f --- /dev/null +++ b/.github/workflows/almaren-framework.yml @@ -0,0 +1,40 @@ +name: Almaren Framework +on: [push, pull_request] + +jobs: + Build: + runs-on: ubuntu-20.04 + services: + postgres: + image: postgres:13.4 + env: + POSTGRES_PASSWORD: postgres + POSTGRES_HOST_AUTH_METHOD: trust + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name : Check out repository code + uses: actions/checkout@v2 + - name: Setup JDK + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + cache: sbt + - name: Build and test scala version + run: | + PGPASSWORD="postgres" psql -c 'create database almaren;' -U postgres -h localhost + PGPASSWORD="postgres" psql -c "ALTER USER postgres PASSWORD 'foo' ;" -U postgres -h localhost + PGPASSWORD="postgres" psql -c 'create role runner;' -U postgres -h localhost + PGPASSWORD="postgres" psql -c 'ALTER ROLE "runner" WITH LOGIN SUPERUSER INHERIT CREATEDB CREATEROLE REPLICATION;' -U postgres -h localhost + sbt ++2.12.10 test + rm -rf "$HOME/.ivy2/local" || true + find $HOME/Library/Caches/Coursier/v1 -name "ivydata-*.properties" -delete || true + find $HOME/.ivy2/cache -name "ivydata-*.properties" -delete || true + find $HOME/.cache/coursier/v1 -name "ivydata-*.properties" -delete || true + find $HOME/.sbt -name "*.lock" -delete || true \ No newline at end of file diff --git a/README.md b/README.md index e7bf485e..4b925a7e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The Almaren Framework provides a simplified consistent minimalistic layer over Apache Spark, while still allowing you to take advantage of native Apache Spark features. You can even combine it with standard Spark code. -[![Build Status](https://travis-ci.com/mantovani/almaren-framework.svg?branch=master)](https://travis-ci.com/mantovani/almaren-framework) +[![Build Status](https://github.com/music-of-the-ainur/almaren-framework/actions/workflows/almaren-framework.yml/badge.svg)](https://github.com/music-of-the-ainur/almaren-framework/actions/workflows/almaren-framework.yml) [![Gitter Community](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/music-of-the-ainur/community) ## Table of Contents diff --git a/src/test/scala/com/github/music/of/the/ainur/almaren/Test.scala b/src/test/scala/com/github/music/of/the/ainur/almaren/Test.scala index d28039d4..1ed76a20 100644 --- a/src/test/scala/com/github/music/of/the/ainur/almaren/Test.scala +++ b/src/test/scala/com/github/music/of/the/ainur/almaren/Test.scala @@ -113,6 +113,7 @@ class Test extends FunSuite with BeforeAndAfter { deserializerXmlTest() deserializerAvroTest() deserializerCsvTest() + deserializerCsvSampleOptionsTest() testInferSchemaJsonColumn() testInferSchemaDataframe(moviesDf) @@ -435,6 +436,23 @@ class Test extends FunSuite with BeforeAndAfter { test(newCsvSchemaDf, csvSchemaDf, "Deserialize CSV Schema") } + def deserializerCsvSampleOptionsTest(): Unit = { + val df = Seq( + ("John,Chris", "Smith", "London"), + ("David,Michael", "Jones", "India"), + ("Joseph,Mike", "Lee", "Russia"), + ("Chris,Tony", "Brown", "Indonesia"), + ).toDF("first_name", "last_name", "country") + val newCsvDF = almaren.builder + .sourceDataFrame(df) + .deserializer("CSV", "first_name", options = Map("header" -> "false", + "samplingRatio" -> "0.5", + "samplingMaxLines" -> "1")) + .batch + val csvDf = spark.read.parquet("src/test/resources/data/csvDeserializer.parquet") + test(newCsvDF, csvDf, "Deserialize CSV Sample Options") + } + def deserializerXmlTest(): Unit = { val xmlStr = Seq( """ From 09b39eb32bd8d167530ea829bbe331450f67340d Mon Sep 17 00:00:00 2001 From: badrinathpatchikolla Date: Tue, 4 Oct 2022 15:50:33 +0530 Subject: [PATCH 2/2] Removed Travis file --- .travis.yml | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3f5480d9..00000000 --- a/.travis.yml +++ /dev/null @@ -1,35 +0,0 @@ -language: scala - -jdk: openjdk8 - -script: - - sbt +test - -install: - - | - # update this only when sbt-the-bash-script needs to be updated - export SBT_LAUNCHER=1.4.8 - export SBT_OPTS="-Dfile.encoding=UTF-8" - curl -L --silent "https://github.com/sbt/sbt/releases/download/v$SBT_LAUNCHER/sbt-$SBT_LAUNCHER.tgz" > $HOME/sbt.tgz - tar zxf $HOME/sbt.tgz -C $HOME - sudo rm /usr/local/bin/sbt - sudo ln -s $HOME/sbt/bin/sbt /usr/local/bin/sbt - -cache: - directories: - - $HOME/.cache/coursier - - $HOME/.ivy2/cache - - $HOME/.sbt - -before_cache: - - rm -fv $HOME/.ivy2/.sbt.ivy.lock - - find $HOME/.ivy2/cache -name "ivydata-*.properties" -print -delete - - find $HOME/.sbt -name "*.lock" -print -delete - -services: - - postgresql - -before_script: - - psql -c 'create database almaren;' -U postgres - - psql -c "ALTER USER postgres PASSWORD 'foo' ;" -