diff --git a/CHANGELOG.md b/CHANGELOG.md index 49e0b734..ea4aa471 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,16 @@ # Changelog -## [1.0](https://github.com/rapidsai/gQuant/tree/1.0) (2020-12-17) +## [v1.0.1](https://github.com/rapidsai/gQuant/tree/v1.0.1) (2021-01-20) -[Full Changelog](https://github.com/rapidsai/gQuant/compare/0.5...1.0) +[Full Changelog](https://github.com/rapidsai/gQuant/compare/v1.0.0...v1.0.1) + +**Merged pull requests:** + +- \[REVIEW\] Simple external plugin example [\#113](https://github.com/rapidsai/gQuant/pull/113) ([yidong72](https://github.com/yidong72)) + +## [v1.0.0](https://github.com/rapidsai/gQuant/tree/v1.0.0) (2020-12-30) + +[Full Changelog](https://github.com/rapidsai/gQuant/compare/0.5...v1.0.0) **Closed issues:** @@ -15,6 +23,7 @@ - \[REVIEW\]gQuant plugin implementation [\#112](https://github.com/rapidsai/gQuant/pull/112) ([yidong72](https://github.com/yidong72)) - Gpuciscripts clean and update [\#111](https://github.com/rapidsai/gQuant/pull/111) ([msadang](https://github.com/msadang)) +- \[REVIEW\] gQuant 1.0 [\#110](https://github.com/rapidsai/gQuant/pull/110) ([yidong72](https://github.com/yidong72)) - Streamz gQuant example 2 [\#109](https://github.com/rapidsai/gQuant/pull/109) ([yidong72](https://github.com/yidong72)) - Revert "Streamz gQuant example" [\#108](https://github.com/rapidsai/gQuant/pull/108) ([yidong72](https://github.com/yidong72)) - Streamz gQuant example [\#107](https://github.com/rapidsai/gQuant/pull/107) ([yidong72](https://github.com/yidong72)) @@ -22,6 +31,7 @@ - Nemo and xgboost integration [\#103](https://github.com/rapidsai/gQuant/pull/103) ([yidong72](https://github.com/yidong72)) - FIX Update change log check [\#102](https://github.com/rapidsai/gQuant/pull/102) ([mike-wendt](https://github.com/mike-wendt)) - \[REVIEW\] Update CI scripts to remove references to master \[skip ci\] [\#99](https://github.com/rapidsai/gQuant/pull/99) ([dillon-cullinan](https://github.com/dillon-cullinan)) 
+- \[skip ci\] Update master references for main branch [\#98](https://github.com/rapidsai/gQuant/pull/98) ([ajschmidt8](https://github.com/ajschmidt8)) - \[REVIEW\]gQuant UI, first version [\#89](https://github.com/rapidsai/gQuant/pull/89) ([yidong72](https://github.com/yidong72)) ## [0.5](https://github.com/rapidsai/gQuant/tree/0.5) (2020-07-10) diff --git a/README.md b/README.md index 37cac9ed..09566519 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,10 @@ To install JupyterLab plugin, install the following dependence libraries: ```bash conda install nodejs ipywidgets ``` +Build the ipywidgets Jupyterlab plugin +```bash +jupyter labextension install @jupyter-widgets/jupyterlab-manager@2.0 +``` Then install the gquantlab lib: ```bash pip install gquantlab==0.1.1 diff --git a/docker/build.sh b/docker/build.sh index 941779e2..39980f60 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -39,7 +39,7 @@ echo -e "\nPlease, select your CUDA version:\n" \ read -p "Enter your option and hit return [1]-3: " CUDA_VERSION -RAPIDS_VERSION="0.14.1" +RAPIDS_VERSION="0.17.0" CUDA_VERSION=${CUDA_VERSION:-1} case $CUDA_VERSION in @@ -158,7 +158,7 @@ RUN conda install -y -c conda-forge jupyterlab'<3.0.0' RUN conda install -y -c conda-forge python-graphviz bqplot nodejs ipywidgets \ pytables mkl numexpr pydot flask pylint flake8 autopep8 -RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager --no-build +RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager@2.0 --no-build RUN jupyter labextension install bqplot --no-build #RUN jupyter labextension install jupyterlab-nvdashboard --no-build RUN jupyter lab build && jupyter lab clean @@ -169,7 +169,7 @@ RUN pip install jupyterlab-nvdashboard RUN jupyter labextension install jupyterlab-nvdashboard ## install the dask extension -RUN pip install dask_labextension +RUN pip install "dask_labextension<5.0.0" RUN jupyter labextension install dask-labextension RUN jupyter serverextension enable 
dask_labextension @@ -289,9 +289,10 @@ index 901a79af..4eb76f95 100644 @@ -14,4 +14,4 @@ unidecode webdataset kaldi-python-io - librosa<=0.7.2 +-librosa<=0.7.2 ++librosa<=0.8.0 -numba<=0.48 -+numba==0.49.1 ++numba==0.52.0 diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 885adf3e..0e4e44e2 100644 --- a/requirements/requirements_nlp.txt diff --git a/external/README.md b/external/README.md new file mode 100644 index 00000000..50784a72 --- /dev/null +++ b/external/README.md @@ -0,0 +1,51 @@ +## Simple External Plugin Example + +This is a simple example to show how to write an external gQuant plugin. gQuant takes advantage of the `entry point` inside the `setup.py` file to register the plugin. gQuant can discover all the plugins that have the entry point group name `gquant.plugin`. Check the `setup.py` file to see details. + +### Create a new Python environment +```bash +conda create -n test python=3.8 +``` + +### Install the gQuant lib +To install the gQuant graph computation library, first install the dependency libraries: +```bash +pip install dask[dataframe] distributed networkx +conda install python-graphviz ruamel.yaml numpy pandas +``` +Then install the gquant lib: +```bash +pip install gquant +``` + +### Install the gQuantlab plugin +To install the JupyterLab plugin, install the following dependency libraries: +```bash +conda install nodejs ipywidgets +``` +Build the ipywidgets JupyterLab plugin +```bash +jupyter labextension install @jupyter-widgets/jupyterlab-manager@2.0 +``` +Then install the gquantlab lib: +```bash +pip install gquantlab +``` +If you launch JupyterLab, it will prompt you to build the new plugin. You can +explicitly build it by: +```bash +jupyter lab build +``` + +### Install the external example plugin +To install the external plugin, in the plugin directory, run the following command: +```bash +pip install . 
def _expected_dtype_names(ref_type):
    '''Return the dtype names accepted for one reference type spec.

    :param ref_type: Expected type of a column as given in the reference
        column dictionary (e.g. 'int64', 'category', 'date').
    :returns: Tuple of strings; a column is valid when the string form of
        its dtype is one of them.

    '''
    if ref_type == 'category':
        # comparing pandas.core.dtypes.dtypes.CategoricalDtype to
        # numpy.dtype causes TypeError. Instead, compare after converting
        # all types to their string representation.
        return (str(pd.CategoricalDtype()),)
    if ref_type == 'date':
        # Cudf read_csv doesn't understand 'datetime64[ms]' even
        # though it reads the data in as 'datetime64[ms]', but
        # expects 'date' as dtype specified passed to read_csv.
        return ('datetime64[ms]', 'date', 'datetime64[ns]')
    try:
        return (str(np.dtype(ref_type)),)
    except TypeError:
        # NumPy cannot parse this spec (e.g. a pandas extension dtype name
        # such as 'boolean'); fall back to comparing the string forms
        # instead of aborting the whole validation with a TypeError.
        return (str(ref_type),)


def _validate_df(df_to_val, ref_cols, obj):
    '''Validate a pandas DataFrame against a column specification.

    :param df_to_val: Object to validate. Anything that is not a
        pd.DataFrame is accepted without further checks.
    :param ref_cols: Dictionary of column names and their expected types.
        A value of None means the column must exist but its type is not
        checked.
    :param obj: The node being validated; its `uid` attribute and its type
        are only used to build the diagnostic messages.
    :returns: True when every column in ref_cols is present in df_to_val
        with a matching type (or when df_to_val is not a pd.DataFrame),
        False when a column is missing or its type does not match.
    :raises: Exception - Raised when the dataframe is empty or has an
        unexpected number of columns. TODO: Create a ValidationError
        subclass.

    '''
    if not isinstance(df_to_val, pd.DataFrame):
        return True

    if len(df_to_val) == 0:
        err_msg = 'Node "{}" produced empty output'.format(obj.uid)
        raise Exception(err_msg)

    i_cols = df_to_val.columns
    if len(i_cols) != len(ref_cols):
        print("expect %d columns, only see %d columns"
              % (len(ref_cols), len(i_cols)))
        print("ref:", ref_cols)
        print("columns", i_cols)
        raise Exception("not valid for node %s" % (obj.uid))

    for col, ref_type in ref_cols.items():
        if col not in i_cols:
            print("error for node %s, column %s is not in the required "
                  "output df" % (obj.uid, col))
            return False

        if ref_type is None:
            # Presence is all that is required for this column.
            continue

        actual_type = df_to_val[col].dtype
        if str(actual_type) not in _expected_dtype_names(ref_type):
            # Build the message only when it is actually reported.
            err_msg = "for node {} type {}, column {} type {} "\
                "does not match expected type {}".format(
                    obj.uid, type(obj), col, actual_type, ref_type)
            print("ERROR: {}".format(err_msg))
            # Maybe raise an exception here and have the caller
            # try/except the validation routine.
            return False
    return True


def copy_df(df_obj):
    '''Return a shallow copy (`copy(deep=False)`) of the dataframe.'''
    return df_obj.copy(deep=False)
Column Name | '; + for (let i = 0; i < columnKeys.length; i++) { + header += `${columnKeys[i]} | `; + } + header += '
---|---|
Type | '; + for (let i = 0; i < columnKeys.length; i++) { + header += `${metaObj[columnKeys[i]]} | `; + } + header += '