From 97366d9bd109d1c04ca8dd7e8e2696463d4d8ac7 Mon Sep 17 00:00:00 2001 From: Kamran Kowsari Date: Mon, 4 Jun 2018 11:30:27 -0400 Subject: [PATCH] bug fixes --- RMDL/Download/Download_WOS.py | 2 +- RMDL/RMDL_Text.py | 81 +++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/RMDL/Download/Download_WOS.py b/RMDL/Download/Download_WOS.py index 610dfe4..efc6dac 100644 --- a/RMDL/Download/Download_WOS.py +++ b/RMDL/Download/Download_WOS.py @@ -55,7 +55,7 @@ def _progress(count, block_size, total_size): float(count * block_size) / float(total_size) * 100.0)) sys.stdout.flush() - filepath, _ = urllib.urlretrieve(DATA_URL, filepath)#, reporthook=_progress) + filepath, _ = urllib.urlretrieve(DATA_URL, filepath, reporthook=_progress) print('Downloaded', filename) diff --git a/RMDL/RMDL_Text.py b/RMDL/RMDL_Text.py index 5aa700c..4c4b12e 100644 --- a/RMDL/RMDL_Text.py +++ b/RMDL/RMDL_Text.py @@ -3,7 +3,7 @@ RMDL: Random Multimodel Deep Learning for Classification * Copyright (C) 2018 Kamran Kowsari - * Last Update: May 11, 2018 + * Last Update: June 04, 2018 * This file is part of RMDL project, University of Virginia. 
* Free to use, change, share and distribute source code of RMDL * Refrenced paper : RMDL: Random Multimodel Deep Learning for Classification @@ -34,15 +34,78 @@ def Text_Classification(x_train, y_train, x_test, y_test, batch_size=128, min_hidden_layer_dnn=1, max_hidden_layer_dnn=8, min_nodes_dnn=128, max_nodes_dnn=1024, min_hidden_layer_rnn=1, max_hidden_layer_rnn=5, min_nodes_rnn=32, max_nodes_rnn=128, min_hidden_layer_cnn=3, max_hidden_layer_cnn=10, min_nodes_cnn=128, max_nodes_cnn=512, - random_state=42, random_optimizor=True, dropout=0.5,no_of_classes = 0): + random_state=42, random_optimizor=True, dropout=0.5,no_of_classes=0): + + """ + Text_Classification(x_train, y_train, x_test, y_test, batch_size=128, + EMBEDDING_DIM=50,MAX_SEQUENCE_LENGTH = 500, MAX_NB_WORDS = 75000, + GloVe_dir="", GloVe_file = "glove.6B.50d.txt", + sparse_categorical=True, random_deep=[3, 3, 3], epochs=[500, 500, 500], plot=False, + min_hidden_layer_dnn=1, max_hidden_layer_dnn=8, min_nodes_dnn=128, max_nodes_dnn=1024, + min_hidden_layer_rnn=1, max_hidden_layer_rnn=5, min_nodes_rnn=32, max_nodes_rnn=128, + min_hidden_layer_cnn=3, max_hidden_layer_cnn=10, min_nodes_cnn=128, max_nodes_cnn=512, + random_state=42, random_optimizor=True, dropout=0.5, no_of_classes=0): + + Parameters + ---------- + batch_size : Integer, optional + Number of samples per gradient update. If unspecified, it will default to 128 + MAX_NB_WORDS: int, optional + Maximum number of unique words in datasets, it will default to 75000. + GloVe_dir: String, optional + Address of GloVe or any pre-trained directory, it will default to null which glove.6B.zip will be downloaded. + GloVe_file: String, optional + Which version of GloVe or pre-trained word embedding will be used, it will default to glove.6B.50d.txt. + NOTE: if you use other version of GloVe EMBEDDING_DIM must be same dimensions. + sparse_categorical: bool. + When target's dataset is (n,1) should be True, it will default to True. 
+ random_deep: array of int [3], optional + Number of ensembled model used in RMDL random_deep[0] is number of DNN, random_deep[1] is number of RNN, random_deep[2] is number of CNN, it will default to [3, 3, 3]. + epochs: array of int [3], optional + Number of epochs in each ensembled model used in RMDL epochs[0] is number of epochs used in DNN, epochs[1] is number of epochs used in RNN, epochs[2] is number of epochs used in CNN, it will default to [500, 500, 500]. + plot: bool, optional + True: shows confusion matrix and accuracy and loss + min_hidden_layer_dnn: Integer, optional + Lower Bounds of hidden layers of DNN used in RMDL, it will default to 1. + max_hidden_layer_dnn: Integer, optional + Upper bounds of hidden layers of DNN used in RMDL, it will default to 8. + min_nodes_dnn: Integer, optional + Lower bounds of nodes in each layer of DNN used in RMDL, it will default to 128. + max_nodes_dnn: Integer, optional + Upper bounds of nodes in each layer of DNN used in RMDL, it will default to 1024. + min_hidden_layer_rnn: Integer, optional + Lower Bounds of hidden layers of RNN used in RMDL, it will default to 1. + max_hidden_layer_rnn: Integer, optional + Upper Bounds of hidden layers of RNN used in RMDL, it will default to 5. + min_nodes_rnn: Integer, optional + Lower bounds of nodes (LSTM or GRU) in each layer of RNN used in RMDL, it will default to 32. + max_nodes_rnn: Integer, optional + Upper bounds of nodes (LSTM or GRU) in each layer of RNN used in RMDL, it will default to 128. + min_hidden_layer_cnn: Integer, optional + Lower Bounds of hidden layers of CNN used in RMDL, it will default to 3. + max_hidden_layer_cnn: Integer, optional + Upper Bounds of hidden layers of CNN used in RMDL, it will default to 10. + min_nodes_cnn: Integer, optional + Lower bounds of nodes (2D convolution layer) in each layer of CNN used in RMDL, it will default to 128. 
+ max_nodes_cnn: Integer, optional + Upper bounds of nodes (2D convolution layer) in each layer of CNN used in RMDL, it will default to 512. + random_state : Integer, optional + RandomState instance or None, optional (default=None) + If Integer, random_state is the seed used by the random number generator; + random_optimizor : bool, optional + If False, all models use adam optimizer. If True, all models use random optimizers. it will default to True + dropout: Float, optional + between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. + + """ np.random.seed(random_state) glove_directory = GloVe_dir GloVe_file = GloVe_file - + print("Done1") GloVe_needed = random_deep[1] != 0 or random_deep[2] != 0 @@ -107,15 +170,15 @@ def _one_hot_values(labels_data): score = [] if no_of_classes==0: - - #checking no_of_classes + #checking no_of_classes #np.max(data)+1 will not work for one_hot encoding labels - - number_of_classes = len(y_train[0]) - print(number_of_classes) + if sparse_categorical: + number_of_classes = np.max(y_train) + 1 + else: + number_of_classes = len(y_train[0]) else: number_of_classes = no_of_classes - print(number_of_classes) + print(number_of_classes) i = 0