bug fixes

kk7nc · Jun 4, 2018 · 97366d9 · 97366d9
1 parent ad67e3a
commit 97366d9
Show file tree

Hide file tree

Showing 2 changed files with 73 additions and 10 deletions.
diff --git a/RMDL/Download/Download_WOS.py b/RMDL/Download/Download_WOS.py
@@ -55,7 +55,7 @@ def _progress(count, block_size, total_size):
                                                           float(count * block_size) / float(total_size) * 100.0))
             sys.stdout.flush()
 
-        filepath, _ = urllib.urlretrieve(DATA_URL, filepath)#, reporthook=_progress)
+        filepath, _ = urllib.urlretrieve(DATA_URL, filepath, reporthook=_progress)
 
         print('Downloaded', filename)
 

diff --git a/RMDL/RMDL_Text.py b/RMDL/RMDL_Text.py
@@ -3,7 +3,7 @@
 RMDL: Random Multimodel Deep Learning for Classification
 
  * Copyright (C) 2018  Kamran Kowsari <[email protected]>
- * Last Update: May 11, 2018
+ * Last Update: June 04, 2018
  * This file is part of  RMDL project, University of Virginia.
  * Free to use, change, share and distribute source code of RMDL
  * Referenced paper : RMDL: Random Multimodel Deep Learning for Classification
@@ -34,15 +34,78 @@ def Text_Classification(x_train, y_train, x_test,  y_test, batch_size=128,
                         min_hidden_layer_dnn=1, max_hidden_layer_dnn=8, min_nodes_dnn=128, max_nodes_dnn=1024,
                         min_hidden_layer_rnn=1, max_hidden_layer_rnn=5, min_nodes_rnn=32,  max_nodes_rnn=128,
                         min_hidden_layer_cnn=3, max_hidden_layer_cnn=10, min_nodes_cnn=128, max_nodes_cnn=512,
-                        random_state=42, random_optimizor=True, dropout=0.5,no_of_classes = 0):
+                        random_state=42, random_optimizor=True, dropout=0.5,no_of_classes=0):
 
+
+    """
+    Text_Classification(x_train, y_train, x_test,  y_test, batch_size=128,
+                        EMBEDDING_DIM=50,MAX_SEQUENCE_LENGTH = 500, MAX_NB_WORDS = 75000,
+                        GloVe_dir="", GloVe_file = "glove.6B.50d.txt",
+                        sparse_categorical=True, random_deep=[3, 3, 3], epochs=[500, 500, 500],  plot=False,
+                        min_hidden_layer_dnn=1, max_hidden_layer_dnn=8, min_nodes_dnn=128, max_nodes_dnn=1024,
+                        min_hidden_layer_rnn=1, max_hidden_layer_rnn=5, min_nodes_rnn=32,  max_nodes_rnn=128,
+                        min_hidden_layer_cnn=3, max_hidden_layer_cnn=10, min_nodes_cnn=128, max_nodes_cnn=512,
+                        random_state=42, random_optimizor=True, dropout=0.5, no_of_classes=0):
+
+        Parameters
+        ----------
+            batch_size : Integer, optional
+                Number of samples per gradient update. If unspecified, it will default to 128
+            MAX_NB_WORDS: int, optional
+                Maximum number of unique words in datasets, it will default to 75000.
+            GloVe_dir: String, optional
+                Path to the directory holding GloVe or any other pre-trained embeddings; it will default to an empty string, in which case glove.6B.zip will be downloaded.
+            GloVe_file: String, optional
+                Which version of GloVe or pre-trained word embedding will be used, it will default to glove.6B.50d.txt.
+                NOTE: if you use other version of GloVe EMBEDDING_DIM must be same dimensions.
+            sparse_categorical: bool.
+                When target's dataset is (n,1) should be True, it will default to True.
+            random_deep: array of int [3], optional
+                Number of ensembled models used in RMDL: random_deep[0] is the number of DNNs, random_deep[1] is the number of RNNs, random_deep[2] is the number of CNNs; it will default to [3, 3, 3].
+            epochs: array of int [3], optional
+                Number of epochs for each ensembled model used in RMDL: epochs[0] is the number of epochs used in DNN, epochs[1] is the number of epochs used in RNN, epochs[2] is the number of epochs used in CNN; it will default to [500, 500, 500].
+            plot: bool, optional
+                True: shows confusion matrix and accuracy and loss
+            min_hidden_layer_dnn: Integer, optional
+                Lower Bounds of hidden layers of DNN used in RMDL, it will default to 1.
+            max_hidden_layer_dnn: Integer, optional
+                Upper bounds of hidden layers of DNN used in RMDL, it will default to 8.
+            min_nodes_dnn: Integer, optional
+                Lower bounds of nodes in each layer of DNN used in RMDL, it will default to 128.
+            max_nodes_dnn: Integer, optional
+                Upper bounds of nodes in each layer of DNN used in RMDL, it will default to 1024.
+            min_hidden_layer_rnn: Integer, optional
+                Lower Bounds of hidden layers of RNN used in RMDL, it will default to 1.
+            max_hidden_layer_rnn: Integer, optional
+                Upper Bounds of hidden layers of RNN used in RMDL, it will default to 5.
+            min_nodes_rnn: Integer, optional
+                Lower bounds of nodes (LSTM or GRU) in each layer of RNN used in RMDL, it will default to 32.
+            max_nodes_rnn: Integer, optional
+                Upper bounds of nodes (LSTM or GRU) in each layer of RNN used in RMDL, it will default to 128.
+            min_hidden_layer_cnn: Integer, optional
+                Lower Bounds of hidden layers of CNN used in RMDL, it will default to 3.
+            max_hidden_layer_cnn: Integer, optional
+                Upper Bounds of hidden layers of CNN used in RMDL, it will default to 10.
+            min_nodes_cnn: Integer, optional
+                Lower bounds of nodes (2D convolution layer) in each layer of CNN used in RMDL, it will default to 128.
+            max_nodes_cnn: Integer, optional
+                Upper bounds of nodes (2D convolution layer) in each layer of CNN used in RMDL, it will default to 512.
+            random_state : Integer, optional
+                Seed passed to np.random.seed for the random number generator;
+                it will default to 42.
+            random_optimizor : bool, optional
+                If False, all models use adam optimizer. If True, all models use random optimizers. it will default to True
+            dropout: Float, optional
+                between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs.
+
+    """
     np.random.seed(random_state)
 
 
     glove_directory = GloVe_dir
     GloVe_file = GloVe_file
 
-
+    print("Done1")
 
     GloVe_needed = random_deep[1] != 0 or random_deep[2] != 0
 
@@ -107,15 +170,15 @@ def _one_hot_values(labels_data):
     score = []
 
     if no_of_classes==0:
-
-        #checking no_of_classes 
+        #checking no_of_classes
         #np.max(data)+1 will not work for one_hot encoding labels
-
-        number_of_classes = len(y_train[0])
-        print(number_of_classes)
+        if sparse_categorical:
+            number_of_classes = np.max(y_train) + 1
+        else:
+            number_of_classes = len(y_train[0])
     else:
         number_of_classes = no_of_classes
-        print(number_of_classes)
+    print(number_of_classes)
 
 
     i = 0