cosanlab
diff --git a/nltools/__init__.py b/nltools/__init__.py
Lines changed: 9 additions & 10 deletions
diff --git a/nltools/analysis.py b/nltools/analysis.py
Lines changed: 44 additions & 42 deletions
diff --git a/nltools/cross_validation.py b/nltools/cross_validation.py
Lines changed: 22 additions & 20 deletions
@@ -1,12 +1,12 @@
-__all__ = [ 'data',
- 			'datasets',
-			'analysis', 
-			'cross_validation', 
-			'plotting', 
-			'stats', 
-			'utils',
-			'file_reader',  
-			'pbs_job', 
+__all__ = ['data',
+            'datasets',
+            'analysis',
+            'cross_validation',
+            'plotting',
+			'stats',
+            'utils',
+            'file_reader',
+			'pbs_job',
 			'masks',
 			'__version__']
 
@@ -16,4 +16,3 @@
 from pbs_job import PBS_Job
 from simulator import Simulator
 from version import __version__
-
@@ -24,29 +24,35 @@
 class Roc(object):
 
     """ Roc Class
-    
-    The Roc class is based on Tor Wager's Matlab roc_plot.m function and allows a user to easily run different types of 
-    receiver operator characteristic curves.  For example, one might be interested in single interval or forced choice.
+
+    The Roc class is based on Tor Wager's Matlab roc_plot.m function and
+    allows a user to easily run different types of receiver operating
+    characteristic curves.  For example, one might be interested in single
+    interval or forced choice.
 
     Args:
         input_values: nibabel data instance
         binary_outcome: vector of training labels
-        threshold_type: ['optimal_overall', 'optimal_balanced','minimum_sdt_bias']
-        **kwargs: Additional keyword arguments to pass to the prediction algorithm
+        threshold_type: ['optimal_overall', 'optimal_balanced',
+                        'minimum_sdt_bias']
+        **kwargs: Additional keyword arguments to pass to the prediction
+                    algorithm
 
     """
 
-    def __init__(self, input_values=None, binary_outcome=None, 
+    def __init__(self, input_values=None, binary_outcome=None,
         threshold_type='optimal_overall', forced_choice=None, **kwargs):
         if len(input_values) != len(binary_outcome):
-            raise ValueError("Data Problem: input_value and binary_outcome are different lengths.")
+            raise ValueError("Data Problem: input_value and binary_outcome"
+                            "are different lengths.")
 
         if not any(binary_outcome):
             raise ValueError("Data Problem: binary_outcome may not be boolean")
 
         thr_type = ['optimal_overall', 'optimal_balanced','minimum_sdt_bias']
         if threshold_type not in thr_type:
-            raise ValueError("threshold_type must be ['optimal_overall', 'optimal_balanced','minimum_sdt_bias']")
+            raise ValueError("threshold_type must be ['optimal_overall', "
+                            "'optimal_balanced','minimum_sdt_bias']")
 
         self.input_values = deepcopy(input_values)
         self.binary_outcome = deepcopy(binary_outcome)
@@ -55,25 +61,32 @@ def __init__(self, input_values=None, binary_outcome=None,
 
         if isinstance(self.binary_outcome,pd.DataFrame):
             self.binary_outcome = np.array(self.binary_outcome).flatten()
-        else:   
+        else:
             self.binary_outcome = deepcopy(binary_outcome)
 
-    def calculate(self, input_values=None, binary_outcome=None, criterion_values=None,
-        threshold_type='optimal_overall', forced_choice=None, balanced_acc=False):
-        
-        """ Calculate Receiver Operating Characteristic plot (ROC) for single-interval
-        classification.
+    def calculate(self, input_values=None, binary_outcome=None,
+                criterion_values=None, threshold_type='optimal_overall',
+                forced_choice=None, balanced_acc=False):
+
+        """ Calculate Receiver Operating Characteristic plot (ROC) for
+        single-interval classification.
 
         Args:
             input_values: nibabel data instance
             binary_outcome: vector of training labels
-            criterion_values: (optional) criterion values for calculating fpr & tpr
-            threshold_type: ['optimal_overall', 'optimal_balanced','minimum_sdt_bias']
-            forced_choice: index indicating position for each unique subject (default=None)
-            balanced_acc: balanced accuracy for single-interval classification (bool)
-            **kwargs: Additional keyword arguments to pass to the prediction algorithm
+            criterion_values: (optional) criterion values for calculating fpr
+                            & tpr
+            threshold_type: ['optimal_overall', 'optimal_balanced',
+                            'minimum_sdt_bias']
+            forced_choice: index indicating position for each unique subject
+                            (default=None)
+            balanced_acc: balanced accuracy for single-interval classification
+                            (bool)
+            **kwargs: Additional keyword arguments to pass to the prediction
+                            algorithm
 
         """
+
         if input_values is not None:
             self.input_values = deepcopy(input_values)
 
@@ -84,14 +97,17 @@ def calculate(self, input_values=None, binary_outcome=None, criterion_values=Non
         if criterion_values is not None:
             self.criterion_values = deepcopy(criterion_values)
         else:
-            self.criterion_values = np.linspace(min(self.input_values), max(self.input_values), num=50*len(self.binary_outcome))
+            self.criterion_values = np.linspace(min(self.input_values),
+                                    max(self.input_values),
+                                    num=50*len(self.binary_outcome))
 
         if forced_choice is not None:
             self.forced_choice = deepcopy(forced_choice)
 
         if self.forced_choice is not None:
             sub_idx = np.unique(self.forced_choice)
-            assert len(sub_idx) == len(self.binary_outcome)/2, "Make sure that subject ids are correct for 'forced_choice'."
+            assert len(sub_idx) == len(self.binary_outcome)/2, ("Make sure "
+                        "that subject ids are correct for 'forced_choice'.")
             assert len(set(sub_idx).union(set(np.array(self.forced_choice)[self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
             assert len(set(sub_idx).union(set(np.array(self.forced_choice)[~self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
             for sub in sub_idx:
@@ -109,17 +125,7 @@ def calculate(self, input_values=None, binary_outcome=None, criterion_values=Non
             self.fpr[i] = np.sum(wh[~self.binary_outcome])/np.sum(~self.binary_outcome)
         self.n_true = np.sum(self.binary_outcome)
         self.n_false = np.sum(~self.binary_outcome)
-
-        # Calculate Area Under the Curve
-
-        # fix for AUC = 1 if no overlap - code not working (tpr_unique and fpr_unique can be different lengths)
-        # fpr_unique = np.unique(self.fpr)
-        # tpr_unique = np.unique(self.tpr)
-        # if any((fpr_unique == 0) & (tpr_unique == 1)):
-        #    self.auc = 1 # Fix for AUC = 1 if no overlap;
-        # else:
-        #    self.auc = auc(self.fpr, self.tpr) # Use sklearn auc otherwise
-        self.auc = auc(self.fpr, self.tpr) # Use sklearn auc
+        self.auc = auc(self.fpr, self.tpr)
 
         # Get criterion threshold
         if self.forced_choice is None:
@@ -169,16 +175,17 @@ def plot(self, plot_method = 'gaussian'):
         """ Create ROC Plot
 
         Create a specific kind of ROC curve plot, based on input values
-        along a continuous distribution and a binary outcome variable (logical).
+        along a continuous distribution and a binary outcome variable (logical)
 
         Args:
             plot_method: type of plot ['gaussian','observed']
             binary_outcome: vector of training labels
-            **kwargs: Additional keyword arguments to pass to the prediction algorithm
+            **kwargs: Additional keyword arguments to pass to the prediction
+                        algorithm
 
         Returns:
             fig
-            
+
         """
 
         self.calculate() # Calculate ROC parameters
@@ -188,7 +195,7 @@ def plot(self, plot_method = 'gaussian'):
                 sub_idx = np.unique(self.forced_choice)
                 diff_scores = []
                 for sub in sub_idx:
-                    diff_scores.append(self.input_values[(self.forced_choice==sub) & (self.binary_outcome==True)][0] - self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)][0])
+                    diff_scores.append(self.input_values[(self.forced_choice == sub) & (self.binary_outcome==True)][0] - self.input_values[(self.forced_choice==sub) & (self.binary_outcome==False)][0])
                 diff_scores = np.array(diff_scores)
                 mn_diff = np.mean(diff_scores)
                 d = mn_diff / np.std(diff_scores)
@@ -221,9 +228,7 @@ def plot(self, plot_method = 'gaussian'):
         return fig
 
     def summary(self):
-        """ Display a formatted summary of ROC analysis.
-
-        """
+        """ Display a formatted summary of ROC analysis. """
 
         print("------------------------")
         print(".:ROC Analysis Summary:.")
@@ -236,6 +241,3 @@ def summary(self):
         print("{:20s}".format("AUC:") + "{:.2f}".format(self.auc))
         print("{:20s}".format("PPV:") + "{:.2f}".format(self.ppv))
         print("------------------------")
-
-
-
 
@@ -4,7 +4,7 @@
 Cross-Validation Data Classes
 =============================
 
-Scikit-learn compatible classes for performing various 
+Scikit-learn compatible classes for performing various
 types of cross-validation
 
 '''
@@ -21,15 +21,15 @@
 import pandas as pd
 
 class KFoldStratified(_BaseKFold):
-    """K-Folds cross validation iterator which stratifies continuous data (unlike scikit-learn equivalent).
+    """K-Folds cross validation iterator which stratifies continuous data
+    (unlike scikit-learn equivalent).
 
     Provides train/test indices to split data in train test sets. Split
-    dataset into k consecutive folds while ensuring that same subject is held
-    out within each fold 
-    Each fold is then used a validation set once while the k - 1 remaining
-    folds form the training set.
+    dataset into k consecutive folds while ensuring that same subject is
+    held out within each fold.  Each fold is then used as a validation set
+    once while the k - 1 remaining folds form the training set.
     Extension of KFold from scikit-learn cross_validation model
-    
+
     Args:
         n_splits: int, default=3
             Number of folds. Must be at least 2.
@@ -38,7 +38,7 @@ class KFoldStratified(_BaseKFold):
         random_state: None, int or RandomState
             Pseudo-random number generator state used for random
             sampling. If None, use default numpy RNG for shuffling
-    
+
     """
 
     def __init__(self, n_splits=3, shuffle=False, random_state=None):
@@ -60,14 +60,14 @@ def _iter_test_masks(self, X, y=None, groups=None):
 
     def split(self, X, y, groups=None):
         """Generate indices to split data into training and test set.
-        
+
         Args:
             X : array-like, shape (n_samples, n_features)
                 Training data, where n_samples is the number of samples
                 and n_features is the number of features.
-                Note that providing ``y`` is sufficient to generate the splits and
-                hence ``np.zeros(n_samples)`` may be used as a placeholder for
-                ``X`` instead of actual training data.
+                Note that providing ``y`` is sufficient to generate the splits
+                and hence ``np.zeros(n_samples)`` may be used as a placeholder
+                for ``X`` instead of actual training data.
             y : array-like, shape (n_samples,)
                 The target variable for supervised learning problems.
                 Stratification is done based on the y labels.
@@ -76,12 +76,14 @@ def split(self, X, y, groups=None):
         Returns:
             train : (ndarray) The training set indices for that split.
             test : (ndarray) The testing set indices for that split.
+
         """
         y = check_array(y, ensure_2d=False, dtype=None)
         return super(KFoldStratified, self).split(X, y, groups)
 
 def set_cv(Y=None, cv_dict=None):
-    """ Helper function to create a sci-kit learn compatible cv object using common parameters for prediction analyses.
+    """ Helper function to create a scikit-learn compatible cv object using
+    common parameters for prediction analyses.
 
     Args:
         Y:  (pd.DataFrame) Pandas Dataframe of Y labels
@@ -115,13 +117,13 @@ def set_cv(Y=None, cv_dict=None):
             cv = loso.split(X=np.zeros(len(Y)), y=Y, groups=cv_dict['subject_id'])
         else:
             raise ValueError("""Make sure you specify a dictionary of
-            {'type': 'kfolds', 'n_folds': n},
-            {'type': 'kfolds', 'n_folds': n, 'stratified': Y},
-            {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
-            {'type': 'loso', 'subject_id': holdout},
-            where n = number of folds, and subject = vector of subject ids that corresponds to self.Y""")
+                            {'type': 'kfolds', 'n_folds': n},
+                            {'type': 'kfolds', 'n_folds': n, 'stratified': Y},
+                            {'type': 'kfolds', 'n_folds': n,
+                            'subject_id': holdout}, or {'type': 'loso',
+                            'subject_id': holdout}, where n = number of folds,
+                            and subject = vector of subject ids that
+                            corresponds to self.Y""")
     else:
         raise ValueError("Make sure 'cv_dict' is a dictionary.")
     return cv
-
-