version 1.1-4

cran · Oct 31, 2013 · f94a4e8 · f94a4e8
1 parent 0a5419b
commit f94a4e8
Show file tree

Hide file tree

Showing 10 changed files with 79 additions and 22 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,17 +1,16 @@
 Package: AppliedPredictiveModeling
 Type: Package
 Title: Functions and Data Sets for 'Applied Predictive Modeling'
-Version: 1.1-1
-Date: 2013-05-29
+Version: 1.1-4
+Date: 2013-10-31
 Author: Max Kuhn, Kjell Johnson
 Maintainer: Max Kuhn <[email protected]>
-Description: A few functions and several data set for the Springer book
-        'Applied Predictive Modeling'
+Description: A few functions and several data sets for the Springer book 'Applied Predictive Modeling'
 URL: http://appliedpredictivemodeling.com/
 Depends: R (>= 2.10), CORElearn, MASS, plyr, reshape2
 Suggests: caret, lattice, ellipse
 License: GPL
-Packaged: 2013-05-29 19:34:07 UTC; kuhna03
+Packaged: 2013-10-31 19:04:35 UTC; kuhna03
 NeedsCompilation: no
 Repository: CRAN
-Date/Publication: 2013-05-30 07:31:33
+Date/Publication: 2013-10-31 20:59:05
diff --git a/MD5 b/MD5
@@ -1,15 +1,15 @@
-a1ba1a42e2fd42c2b6af9ba42e6e5933 *DESCRIPTION
+f8b2be474dc4a9cb26b5695f4c447306 *DESCRIPTION
 8b54e5a89fbda3af5e077053d40bec76 *NAMESPACE
 e4e564d2188913c297d854a86868bd37 *R/bookTheme.R
-16af3f1f03fc98647e26edefe3c1ebce *R/easyBoundaryFunc.R
+538821ec8c21e26d4b936611aa157bc4 *R/easyBoundaryFunc.R
 f4e5463cfcc4da4261f8014b1426c10c *R/getPackages.R
 35a9e06d580a6ed8b8d98c9f3c0a61eb *R/panels.R
-f3956e0be5393bb5d6fcbcfef0b6ff29 *R/permuteRelief.R
-c3519cd360a2dd39cef5b453f8551bc9 *R/quadBoundaryFunc.R
+57c3568c0838c1acba62489985f0dea9 *R/permuteRelief.R
+5996be154af6ef121fab077294b2e5e0 *R/quadBoundaryFunc.R
 d35f915bd2268cbb07c258bd8fce5c50 *R/scriptLocation.R
 98d928db47d8347a4f886f0c8e4adde1 *R/transparentTheme.R
 8a34126ad3a2f9d077653b26d950dddb *data/AlzheimerDisease.RData
-ebd3302a547a1064620517a0598f9ebf *data/ChemicalManufacturingProcess.RData
+fe3de40e923db3e0133b269f1610afa2 *data/ChemicalManufacturingProcess.RData
 ef3addd28ad9449688f0c33ba9bfc2d0 *data/FuelEconomy.RData
 833d3d4a90e6afe16ec007d5fc628cd2 *data/abalone.RData
 8fe13332a2419a2c253fb51c396f6000 *data/concrete.RData
@@ -21,10 +21,10 @@ e1590269851cf810fdffa832b6cf6d65 *data/schedulingData.RData
 669172e9b524f9194a23fbc84a2816f8 *data/segmentationOriginal.RData
 06780bd86a4db76cb2a8eb12ef107df7 *data/solubility.RData
 5e5422a8c05125f3ab1822f6c525296a *data/twoClassData.RData
-202cb28b25a21e6cb4f4182056cd3636 *inst/NEWS.Rd
+3f96b555cc8131b756ac72889f77abdc *inst/NEWS.Rd
 55afb317aa767a6e82c6c52ee985563f *inst/chapters/02_A_Short_Tour.R
 1f2f2179f8756bc60a5db4d285384e53 *inst/chapters/02_A_Short_Tour.Rout
-fa5a1a6cd542c0f02a5db901afbccc7e *inst/chapters/03_Data_Pre_Processing.R
+ec4768cf8bf24124e998a1ce680dceb6 *inst/chapters/03_Data_Pre_Processing.R
 b740e1169a13b1d720dbdf82c220e72d *inst/chapters/03_Data_Pre_Processing.Rout
 73ff45e8ce4a2afd6792b3bf7f74d4d0 *inst/chapters/04_Over_Fitting.R
 4472854d26e70e0fbbb5fb389bf03abc *inst/chapters/04_Over_Fitting.Rout
@@ -56,7 +56,7 @@ ee8d141c6ff92f1878bb1954d21cab67 *inst/chapters/CreateGrantData.R
 6b7d3facf17c4ad5704ca9c54c17acc1 *inst/chapters/CreateGrantData.Rout
 6a51123bb7533bc6ac7cc60e20c30f7c *man/AlzheimerDisease.Rd
 79b66304686ea5f41624e941a839f783 *man/AppliedPredictiveModeling-package.Rd
-2f60f009e2049b3a23bfce4321c6961e *man/ChemicalManufacturingProcess.Rd
+b5c2029d7b9d21d128b3084b108404a8 *man/ChemicalManufacturingProcess.Rd
 b8fb23f2d87770651df5c0b9ab178180 *man/FuelEconomy.Rd
 a114aed8c4e19f6e471f76aa10607efc *man/Hepatic.Rd
 bb766d31a2c9a73fb64a83ad8edcbf9d *man/abalone.Rd
@@ -71,5 +71,5 @@ e422ed025d73fac0cd25de1a2146af1a *man/logisticCreditPredictions.Rd
 a0b9d85cec1c624144825536cc0b4993 *man/quadBoundaryFunc.Rd
 94865b7fd486f04a94e7dae86599f242 *man/scriptLocation.Rd
 d242e9c533e5abb92513999c16dd91d1 *man/segmentationOrignal.Rd
-fc76accf5ef83c49775649d296b61ff7 *man/solubility.Rd
+1e481abc63c674153b4b7c700c7d830f *man/solubility.Rd
 bc21567d7b20d731be212decec057ab5 *man/twoClassData.Rd
diff --git a/R/easyBoundaryFunc.R b/R/easyBoundaryFunc.R
@@ -1,7 +1,6 @@
 
 easyBoundaryFunc <- function(n, intercept = 0, interaction = 2)
 {
-   require(MASS)
    sigma <- matrix(c(2,1.3,1.3,2),2,2)
 
    tmpData <- data.frame(mvrnorm(n=n, c(0,0), sigma))

diff --git a/R/permuteRelief.R b/R/permuteRelief.R
@@ -1,9 +1,6 @@
 permuteRelief <-
 function(x, y, nperm = 100, ...)
   {
-    library(CORElearn)
-    library(plyr)
-    library(reshape2)
     dat <- x
     dat$y <- y
 

diff --git a/R/quadBoundaryFunc.R b/R/quadBoundaryFunc.R
@@ -1,7 +1,6 @@
 quadBoundaryFunc <-
 function(n)
 {
-   require(MASS)
    sigma <- matrix(c(1,.7,.7,2),2,2)
 
    tmpData <- data.frame(mvrnorm(n=n, c(1,0), sigma))

diff --git a/data/ChemicalManufacturingProcess.RData b/data/ChemicalManufacturingProcess.RData
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
@@ -2,6 +2,21 @@
 \title{News for Package \pkg{AppliedPredictiveModeling}}
 \newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}
 
+
+\section{Changes in version 1.1-4}{
+\itemize{
+\item The data set \code{ChemicalManufacturingProcess} did not contain 
+the rows with missing data. They were added back in.
+
+\item Small changes to conform to R CMD check. 
+}}
+
+\section{Changes in version 1.1-2}{
+\itemize{
+\item Code to create the \code{carSubset} object in Section 3.8 was added 
+      to 03_Data_Pre_Processing.R
+}}
+
 \section{Changes in version 1.1-1}{
 \itemize{
 \item Initial Version

diff --git a/inst/chapters/03_Data_Pre_Processing.R b/inst/chapters/03_Data_Pre_Processing.R
@@ -202,6 +202,32 @@ corrplot(segCorr, order = "hclust", tl.cex = .35)
 ## caret's findCorrelation function is used to identify columns to remove.
 highCorr <- findCorrelation(segCorr, .75)
 
+################################################################################
+### Section 3.8 Computing (Creating Dummy Variables)
+
+data(cars)
+type <- c("convertible", "coupe", "hatchback", "sedan", "wagon")
+cars$Type <- factor(apply(cars[, 14:18], 1, function(x) type[which(x == 1)]))
+
+carSubset <- cars[sample(1:nrow(cars), 20), c(1, 2, 19)]
+
+head(carSubset)
+levels(carSubset$Type)
+
+simpleMod <- dummyVars(~Mileage + Type,
+                       data = carSubset,
+                       ## Remove the variable name from the
+                       ## column name
+                       levelsOnly = TRUE)
+simpleMod
+
+withInteraction <- dummyVars(~Mileage + Type + Mileage:Type,
+                             data = carSubset,
+                             levelsOnly = TRUE)
+withInteraction
+predict(withInteraction, head(carSubset))
+
+
 
 ################################################################################
 ### Session Information

diff --git a/man/ChemicalManufacturingProcess.Rd b/man/ChemicalManufacturingProcess.Rd
@@ -27,8 +27,8 @@ the same batch of biological starting material.
 
 \usage{data(ChemicalManufacturingProcess)}
 \value{
-  \item{ChemicalManufacturingProcess}{a data frame with columns for the outcome (\code{Yield}) and the predictors (\code{BiologicalMaterial01} though \code{BiologicalMaterial12} and \code{ManufacturingProcess01} though \code{ManufacturingProcess45}}
-  }
+\code{ChemicalManufacturingProcess}: a data frame with columns for the outcome (\code{Yield}) and the predictors (\code{BiologicalMaterial01} through \code{BiologicalMaterial12} and \code{ManufacturingProcess01} through \code{ManufacturingProcess45})
+}
 
 \examples{
 data(ChemicalManufacturingProcess)

diff --git a/man/solubility.Rd b/man/solubility.Rd
@@ -38,6 +38,28 @@ library(caret)
 set.seed(100)
 indx <- createFolds(solTrainY, returnTrain = TRUE)
 
+### To re-create the transformed version of the data:
+\dontrun{
+## Find the predictors that are not fingerprints
+contVars <- names(solTrainX)[!grepl("FP", names(solTrainX))]
+## Some have zero values, so we need to add one to them so that
+## we can use the Box-Cox transformation. Alternatively, we could 
+## use the Yeo-Johnson transformation without altering the data.
+contPredTrain <- solTrainX[,contVars] + 1
+contPredTest  <-  solTestX[,contVars] + 1
+
+pp <- preProcess(contPredTrain, method = "BoxCox")
+contPredTrain <- predict(pp, contPredTrain)
+contPredTest  <- predict(pp, contPredTest)
+
+## Reassemble the fingerprint data with the transformed values.
+trainXtrans <- cbind(solTrainX[,grep("FP", names(solTrainX))], contPredTrain)
+testXtrans  <- cbind( solTestX[,grep("FP", names(solTestX))],  contPredTest)
+
+all.equal(trainXtrans, solTrainXtrans)
+all.equal(testXtrans, solTestXtrans)
+	}
+
 }
 
 \keyword{datasets}