diff --git a/DESCRIPTION b/DESCRIPTION index 20cae9f..265ef01 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,16 @@ Package: AppliedPredictiveModeling Type: Package Title: Functions and Data Sets for 'Applied Predictive Modeling' -Version: 1.1-1 -Date: 2013-05-29 +Version: 1.1-4 +Date: 2013-10-31 Author: Max Kuhn, Kjell Johnson Maintainer: Max Kuhn -Description: A few functions and several data set for the Springer book - 'Applied Predictive Modeling' +Description: A few functions and several data set for the Springer book 'Applied Predictive Modeling' URL: http://appliedpredictivemodeling.com/ Depends: R (>= 2.10), CORElearn, MASS, plyr, reshape2 Suggests: caret, lattice, ellipse License: GPL -Packaged: 2013-05-29 19:34:07 UTC; kuhna03 +Packaged: 2013-10-31 19:04:35 UTC; kuhna03 NeedsCompilation: no Repository: CRAN -Date/Publication: 2013-05-30 07:31:33 +Date/Publication: 2013-10-31 20:59:05 diff --git a/MD5 b/MD5 index 397d14f..2291341 100644 --- a/MD5 +++ b/MD5 @@ -1,15 +1,15 @@ -a1ba1a42e2fd42c2b6af9ba42e6e5933 *DESCRIPTION +f8b2be474dc4a9cb26b5695f4c447306 *DESCRIPTION 8b54e5a89fbda3af5e077053d40bec76 *NAMESPACE e4e564d2188913c297d854a86868bd37 *R/bookTheme.R -16af3f1f03fc98647e26edefe3c1ebce *R/easyBoundaryFunc.R +538821ec8c21e26d4b936611aa157bc4 *R/easyBoundaryFunc.R f4e5463cfcc4da4261f8014b1426c10c *R/getPackages.R 35a9e06d580a6ed8b8d98c9f3c0a61eb *R/panels.R -f3956e0be5393bb5d6fcbcfef0b6ff29 *R/permuteRelief.R -c3519cd360a2dd39cef5b453f8551bc9 *R/quadBoundaryFunc.R +57c3568c0838c1acba62489985f0dea9 *R/permuteRelief.R +5996be154af6ef121fab077294b2e5e0 *R/quadBoundaryFunc.R d35f915bd2268cbb07c258bd8fce5c50 *R/scriptLocation.R 98d928db47d8347a4f886f0c8e4adde1 *R/transparentTheme.R 8a34126ad3a2f9d077653b26d950dddb *data/AlzheimerDisease.RData -ebd3302a547a1064620517a0598f9ebf *data/ChemicalManufacturingProcess.RData +fe3de40e923db3e0133b269f1610afa2 *data/ChemicalManufacturingProcess.RData ef3addd28ad9449688f0c33ba9bfc2d0 *data/FuelEconomy.RData 
833d3d4a90e6afe16ec007d5fc628cd2 *data/abalone.RData 8fe13332a2419a2c253fb51c396f6000 *data/concrete.RData @@ -21,10 +21,10 @@ e1590269851cf810fdffa832b6cf6d65 *data/schedulingData.RData 669172e9b524f9194a23fbc84a2816f8 *data/segmentationOriginal.RData 06780bd86a4db76cb2a8eb12ef107df7 *data/solubility.RData 5e5422a8c05125f3ab1822f6c525296a *data/twoClassData.RData -202cb28b25a21e6cb4f4182056cd3636 *inst/NEWS.Rd +3f96b555cc8131b756ac72889f77abdc *inst/NEWS.Rd 55afb317aa767a6e82c6c52ee985563f *inst/chapters/02_A_Short_Tour.R 1f2f2179f8756bc60a5db4d285384e53 *inst/chapters/02_A_Short_Tour.Rout -fa5a1a6cd542c0f02a5db901afbccc7e *inst/chapters/03_Data_Pre_Processing.R +ec4768cf8bf24124e998a1ce680dceb6 *inst/chapters/03_Data_Pre_Processing.R b740e1169a13b1d720dbdf82c220e72d *inst/chapters/03_Data_Pre_Processing.Rout 73ff45e8ce4a2afd6792b3bf7f74d4d0 *inst/chapters/04_Over_Fitting.R 4472854d26e70e0fbbb5fb389bf03abc *inst/chapters/04_Over_Fitting.Rout @@ -56,7 +56,7 @@ ee8d141c6ff92f1878bb1954d21cab67 *inst/chapters/CreateGrantData.R 6b7d3facf17c4ad5704ca9c54c17acc1 *inst/chapters/CreateGrantData.Rout 6a51123bb7533bc6ac7cc60e20c30f7c *man/AlzheimerDisease.Rd 79b66304686ea5f41624e941a839f783 *man/AppliedPredictiveModeling-package.Rd -2f60f009e2049b3a23bfce4321c6961e *man/ChemicalManufacturingProcess.Rd +b5c2029d7b9d21d128b3084b108404a8 *man/ChemicalManufacturingProcess.Rd b8fb23f2d87770651df5c0b9ab178180 *man/FuelEconomy.Rd a114aed8c4e19f6e471f76aa10607efc *man/Hepatic.Rd bb766d31a2c9a73fb64a83ad8edcbf9d *man/abalone.Rd @@ -71,5 +71,5 @@ e422ed025d73fac0cd25de1a2146af1a *man/logisticCreditPredictions.Rd a0b9d85cec1c624144825536cc0b4993 *man/quadBoundaryFunc.Rd 94865b7fd486f04a94e7dae86599f242 *man/scriptLocation.Rd d242e9c533e5abb92513999c16dd91d1 *man/segmentationOrignal.Rd -fc76accf5ef83c49775649d296b61ff7 *man/solubility.Rd +1e481abc63c674153b4b7c700c7d830f *man/solubility.Rd bc21567d7b20d731be212decec057ab5 *man/twoClassData.Rd diff --git a/R/easyBoundaryFunc.R 
b/R/easyBoundaryFunc.R index 9c57545..b1620e0 100644 --- a/R/easyBoundaryFunc.R +++ b/R/easyBoundaryFunc.R @@ -1,7 +1,6 @@ easyBoundaryFunc <- function(n, intercept = 0, interaction = 2) { - require(MASS) sigma <- matrix(c(2,1.3,1.3,2),2,2) tmpData <- data.frame(mvrnorm(n=n, c(0,0), sigma)) diff --git a/R/permuteRelief.R b/R/permuteRelief.R index e15d214..30234e5 100644 --- a/R/permuteRelief.R +++ b/R/permuteRelief.R @@ -1,9 +1,6 @@ permuteRelief <- function(x, y, nperm = 100, ...) { - library(CORElearn) - library(plyr) - library(reshape2) dat <- x dat$y <- y diff --git a/R/quadBoundaryFunc.R b/R/quadBoundaryFunc.R index b4886be..1f2d093 100644 --- a/R/quadBoundaryFunc.R +++ b/R/quadBoundaryFunc.R @@ -1,7 +1,6 @@ quadBoundaryFunc <- function(n) { - require(MASS) sigma <- matrix(c(1,.7,.7,2),2,2) tmpData <- data.frame(mvrnorm(n=n, c(1,0), sigma)) diff --git a/data/ChemicalManufacturingProcess.RData b/data/ChemicalManufacturingProcess.RData index c6dc014..ef89711 100644 Binary files a/data/ChemicalManufacturingProcess.RData and b/data/ChemicalManufacturingProcess.RData differ diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 43061de..fa38772 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -2,6 +2,21 @@ \title{News for Package \pkg{AppliedPredictiveModeling}} \newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}} + +\section{Changes in version 1.1-4}{ +\itemize{ +\item The data set \code{ChemicalManufacturingProcess} did not contain +the rows with missing data. They were added back in. + +\item Small changes to conform to R CMD check. 
+}} + +\section{Changes in version 1.1-2}{ +\itemize{ +\item Code to create the \code{carSubset} object in Section 3.8 was added + to 03_Data_Pre_Processing.R +}} + \section{Changes in version 1.1-1}{ \itemize{ \item Initial Version diff --git a/inst/chapters/03_Data_Pre_Processing.R b/inst/chapters/03_Data_Pre_Processing.R index 063a5a1..9d92d26 100644 --- a/inst/chapters/03_Data_Pre_Processing.R +++ b/inst/chapters/03_Data_Pre_Processing.R @@ -202,6 +202,32 @@ corrplot(segCorr, order = "hclust", tl.cex = .35) ## caret's findCorrelation function is used to identify columns to remove. highCorr <- findCorrelation(segCorr, .75) +################################################################################ +### Section 3.8 Computing (Creating Dummy Variables) + +data(cars) +type <- c("convertible", "coupe", "hatchback", "sedan", "wagon") +cars$Type <- factor(apply(cars[, 14:18], 1, function(x) type[which(x == 1)])) + +carSubset <- cars[sample(1:nrow(cars), 20), c(1, 2, 19)] + +head(carSubset) +levels(carSubset$Type) + +simpleMod <- dummyVars(~Mileage + Type, + data = carSubset, + ## Remove the variable name from the + ## column name + levelsOnly = TRUE) +simpleMod + +withInteraction <- dummyVars(~Mileage + Type + Mileage:Type, + data = carSubset, + levelsOnly = TRUE) +withInteraction +predict(withInteraction, head(carSubset)) + + ################################################################################ ### Session Information diff --git a/man/ChemicalManufacturingProcess.Rd b/man/ChemicalManufacturingProcess.Rd index d19ce7f..86d0ef9 100644 --- a/man/ChemicalManufacturingProcess.Rd +++ b/man/ChemicalManufacturingProcess.Rd @@ -27,8 +27,8 @@ the same batch of biological starting material. 
\usage{data(ChemicalManufacturingProcess)} \value{ - \item{ChemicalManufacturingProcess}{a data frame with columns for the outcome (\code{Yield}) and the predictors (\code{BiologicalMaterial01} though \code{BiologicalMaterial12} and \code{ManufacturingProcess01} though \code{ManufacturingProcess45}} - } +\code{ChemicalManufacturingProcess}: a data frame with columns for the outcome (\code{Yield}) and the predictors (\code{BiologicalMaterial01} through \code{BiologicalMaterial12} and \code{ManufacturingProcess01} through \code{ManufacturingProcess45}) +} \examples{ data(ChemicalManufacturingProcess) diff --git a/man/solubility.Rd b/man/solubility.Rd index a10db3c..b54f762 100644 --- a/man/solubility.Rd +++ b/man/solubility.Rd @@ -38,6 +38,28 @@ library(caret) set.seed(100) indx <- createFolds(solTrainY, returnTrain = TRUE) +### To re-create the transformed version of the data: +\dontrun{ +## Find the predictors that are not fingerprints +contVars <- names(solTrainX)[!grepl("FP", names(solTrainX))] +## Some have zero values, so we need to add one to them so that +## we can use the Box-Cox transformation. Alternatively, we could +## use the Yeo-Johnson transformation without altering the data. +contPredTrain <- solTrainX[,contVars] + 1 +contPredTest <- solTestX[,contVars] + 1 + +pp <- preProcess(contPredTrain, method = "BoxCox") +contPredTrain <- predict(pp, contPredTrain) +contPredTest <- predict(pp, contPredTest) + +## Reassemble the fingerprint data with the transformed values. +trainXtrans <- cbind(solTrainX[,grep("FP", names(solTrainX))], contPredTrain) +testXtrans <- cbind( solTestX[,grep("FP", names(solTestX))], contPredTest) + +all.equal(trainXtrans, solTrainXtrans) +all.equal(testXtrans, solTestXtrans) + } + } \keyword{datasets}