Skip to content

Commit

Permalink
version 1.1-1
Browse files Browse the repository at this point in the history
  • Loading branch information
topepo authored and gaborcsardi committed May 29, 2013
0 parents commit 0a5419b
Show file tree
Hide file tree
Showing 76 changed files with 22,200 additions and 0 deletions.
17 changes: 17 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Package: AppliedPredictiveModeling
Type: Package
Title: Functions and Data Sets for 'Applied Predictive Modeling'
Version: 1.1-1
Date: 2013-05-29
Author: Max Kuhn, Kjell Johnson
Maintainer: Max Kuhn <[email protected]>
Description: A few functions and several data sets for the Springer book
'Applied Predictive Modeling'
URL: http://appliedpredictivemodeling.com/
Depends: R (>= 2.10), CORElearn, MASS, plyr, reshape2
Suggests: caret, lattice, ellipse
License: GPL
Packaged: 2013-05-29 19:34:07 UTC; kuhna03
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2013-05-30 07:31:33
75 changes: 75 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
a1ba1a42e2fd42c2b6af9ba42e6e5933 *DESCRIPTION
8b54e5a89fbda3af5e077053d40bec76 *NAMESPACE
e4e564d2188913c297d854a86868bd37 *R/bookTheme.R
16af3f1f03fc98647e26edefe3c1ebce *R/easyBoundaryFunc.R
f4e5463cfcc4da4261f8014b1426c10c *R/getPackages.R
35a9e06d580a6ed8b8d98c9f3c0a61eb *R/panels.R
f3956e0be5393bb5d6fcbcfef0b6ff29 *R/permuteRelief.R
c3519cd360a2dd39cef5b453f8551bc9 *R/quadBoundaryFunc.R
d35f915bd2268cbb07c258bd8fce5c50 *R/scriptLocation.R
98d928db47d8347a4f886f0c8e4adde1 *R/transparentTheme.R
8a34126ad3a2f9d077653b26d950dddb *data/AlzheimerDisease.RData
ebd3302a547a1064620517a0598f9ebf *data/ChemicalManufacturingProcess.RData
ef3addd28ad9449688f0c33ba9bfc2d0 *data/FuelEconomy.RData
833d3d4a90e6afe16ec007d5fc628cd2 *data/abalone.RData
8fe13332a2419a2c253fb51c396f6000 *data/concrete.RData
8fc335fdac839cc09b91ff3b5595f00b *data/datalist
812edff8771d7faaada62fdb2c662e40 *data/hepatic.RData
7d5b8064233260c344793aaecc045136 *data/logisticCreditPredictions.RData
ce7019b604378875d2a55e32dde9c05a *data/permeability.RData
e1590269851cf810fdffa832b6cf6d65 *data/schedulingData.RData
669172e9b524f9194a23fbc84a2816f8 *data/segmentationOriginal.RData
06780bd86a4db76cb2a8eb12ef107df7 *data/solubility.RData
5e5422a8c05125f3ab1822f6c525296a *data/twoClassData.RData
202cb28b25a21e6cb4f4182056cd3636 *inst/NEWS.Rd
55afb317aa767a6e82c6c52ee985563f *inst/chapters/02_A_Short_Tour.R
1f2f2179f8756bc60a5db4d285384e53 *inst/chapters/02_A_Short_Tour.Rout
fa5a1a6cd542c0f02a5db901afbccc7e *inst/chapters/03_Data_Pre_Processing.R
b740e1169a13b1d720dbdf82c220e72d *inst/chapters/03_Data_Pre_Processing.Rout
73ff45e8ce4a2afd6792b3bf7f74d4d0 *inst/chapters/04_Over_Fitting.R
4472854d26e70e0fbbb5fb389bf03abc *inst/chapters/04_Over_Fitting.Rout
894a4d8414f73b412f6e4fc8933b1681 *inst/chapters/06_Linear_Regression.R
cdcf0f83fd8d7d3f27d2c01c310aa68c *inst/chapters/06_Linear_Regression.Rout
397c1aa95ea73468e2a1e706488faf37 *inst/chapters/07_Non-Linear_Reg.R
8fcecf4b5e24eeda5fc55927f4f76735 *inst/chapters/07_Non-Linear_Reg.Rout
d2129d815070f3edd9206227ebe24b9d *inst/chapters/08_Regression_Trees.R
578e20ebfbd069f0ae2f1a595c323da5 *inst/chapters/08_Regression_Trees.Rout
ad9f505564c6ca809b9970182f3f8790 *inst/chapters/10_Case_Study_Concrete.R
83e761236b56017776258ca14149c13d *inst/chapters/10_Case_Study_Concrete.Rout
c3693cc6dc941a60844d864cf264db28 *inst/chapters/11_Class_Performance.R
e59029dd4ddb012b5100f5cae3c3fe5a *inst/chapters/11_Class_Performance.Rout
512616afef1588d9f26d24a45dd577d7 *inst/chapters/12_Discriminant_Analysis.R
6abe970f2d4d9c64d25e2da453184ab6 *inst/chapters/12_Discriminant_Analysis.Rout
793602cd5edfc82f9bfef2c53a2fd9dc *inst/chapters/13_Non-Linear_Class.R
10baa59ef6e6600c44f92a372c96de8e *inst/chapters/13_Non-Linear_Class.Rout
c1a9e721dafd1f68b415ca3e611d794c *inst/chapters/14_Class_Trees.R
e33ae9349b008519d48084fca2278005 *inst/chapters/14_Class_Trees.Rout
2422c3ed8b8f5f0b46c6f1586da4af3e *inst/chapters/16_Class_Imbalance.R
9331611025109d739d54646693ad71ad *inst/chapters/16_Class_Imbalance.Rout
ab2f1dc1fb60cdb116d7e335f69d75a9 *inst/chapters/17_Job_Scheduling.R
9d98d6048300c4f2bbc808d814a1c67f *inst/chapters/17_Job_Scheduling.Rout
cdd39d98758aa17566201c45150265b8 *inst/chapters/18_Importance.R
fadb4b6bf8ebbfcd2a28ec12cef5ad1b *inst/chapters/18_Importance.Rout
4e9ffdf738004484e37287d8df2e3726 *inst/chapters/19_Feature_Select.R
fe14eb0e0a390ab5522b5548bb50ff8f *inst/chapters/19_Feature_Select.Rout
ee8d141c6ff92f1878bb1954d21cab67 *inst/chapters/CreateGrantData.R
6b7d3facf17c4ad5704ca9c54c17acc1 *inst/chapters/CreateGrantData.Rout
6a51123bb7533bc6ac7cc60e20c30f7c *man/AlzheimerDisease.Rd
79b66304686ea5f41624e941a839f783 *man/AppliedPredictiveModeling-package.Rd
2f60f009e2049b3a23bfce4321c6961e *man/ChemicalManufacturingProcess.Rd
b8fb23f2d87770651df5c0b9ab178180 *man/FuelEconomy.Rd
a114aed8c4e19f6e471f76aa10607efc *man/Hepatic.Rd
bb766d31a2c9a73fb64a83ad8edcbf9d *man/abalone.Rd
7b4f4f04359281d886b4dd90765c2a29 *man/bookTheme.Rd
1a3a9b303f7a599f89cd2292a359511e *man/concrete.Rd
20acadadd6df9fce573aea2b7ee52020 *man/getPackages.Rd
8a02ecb81e0750c23ee2711b492dde91 *man/internal.Rd
f3f357c33a2b2433fee6c8cecd4876fe *man/jobScheduling.Rd
e422ed025d73fac0cd25de1a2146af1a *man/logisticCreditPredictions.Rd
1b347ea3e594dc8f4b0081d90b5764a1 *man/permeability.Rd
8559d6e7451f0060acb9b80827bb5fc3 *man/permuteRelief.Rd
a0b9d85cec1c624144825536cc0b4993 *man/quadBoundaryFunc.Rd
94865b7fd486f04a94e7dae86599f242 *man/scriptLocation.Rd
d242e9c533e5abb92513999c16dd91d1 *man/segmentationOrignal.Rd
fc76accf5ef83c49775649d296b61ff7 *man/solubility.Rd
bc21567d7b20d731be212decec057ab5 *man/twoClassData.Rd
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exportPattern("^[[:alpha:]]+")
70 changes: 70 additions & 0 deletions R/bookTheme.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Lattice theme used for the book's figures.
#
# Assembles a list of trellis graphical parameters: an 80-colour `regions`
# ramp running from near-white to blue, and a six-colour palette that is
# repeated across six line types / six rotations of the plotting symbols
# (36 superposed groups in total).
#
# Args:
#   set: if TRUE (the default) the theme is applied via trellis.par.set().
#
# Returns:
#   The theme list, invisibly.
bookTheme <- function(set = TRUE)
{
  library(lattice)

  # Colours for level plots and other "regions", light to dark.
  regionRamp <- c(
    "#FEF8FA", "#FDF6F9", "#FBF5F9", "#FAF3F8",
    "#F8F2F7", "#F7F0F7", "#F5EEF6", "#F4EDF5",
    "#F2EBF5", "#F1EAF4", "#EFE8F3", "#EDE7F2",
    "#ECE5F1", "#EAE4F1", "#E8E2F0", "#E6E1EF",
    "#E4DFEE", "#E2DEED", "#E0DCEC", "#DEDAEB",
    "#DCD9EA", "#D9D7E9", "#D7D6E8", "#D4D4E7",
    "#D1D2E6", "#CED1E5", "#CCCFE4", "#C8CEE3",
    "#C5CCE2", "#C2CAE1", "#BFC9E0", "#BBC7DF",
    "#B8C5DF", "#B4C4DE", "#B1C2DD", "#ADC0DC",
    "#A9BFDB", "#A6BDDA", "#A2BBD9", "#9EB9D9",
    "#9BB8D8", "#97B6D7", "#93B4D6", "#8FB2D5",
    "#8BB0D4", "#87AFD3", "#83ADD2", "#7FABD1",
    "#7AA9D0", "#76A7CF", "#71A5CE", "#6CA3CC",
    "#68A1CB", "#63A0CA", "#5D9EC9", "#589CC8",
    "#539AC6", "#4E98C5", "#4996C4", "#4493C3",
    "#3F91C1", "#3A8FC0", "#358DBF", "#308BBE",
    "#2C89BD", "#2887BC", "#2385BB", "#1F83BA",
    "#1C80B9", "#187EB7", "#157CB6", "#127AB5",
    "#0F78B3", "#0D76B2", "#0A73B0", "#0971AE",
    "#076FAC", "#066DAA", "#056AA7", "#0568A5")

  # Six group colours, repeated six times so that 36 groups can be told
  # apart by combining colour with line type or symbol.
  groupPalette <- c("#053061", "#B2182B", "#F46D43",
                    "#5E4FA2", "#66C2A5", "black")
  groupCols <- rep(groupPalette, times = 6)

  # Each block of six symbols is the base set rotated left by one more
  # position than the previous block.
  basePch <- c(1, 4, 6, 0, 5, 17)
  cycledPch <- unlist(lapply(0:5, function(shift) {
    basePch[(0:5 + shift) %% 6 + 1]
  }))

  theme <- list(
    plot.polygon = list(alpha = 1, col = "aliceblue", border = "black",
                        lty = 1, lwd = 1),
    background = list(col = "transparent"),
    bar.fill = list(col = "#cce6ff"),
    box.rectangle = list(col = "black"),
    box.umbrella = list(col = "black"),
    dot.line = list(col = "#e8e8e8"),
    dot.symbol = list(col = "black"),
    plot.line = list(col = "black", lwd = 1, lty = 1),
    plot.symbol = list(col = "black", pch = 16),
    regions = list(col = regionRamp),
    strip.shingle = list(col = c("#ff7f00", "#00ff00", "#00ffff", "#ff00ff",
                                 "#ff0000", "#ffff00", "#0080ff")),
    strip.background = list(col = c("#ffe5cc", "#ccffcc", "#ccffff", "#ffccff",
                                    "#ffcccc", "#ffffcc", "#cce6ff")),
    reference.line = list(col = "#e8e8e8"),
    superpose.line = list(col = groupCols,
                          lty = rep(1:6, each = 6)),
    superpose.symbol = list(pch = cycledPch,
                            cex = rep(0.7, 36),
                            col = groupCols))

  if (set) {
    trellis.par.set(theme)
  }
  invisible(theme)
}
16 changes: 16 additions & 0 deletions R/easyBoundaryFunc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

# Simulate a two-class data set whose class boundary is driven by a linear
# predictor with an x*y interaction.
#
# Two predictors are drawn from a bivariate normal distribution (mean 0,
# variances 2, covariance 1.3). The linear predictor
#   intercept - 4 * X1 + 4 * X2 + interaction * X1 * X2
# is mapped through the logistic function to give `prob`, and each row is
# labelled "Class1" when a uniform draw is at or below `prob`, otherwise
# "Class2".
#
# Args:
#   n:           number of samples to simulate.
#   intercept:   constant term of the linear predictor.
#   interaction: coefficient of the X1 * X2 term.
#
# Returns:
#   A data frame with columns X1, X2, prob and class (a factor with levels
#   "Class1" and "Class2").
easyBoundaryFunc <- function(n, intercept = 0, interaction = 2)
{
  sigma <- matrix(c(2, 1.3, 1.3, 2), 2, 2)

  # MASS::mvrnorm() returns a plain vector when n == 1; re-wrapping in a
  # two-column matrix keeps one row per sample so the X1/X2 columns exist.
  draws <- matrix(MASS::mvrnorm(n = n, mu = c(0, 0), Sigma = sigma), ncol = 2)
  tmpData <- data.frame(draws)

  zFoo <- function(x, y) intercept - 4 * x + 4 * y + interaction * x * y
  z2p <- function(x) 1 / (1 + exp(-x))

  tmpData$prob <- z2p(zFoo(tmpData$X1, tmpData$X2))
  # Fix the levels so the factor has both classes even if one is not drawn.
  tmpData$class <- factor(
    ifelse(runif(length(tmpData$prob)) <= tmpData$prob, "Class1", "Class2"),
    levels = c("Class1", "Class2"))
  tmpData
}
45 changes: 45 additions & 0 deletions R/getPackages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@


# Install the packages used by one or more chapters of the book.
#
# Args:
#   chapter: chapter number(s), numeric or character. Chapters with a
#            package list are 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16,
#            17, 18 and 19.
#   ...:     passed on to install.packages().
#
# Returns:
#   Whatever install.packages() returns.
#
# Raises:
#   An error if none of the requested chapters has a package list.
getPackages <- function(chapter, ...)
{
  if (is.numeric(chapter)) chapter <- paste(chapter)

  pkg <- list(
    "2"  = c("earth", "caret", "lattice"),
    "3"  = c("e1071", "caret", "corrplot"),
    "4"  = c("kernlab", "caret"),
    "6"  = c("lattice", "corrplot", "pls", "elasticnet"),
    "7"  = c("caret", "earth", "kernlab", "lattice", "nnet"),
    "8"  = c("caret", "Cubist", "gbm", "lattice", "party", "partykit",
             "randomForest", "rpart", "RWeka"),
    # was "randomForests", which is not the name used everywhere else
    "10" = c("caret", "Cubist", "earth", "elasticnet", "gbm", "ipred",
             "lattice", "nnet", "party", "pls", "randomForest", "rpart",
             "RWeka"),
    "11" = c("caret", "MASS", "randomForest", "pROC", "klaR"),
    "12" = c("caret", "glmnet", "lattice",
             "MASS", "pamr", "pls", "pROC", "sparseLDA"),
    "13" = c("caret", "kernlab", "klaR", "lattice", "latticeExtra",
             "MASS", "mda", "nnet", "pROC"),
    "14" = c("C50", "caret", "gbm", "lattice", "partykit", "pROC",
             "randomForest", "reshape2",
             "rpart", "RWeka"),
    # was " kernlab" with a stray leading space
    "16" = c("caret", "C50", "earth", "DMwR", "DWD", "kernlab", "mda",
             "pROC", "randomForest", "rpart"),
    "17" = c("C50", "caret", "earth", "Hmisc", "ipred", "tabplot",
             "kernlab", "lattice", "MASS", "mda", "nnet", "pls",
             "randomForest", "rpart", "sparseLDA"),
    "18" = c("caret", "CORElearn", "corrplot", "pROC", "minerva"),
    "19" = c("caret", "MASS", "corrplot", "RColorBrewer", "randomForest",
             "kernlab", "klaR"))

  if (!any(chapter %in% names(pkg))) {
    plist <- paste(paste0("'", names(pkg), "'"), collapse = ", ")
    stop(paste0("'chapter' must be: ", plist))
  }

  # Unknown chapters index to NULL and vanish in unlist(); unique() keeps a
  # package shared by several requested chapters from being listed twice.
  pkg <- unlist(pkg[chapter], use.names = FALSE)
  pkg <- pkg[!is.na(pkg)]
  pkg <- unique(pkg[pkg != ""])
  pkg <- pkg[order(tolower(pkg))]

  install.packages(pkg, ...)
}
30 changes: 30 additions & 0 deletions R/panels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Lattice panel function: reference curves plus the data points.
#
# For t = 0.1, 0.2 and 0.3 a curve is obtained from ellipse() on the 2 x 2
# identity matrix and drawn as a line using the current "reference.line"
# trellis settings (presumably concentric circles of radius t -- confirm
# against the ellipse package documentation). The points passed in as `x`
# and `y` are then drawn on top.
#
# Args:
#   ...: panel arguments; `x`, `y`, `groups` and `subscripts` are picked out
#        by name and forwarded to panel.xyplot().
upperp <- function(...)
{
  library(ellipse)
  args <- list(...)

  # The reference-line settings are the same for every curve, so look them
  # up once.
  refLine <- trellis.par.get("reference.line")

  for (size in c(.1, .2, .3)) {
    ring <- ellipse(diag(2), t = size)
    panel.xyplot(ring[, 1], ring[, 2],
                 type = "l",
                 lty = refLine$lty,
                 col = refLine$col,
                 lwd = refLine$lwd)
  }

  panel.xyplot(args$x, args$y,
               groups = args$groups,
               subscripts = args$subscripts)
}

# No-op panel function: accepts any arguments, draws nothing and returns
# NULL (presumably the blank counterpart to upperp -- confirm with callers).
lowerp <- function(...) NULL

28 changes: 28 additions & 0 deletions R/permuteRelief.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Permutation-based standardisation of attribute scores from
# CORElearn::attrEval().
#
# The scores are computed once on the observed outcome and then `nperm`
# more times with the outcome randomly shuffled. Each observed score is
# standardised by the mean and standard deviation of its permuted scores.
#
# Args:
#   x:     data frame of predictors.
#   y:     outcome vector; stored as column `y` alongside the predictors.
#   nperm: number of permutations of the outcome.
#   ...:   passed on to CORElearn::attrEval().
#
# Returns:
#   A list with elements
#     standardized: (observed - permutation mean) / permutation sd,
#     permutations: the permuted scores in long format (via reshape2::melt),
#     observed:     the scores on the unpermuted outcome,
#     options:      the contents of `...`.
permuteRelief <-
  function(x, y, nperm = 100, ...)
{
  dat <- x
  dat$y <- y

  obs <- CORElearn::attrEval(y ~ ., data = dat, ...)

  # One row per permutation, one column per predictor.
  permuted <- matrix(NA, ncol = length(obs), nrow = nperm)
  colnames(permuted) <- names(obs)

  # seq_len() rather than 1:nperm so nperm = 0 does not iterate over c(1, 0)
  # and index into a zero-row matrix.
  for (i in seq_len(nperm)) {
    dat$y <- sample(y)
    permuted[i, ] <- CORElearn::attrEval(y ~ ., data = dat, ...)
  }

  means <- colMeans(permuted)
  sds <- apply(permuted, 2, stats::sd)

  permuted <- reshape2::melt(permuted)
  names(permuted)[2] <- "Predictor"
  # NOTE(review): reshape2 appears to name the index columns Var1/Var2, so
  # this removal of `X1` may be a no-op carried over from the older reshape
  # package; kept as-is so the returned columns do not change -- confirm.
  permuted$X1 <- NULL

  list(standardized = (obs - means) / sds,
       permutations = permuted,
       observed = obs,
       options = list(...))
}

16 changes: 16 additions & 0 deletions R/quadBoundaryFunc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Simulate a two-class data set with a quadratic class boundary.
#
# Two predictors are drawn from a bivariate normal distribution with mean
# (1, 0), variances 1 and 2 and covariance 0.7. The predictor
#   -1 - 2 * X1 - 0 * X2 - 0.2 * X1^2 + 2 * X2^2
# is mapped through the logistic function to give `prob`, and each row is
# labelled "Class1" when a uniform draw is at or below `prob`, otherwise
# "Class2".
#
# Args:
#   n: number of samples to simulate.
#
# Returns:
#   A data frame with columns X1, X2, prob and class (a factor with levels
#   "Class1" and "Class2").
quadBoundaryFunc <-
  function(n)
{
  sigma <- matrix(c(1, .7, .7, 2), 2, 2)

  # MASS::mvrnorm() returns a plain vector when n == 1; re-wrapping in a
  # two-column matrix keeps one row per sample so the X1/X2 columns exist.
  draws <- matrix(MASS::mvrnorm(n = n, mu = c(1, 0), Sigma = sigma), ncol = 2)
  tmpData <- data.frame(draws)

  # The linear coefficient on y is zero; the term is kept to show the full
  # quadratic form.
  zFoo <- function(x, y) -1 - 2 * x - 0 * y - .2 * x^2 + 2 * y^2
  z2p <- function(x) 1 / (1 + exp(-x))

  tmpData$prob <- z2p(zFoo(tmpData$X1, tmpData$X2))
  # Fix the levels so the factor has both classes even if one is not drawn.
  tmpData$class <- factor(
    ifelse(runif(length(tmpData$prob)) <= tmpData$prob, "Class1", "Class2"),
    levels = c("Class1", "Class2"))
  tmpData
}
2 changes: 2 additions & 0 deletions R/scriptLocation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Path of the "chapters" directory inside the installed
# AppliedPredictiveModeling package, as reported by system.file().
scriptLocation <- function()
{
  system.file("chapters", package = "AppliedPredictiveModeling")
}

59 changes: 59 additions & 0 deletions R/transparentTheme.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Lattice theme with semi-transparent colours for superposed groups.
#
# Args:
#   set:     if TRUE (the default) the theme is applied via
#            trellis.par.set(), with warnings switched off.
#   pchSize: `cex` value used for the superposed plotting symbols.
#   trans:   alpha value passed to rgb() for the group colours; the third
#            colour uses max(0.6, trans), so it is never more transparent
#            than 0.6.
#
# Returns:
#   The theme list, invisibly.
transparentTheme <-
  function(set = TRUE, pchSize = 1, trans = .2)
{
  library(lattice)

  # Colours for level plots and other "regions", light to dark.
  regionRamp <- c(
    "#FEF8FA", "#FDF6F9", "#FBF5F9", "#FAF3F8",
    "#F8F2F7", "#F7F0F7", "#F5EEF6", "#F4EDF5",
    "#F2EBF5", "#F1EAF4", "#EFE8F3", "#EDE7F2",
    "#ECE5F1", "#EAE4F1", "#E8E2F0", "#E6E1EF",
    "#E4DFEE", "#E2DEED", "#E0DCEC", "#DEDAEB",
    "#DCD9EA", "#D9D7E9", "#D7D6E8", "#D4D4E7",
    "#D1D2E6", "#CED1E5", "#CCCFE4", "#C8CEE3",
    "#C5CCE2", "#C2CAE1", "#BFC9E0", "#BBC7DF",
    "#B8C5DF", "#B4C4DE", "#B1C2DD", "#ADC0DC",
    "#A9BFDB", "#A6BDDA", "#A2BBD9", "#9EB9D9",
    "#9BB8D8", "#97B6D7", "#93B4D6", "#8FB2D5",
    "#8BB0D4", "#87AFD3", "#83ADD2", "#7FABD1",
    "#7AA9D0", "#76A7CF", "#71A5CE", "#6CA3CC",
    "#68A1CB", "#63A0CA", "#5D9EC9", "#589CC8",
    "#539AC6", "#4E98C5", "#4996C4", "#4493C3",
    "#3F91C1", "#3A8FC0", "#358DBF", "#308BBE",
    "#2C89BD", "#2887BC", "#2385BB", "#1F83BA",
    "#1C80B9", "#187EB7", "#157CB6", "#127AB5",
    "#0F78B3", "#0D76B2", "#0A73B0", "#0971AE",
    "#076FAC", "#066DAA", "#056AA7", "#0568A5")

  # Red, blue, green and black, shared by superposed lines and symbols.
  groupCols <- c(
    rgb(1, 0, 0, trans),
    rgb(0, 0, 1, trans),
    rgb(0.3984375, 0.7578125, 0.6445312, max(.6, trans)),
    rgb(0, 0, 0, trans))

  theme <- list(
    plot.polygon = list(alpha = 1, col = "aliceblue", border = "black",
                        lty = 1, lwd = 1),
    background = list(col = "transparent"),
    bar.fill = list(col = "#cce6ff"),
    box.rectangle = list(col = "black"),
    box.umbrella = list(col = "black"),
    dot.line = list(col = "#e8e8e8"),
    dot.symbol = list(col = "black"),
    plot.line = list(col = "black"),
    plot.symbol = list(col = "black"),
    regions = list(col = regionRamp),
    strip.shingle = list(col = c("#ff7f00", "#00ff00", "#00ffff", "#ff00ff",
                                 "#ff0000", "#ffff00", "#0080ff")),
    strip.background = list(col = c("#ffe5cc", "#ccffcc", "#ccffff", "#ffccff",
                                    "#ffcccc", "#ffffcc", "#cce6ff")),
    reference.line = list(col = "#e8e8e8"),
    superpose.line = list(col = groupCols,
                          lty = rep(1:2, 6)),
    superpose.symbol = list(pch = c(16, 15, 17, 18, 16),
                            cex = rep(pchSize, 5),
                            col = groupCols))

  if (set) {
    trellis.par.set(theme, warn = FALSE)
  }
  invisible(theme)
}
Binary file added data/AlzheimerDisease.RData
Binary file not shown.
Binary file added data/ChemicalManufacturingProcess.RData
Binary file not shown.
Binary file added data/FuelEconomy.RData
Binary file not shown.
Binary file added data/abalone.RData
Binary file not shown.
Binary file added data/concrete.RData
Binary file not shown.
12 changes: 12 additions & 0 deletions data/datalist
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
AlzheimerDisease: diagnosis predictors
ChemicalManufacturingProcess
FuelEconomy: cars2010 cars2011 cars2012
abalone
concrete: concrete mixtures
hepatic: bio chem injury
logisticCreditPredictions
permeability: fingerprints permeability
schedulingData
segmentationOriginal
solubility: solTestX solTestXtrans solTestY solTrainX solTrainXtrans solTrainY
twoClassData: classes predictors
Binary file added data/hepatic.RData
Binary file not shown.
Binary file added data/logisticCreditPredictions.RData
Binary file not shown.
Binary file added data/permeability.RData
Binary file not shown.
Binary file added data/schedulingData.RData
Binary file not shown.
Binary file added data/segmentationOriginal.RData
Binary file not shown.
Binary file added data/solubility.RData
Binary file not shown.
Binary file added data/twoClassData.RData
Binary file not shown.
9 changes: 9 additions & 0 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
\name{NEWS}
\title{News for Package \pkg{AppliedPredictiveModeling}}
\newcommand{\cpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}}

\section{Changes in version 1.1-1}{
\itemize{
\item Initial Version
}}

Loading

0 comments on commit 0a5419b

Please sign in to comment.