NOTE: Access and run the source code for this notebook at https://rstudio.cloud/project/801185.
Regression problems
Code for Figure 1.1:
# Figure 1.1: perspective plot of an illustrative sale-price surface over
# above-ground living area and year built.
df <- AmesHousing::make_ames()

# 15 evenly spaced positions into the sorted values; shared by both axes
grid_idx <- floor(seq(1, nrow(df), length.out = 15))

# x is kept as a 15 x 1 column and y as a 1 x 15 row so that the matrix
# product below yields a 15 x 15 grid of z values
x <- matrix(sort(df$Gr_Liv_Area)[grid_idx], 15, 1)
y <- matrix(sort(df$Year_Built)[grid_idx], 1, 15)

# z comes from a fixed formula of x and y (df$Sale_Price is not used);
# as.vector(x) recycles down the columns, so row i is reduced by 5 * x[i]
z <- 25051 + 3505 * (log(x^.9) %*% log(y)) - 5 * as.vector(x)

# per-column multipliers that scale down the first two and the last column of z;
# named `col_scale` rather than `c` so that base::c() is not masked
col_scale <- c(.92, .95, rep(1, 12), .95)
z <- sweep(z, MARGIN = 2, STATS = col_scale, FUN = `*`)

# near-zero margins so the surface fills the plotting region
par(mar = c(0.1, 0.1, 0.1, 0.1))
persp(
  x = x,
  y = y,
  z = z,
  xlab = "Square footage",
  ylab = "Year built",
  zlab = "Sale price",
  theta = -45,
  phi = 25,
  col = viridis::viridis(100)
)
Classification problems
Code for Figure 1.2:
# Figure 1.2: three features feed a model that outputs a Yes/No class.
library(DiagrammeR)

# Graphviz DOT description: circular feature nodes, a box for the model,
# and triangular nodes for the two possible responses.
classification_dot <- "
digraph boxes_and_circles {
node [shape = circle]
x1; x2; x3;
node [shape = box]
Model;
node [shape = triangle]
Yes; No;
x1->Model; x2->Model; x3->Model; Model->No; Model->Yes;
}"

# render the diagram
grViz(diagram = classification_dot)
The data sets
# access data: build the Ames housing data set with AmesHousing::make_ames()
ames <- AmesHousing::make_ames()
# initial dimension: dim() reports the number of rows, then columns
dim(ames)
[1] 2930 81
# response variable: first few values of Sale_Price
head(ames$Sale_Price)
[1] 215000 105000 172000 244000 189900 195500
# access data: employee attrition data set exported by the rsample package
# NOTE(review): newer rsample releases appear to no longer ship `attrition`
# (it seems to have moved to the modeldata package) -- confirm against the
# installed version
attrition <- rsample::attrition
# initial dimension: dim() reports the number of rows, then columns
dim(attrition)
[1] 1470 31
# response variable: Attrition is a factor with levels No / Yes (see output)
head(attrition$Attrition)
[1] Yes No Yes No No No
Levels: No Yes
- MNIST handwritten numbers data
# access data: read_mnist() returns a list whose components are listed by
# names() below
mnist <- dslabs::read_mnist()
names(mnist)
[1] "train" "test"
# initial feature dimensions: rows, then columns of the training image matrix
dim(mnist$train$images)
[1] 60000 784
# response variable: first few training labels
head(mnist$train$labels)
[1] 5 0 4 1 9 2
- Grocery items and quantities purchased.
# URL to download/read in the data
url <- "https://koalaverse.github.io/homlr/data/my_basket.csv"
# Access data: read the CSV directly from the URL (requires network access)
my_basket <- readr::read_csv(url)
# Print dimensions: number of rows, then columns
dim(my_basket)
[1] 2000 42
# Peek at the data: this prints the whole object; no single response column
# is selected here
my_basket
LS0tCnRpdGxlOiAiQ2hhcHRlciAwMTogSW50cm9kdWN0aW9uIHRvIE1hY2hpbmUgTGVhcm5pbmciCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCl9fTk9URV9fOiBBY2Nlc3MgYW5kIHJ1biB0aGUgc291cmNlIGNvZGUgZm9yIHRoaXMgbm90ZWJvb2sgW2hlcmVdKGh0dHBzOi8vcnN0dWRpby5jbG91ZC9wcm9qZWN0LzgwMTE4NSkuIAoKIyMgUmVncmVzc2lvbiBwcm9ibGVtcwoKQ29kZSBmb3IgRmlndXJlIDEuMToKCmBgYHtyIGludHJvLXJlZ3Jlc3Npb24tcHJvYmxlbSwgZWNobz1UUlVFLCBmaWcuY2FwPSJBdmVyYWdlIGhvbWUgc2FsZXMgcHJpY2UgYXMgYSBmdW5jdGlvbiBvZiB5ZWFyIGJ1aWx0IGFuZCB0b3RhbCBzcXVhcmUgZm9vdGFnZS4iLCBmaWcuaGVpZ2h0PTMsIGZpZy53aWR0aD0zfQpkZiA8LSBBbWVzSG91c2luZzo6bWFrZV9hbWVzKCkKeCA8LSBtYXRyaXgoc29ydChkZiRHcl9MaXZfQXJlYSlbZmxvb3Ioc2VxKDEsIG5yb3coZGYpLCBsZW5ndGgub3V0ID0gMTUpKV0sIDE1LCAxKQp5IDwtIG1hdHJpeChzb3J0KGRmJFllYXJfQnVpbHQpW2Zsb29yKHNlcSgxLCBucm93KGRmKSwgbGVuZ3RoLm91dCA9IDE1KSldLCAxLCAxNSkKeiA8LSAyNTA1MSArIDM1MDUqKGxvZyh4Xi45KSAlKiUgbG9nKHkpKSAtIDUqYXMudmVjdG9yKHgpIApjIDwtIG1hdHJpeChjKC45MiwgLjk1LCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAuOTUpLCAxLCAxNSkKeiA8LSBzd2VlcCh6LCBNQVJHSU4gPSAyLCBjLCBgKmApCgpwYXIobWFyID0gYygwLjEsIDAuMSwgMC4xLCAwLjEpKQpwZXJzcCgKICB4ID0geCwKICB5ID0geSwKICB6ID0geiwKICB4bGFiID0gIlNxdWFyZSBmb290YWdlIiwKICB5bGFiID0gIlllYXIgYnVpbHQiLAogIHpsYWIgPSAiU2FsZSBwcmljZSIsCiAgdGhldGEgPSAtNDUsCiAgcGhpID0gMjUsCiAgY29sID0gdmlyaWRpczo6dmlyaWRpcygxMDApCikKYGBgCgojIyBDbGFzc2lmaWNhdGlvbiBwcm9ibGVtcwoKQ29kZSBmb3IgRmlndXJlIDEuMjoKICAgIApgYGB7ciBjbGFzc2lmaWNhdGlvbi1wcm9ibGVtLCBlY2hvPVRSVUUsIG91dC53aWR0aD0iNzUlIiwgb3V0LmhlaWdodD0iNzUlIiwgZmlnLmNhcD0iQ2xhc3NpZmljYXRpb24gcHJvYmxlbSBtb2RlbGluZyB3aXRoICdZZXMnIGFuZCAnTm8nIHJlc3BvbnNlIGJhc2VkIG9uIHRocmVlIGZlYXR1cmVzLiIsIGNhY2hlPUZBTFNFfQojIGNvZGUgdG8gY3JlYXRlIGdyYXBoaWMKbGlicmFyeShEaWFncmFtbWVSKQogZ3JWaXooIgogICAKICAgZGlncmFwaCBib3hlc19hbmRfY2lyY2xlcyB7CiAgICAgbm9kZSBbc2hhcGUgPSBjaXJjbGVdCiAgICAgeDE7IHgyOyB4MzsKICAgICAKICAgICBub2RlIFtzaGFwZSA9IGJveF0KICAgICBNb2RlbDsKICAgICAKICAgICBub2RlIFtzaGFwZSA9IHRyaWFuZ2xlXQogICAgIFllczsgTm87CiAKICAgICB4MS0+TW9kZWw7IHgyLT5Nb2RlbDsgeDMtPk1vZGVsOyBNb2RlbC0+Tm87IE1vZGVsLT5ZZXM7CiB9IikKYGBg
CgojIyBUaGUgZGF0YSBzZXRzCgoqIEFtZXMgaG9tZXMgc2FsZXMgZGF0YQogICAgCmBgYHtyIGltcG9ydC1hbWVzLWRhdGF9CiMgYWNjZXNzIGRhdGEKYW1lcyA8LSBBbWVzSG91c2luZzo6bWFrZV9hbWVzKCkKCiMgaW5pdGlhbCBkaW1lbnNpb24KZGltKGFtZXMpCgojIHJlc3BvbnNlIHZhcmlhYmxlCmhlYWQoYW1lcyRTYWxlX1ByaWNlKQpgYGAKICAgIAoqIEVtcGxveWVlIGF0dHJpdGlvbiBpbmZvcm1hdGlvbiBvcmlnaW5hbGx5IHByb3ZpZGVkIGJ5IFtJQk0gV2F0c29uIEFuYWx5dGljcyBMYWJdKGh0dHBzOi8vd3d3LmlibS5jb20vY29tbXVuaXRpZXMvYW5hbHl0aWNzL3dhdHNvbi1hbmFseXRpY3MtYmxvZy9oci1lbXBsb3llZS1hdHRyaXRpb24vKS4KCmBgYHtyIGltcG9ydC1hdHRyaXRpb24tZGF0YX0KIyBhY2Nlc3MgZGF0YQphdHRyaXRpb24gPC0gcnNhbXBsZTo6YXR0cml0aW9uCgojIGluaXRpYWwgZGltZW5zaW9uCmRpbShhdHRyaXRpb24pCgojIHJlc3BvbnNlIHZhcmlhYmxlCmhlYWQoYXR0cml0aW9uJEF0dHJpdGlvbikKYGBgICAgIAogICAgCiogTU5JU1QgaGFuZHdyaXR0ZW4gbnVtYmVycyBkYXRhIAoKYGBge3IgaW1wb3J0LW1uaXN0LWRhdGEsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CiNhY2Nlc3MgZGF0YQptbmlzdCA8LSBkc2xhYnM6OnJlYWRfbW5pc3QoKQpuYW1lcyhtbmlzdCkKCiMgaW5pdGlhbCBmZWF0dXJlIGRpbWVuc2lvbnMKZGltKG1uaXN0JHRyYWluJGltYWdlcykKCiMgcmVzcG9uc2UgdmFyaWFibGUKaGVhZChtbmlzdCR0cmFpbiRsYWJlbHMpCmBgYCAgIAoKKiBHcm9jZXJ5IGl0ZW1zIGFuZCBxdWFudGl0aWVzIHB1cmNoYXNlZC4gCgpgYGB7ciBpbXBvcnQtbXliYXNrZXQtZGF0YSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KIyBVUkwgdG8gZG93bmxvYWQvcmVhZCBpbiB0aGUgZGF0YQp1cmwgPC0gImh0dHBzOi8va29hbGF2ZXJzZS5naXRodWIuaW8vaG9tbHIvZGF0YS9teV9iYXNrZXQuY3N2IgoKIyBBY2Nlc3MgZGF0YQpteV9iYXNrZXQgPC0gcmVhZHI6OnJlYWRfY3N2KHVybCkKCiMgUHJpbnQgZGltZW5zaW9ucwpkaW0obXlfYmFza2V0KQoKIyBQZWVrIGF0IHJlc3BvbnNlIHZhcmlhYmxlCm15X2Jhc2tldApgYGAgICA=