NOTE: Access and run the source code for this notebook at https://rstudio.cloud/project/801185.

Regression problems

Code for Figure 1.1:

# Figure 1.1: perspective plot of a hand-specified sale-price surface over
# above-ground living area and year built (the surface is computed from the
# fixed formula below, not from a fitted model).
df <- AmesHousing::make_ames()

# Both axes use the same evenly spaced positions into the sorted columns, so
# compute the index vector once instead of repeating the expression.
n_grid <- 15
grid_idx <- floor(seq(1, nrow(df), length.out = n_grid))
x <- matrix(sort(df$Gr_Liv_Area)[grid_idx], n_grid, 1)  # column vector
y <- matrix(sort(df$Year_Built)[grid_idx], 1, n_grid)   # row vector

# (n_grid x 1) %*% (1 x n_grid) yields an n_grid x n_grid surface;
# as.vector(x) recycles down the columns, so row i is shifted by 5 * x[i].
z <- 25051 + 3505 * (log(x^0.9) %*% log(y)) - 5 * as.vector(x)

# Per-column multipliers that pull down the first two and the last column.
# Named `col_scale` rather than `c` so that base::c() is not shadowed.
col_scale <- c(0.92, 0.95, rep(1, n_grid - 3), 0.95)
z <- sweep(z, MARGIN = 2, col_scale, `*`)

# Shrink the margins for this plot only, then restore the previous settings so
# later plots on the same device are not affected.
old_par <- par(mar = c(0.1, 0.1, 0.1, 0.1))
persp(
  x = x,
  y = y,
  z = z,
  xlab = "Square footage",
  ylab = "Year built",
  zlab = "Sale price",
  theta = -45,
  phi = 25,
  col = viridis::viridis(100)
)
par(old_par)

Classification problems

Code for Figure 1.2:

# Figure 1.2: render a Graphviz (DOT) diagram with DiagrammeR::grViz().
# The graph has three circle nodes (x1, x2, x3) that each point to a single
# box node (Model), which in turn points to two triangle nodes (Yes, No).
library(DiagrammeR)
 grViz("
   
   digraph boxes_and_circles {
     node [shape = circle]
     x1; x2; x3;
     
     node [shape = box]
     Model;
     
     node [shape = triangle]
     Yes; No;
 
     x1->Model; x2->Model; x3->Model; Model->No; Model->Yes;
 }")

The data sets

# Load the Ames housing data by calling AmesHousing::make_ames()
ames <- AmesHousing::make_ames()

# Initial dimensions of the data: number of rows, number of columns
dim(ames)
[1] 2930   81
# Response variable: first few values of the Sale_Price column
head(ames$Sale_Price)
[1] 215000 105000 172000 244000 189900 195500
# Load the employee attrition data object exported by the rsample package.
# NOTE(review): newer rsample releases reportedly no longer export `attrition`
# (it moved to the modeldata package) -- confirm against the installed version.
attrition <- rsample::attrition

# Initial dimensions of the data: number of rows, number of columns
dim(attrition)
[1] 1470   31
# Response variable: first few values of the Attrition column
head(attrition$Attrition)
[1] Yes No  Yes No  No  No 
Levels: No Yes
# Read the MNIST data with dslabs::read_mnist() and list the names of the
# top-level elements of the returned object
mnist <- dslabs::read_mnist()
names(mnist)
[1] "train" "test" 
# Initial feature dimensions: dim() of the training-set `images` element
dim(mnist$train$images)
[1] 60000   784
# Response variable: first few values of the training-set `labels` element
head(mnist$train$labels)
[1] 5 0 4 1 9 2
# URL of the hosted my_basket CSV file
url <- "https://koalaverse.github.io/homlr/data/my_basket.csv"

# Read the CSV straight from the URL with readr::read_csv()
my_basket <- readr::read_csv(url)

# Print dimensions: number of rows, number of columns
dim(my_basket)
[1] 2000   42
# Peek at the data by printing the object itself.
# NOTE(review): the original comment said "response variable", but this prints
# the whole my_basket object rather than a single column -- confirm intent.
my_basket
LS0tCnRpdGxlOiAiQ2hhcHRlciAwMTogSW50cm9kdWN0aW9uIHRvIE1hY2hpbmUgTGVhcm5pbmciCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCl9fTk9URV9fOiBBY2Nlc3MgYW5kIHJ1biB0aGUgc291cmNlIGNvZGUgZm9yIHRoaXMgbm90ZWJvb2sgW2hlcmVdKGh0dHBzOi8vcnN0dWRpby5jbG91ZC9wcm9qZWN0LzgwMTE4NSkuIAoKIyMgUmVncmVzc2lvbiBwcm9ibGVtcwoKQ29kZSBmb3IgRmlndXJlIDEuMToKCmBgYHtyIGludHJvLXJlZ3Jlc3Npb24tcHJvYmxlbSwgZWNobz1UUlVFLCBmaWcuY2FwPSJBdmVyYWdlIGhvbWUgc2FsZXMgcHJpY2UgYXMgYSBmdW5jdGlvbiBvZiB5ZWFyIGJ1aWx0IGFuZCB0b3RhbCBzcXVhcmUgZm9vdGFnZS4iLCBmaWcuaGVpZ2h0PTMsIGZpZy53aWR0aD0zfQpkZiA8LSBBbWVzSG91c2luZzo6bWFrZV9hbWVzKCkKeCA8LSBtYXRyaXgoc29ydChkZiRHcl9MaXZfQXJlYSlbZmxvb3Ioc2VxKDEsIG5yb3coZGYpLCBsZW5ndGgub3V0ID0gMTUpKV0sIDE1LCAxKQp5IDwtIG1hdHJpeChzb3J0KGRmJFllYXJfQnVpbHQpW2Zsb29yKHNlcSgxLCBucm93KGRmKSwgbGVuZ3RoLm91dCA9IDE1KSldLCAxLCAxNSkKeiA8LSAyNTA1MSArIDM1MDUqKGxvZyh4Xi45KSAlKiUgbG9nKHkpKSAtIDUqYXMudmVjdG9yKHgpIApjIDwtIG1hdHJpeChjKC45MiwgLjk1LCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAxLCAuOTUpLCAxLCAxNSkKeiA8LSBzd2VlcCh6LCBNQVJHSU4gPSAyLCBjLCBgKmApCgpwYXIobWFyID0gYygwLjEsIDAuMSwgMC4xLCAwLjEpKQpwZXJzcCgKICB4ID0geCwKICB5ID0geSwKICB6ID0geiwKICB4bGFiID0gIlNxdWFyZSBmb290YWdlIiwKICB5bGFiID0gIlllYXIgYnVpbHQiLAogIHpsYWIgPSAiU2FsZSBwcmljZSIsCiAgdGhldGEgPSAtNDUsCiAgcGhpID0gMjUsCiAgY29sID0gdmlyaWRpczo6dmlyaWRpcygxMDApCikKYGBgCgojIyBDbGFzc2lmaWNhdGlvbiBwcm9ibGVtcwoKQ29kZSBmb3IgRmlndXJlIDEuMjoKICAgIApgYGB7ciBjbGFzc2lmaWNhdGlvbi1wcm9ibGVtLCBlY2hvPVRSVUUsIG91dC53aWR0aD0iNzUlIiwgb3V0LmhlaWdodD0iNzUlIiwgZmlnLmNhcD0iQ2xhc3NpZmljYXRpb24gcHJvYmxlbSBtb2RlbGluZyB3aXRoICdZZXMnIGFuZCAnTm8nIHJlc3BvbnNlIGJhc2VkIG9uIHRocmVlIGZlYXR1cmVzLiIsIGNhY2hlPUZBTFNFfQojIGNvZGUgdG8gY3JlYXRlIGdyYXBoaWMKbGlicmFyeShEaWFncmFtbWVSKQogZ3JWaXooIgogICAKICAgZGlncmFwaCBib3hlc19hbmRfY2lyY2xlcyB7CiAgICAgbm9kZSBbc2hhcGUgPSBjaXJjbGVdCiAgICAgeDE7IHgyOyB4MzsKICAgICAKICAgICBub2RlIFtzaGFwZSA9IGJveF0KICAgICBNb2RlbDsKICAgICAKICAgICBub2RlIFtzaGFwZSA9IHRyaWFuZ2xlXQogICAgIFllczsgTm87CiAKICAgICB4MS0+TW9kZWw7IHgyLT5Nb2RlbDsgeDMtPk1vZGVsOyBNb2RlbC0+Tm87IE1vZGVsLT5ZZXM7CiB9IikKYGBg
CgojIyBUaGUgZGF0YSBzZXRzCgoqIEFtZXMgaG9tZXMgc2FsZXMgZGF0YQogICAgCmBgYHtyIGltcG9ydC1hbWVzLWRhdGF9CiMgYWNjZXNzIGRhdGEKYW1lcyA8LSBBbWVzSG91c2luZzo6bWFrZV9hbWVzKCkKCiMgaW5pdGlhbCBkaW1lbnNpb24KZGltKGFtZXMpCgojIHJlc3BvbnNlIHZhcmlhYmxlCmhlYWQoYW1lcyRTYWxlX1ByaWNlKQpgYGAKICAgIAoqIEVtcGxveWVlIGF0dHJpdGlvbiBpbmZvcm1hdGlvbiBvcmlnaW5hbGx5IHByb3ZpZGVkIGJ5IFtJQk0gV2F0c29uIEFuYWx5dGljcyBMYWJdKGh0dHBzOi8vd3d3LmlibS5jb20vY29tbXVuaXRpZXMvYW5hbHl0aWNzL3dhdHNvbi1hbmFseXRpY3MtYmxvZy9oci1lbXBsb3llZS1hdHRyaXRpb24vKS4KCmBgYHtyIGltcG9ydC1hdHRyaXRpb24tZGF0YX0KIyBhY2Nlc3MgZGF0YQphdHRyaXRpb24gPC0gcnNhbXBsZTo6YXR0cml0aW9uCgojIGluaXRpYWwgZGltZW5zaW9uCmRpbShhdHRyaXRpb24pCgojIHJlc3BvbnNlIHZhcmlhYmxlCmhlYWQoYXR0cml0aW9uJEF0dHJpdGlvbikKYGBgICAgIAogICAgCiogTU5JU1QgaGFuZHdyaXR0ZW4gbnVtYmVycyBkYXRhIAoKYGBge3IgaW1wb3J0LW1uaXN0LWRhdGEsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CiNhY2Nlc3MgZGF0YQptbmlzdCA8LSBkc2xhYnM6OnJlYWRfbW5pc3QoKQpuYW1lcyhtbmlzdCkKCiMgaW5pdGlhbCBmZWF0dXJlIGRpbWVuc2lvbnMKZGltKG1uaXN0JHRyYWluJGltYWdlcykKCiMgcmVzcG9uc2UgdmFyaWFibGUKaGVhZChtbmlzdCR0cmFpbiRsYWJlbHMpCmBgYCAgIAoKKiBHcm9jZXJ5IGl0ZW1zIGFuZCBxdWFudGl0aWVzIHB1cmNoYXNlZC4gCgpgYGB7ciBpbXBvcnQtbXliYXNrZXQtZGF0YSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KIyBVUkwgdG8gZG93bmxvYWQvcmVhZCBpbiB0aGUgZGF0YQp1cmwgPC0gImh0dHBzOi8va29hbGF2ZXJzZS5naXRodWIuaW8vaG9tbHIvZGF0YS9teV9iYXNrZXQuY3N2IgoKIyBBY2Nlc3MgZGF0YQpteV9iYXNrZXQgPC0gcmVhZHI6OnJlYWRfY3N2KHVybCkKCiMgUHJpbnQgZGltZW5zaW9ucwpkaW0obXlfYmFza2V0KQoKIyBQZWVrIGF0IHJlc3BvbnNlIHZhcmlhYmxlCm15X2Jhc2tldApgYGAgICA=