Note: Some results may differ from the hard copy book due to the changing of sampling procedures introduced in R 3.6.0. See http://bit.ly/35D1SW7 for more details. Access and run the source code for this notebook here.

Hidden chapter requirements used in the book to set the plotting theme and load packages used in hidden code chunks:

knitr::opts_chunk$set(
  message = FALSE, 
  warning = FALSE, 
  cache = FALSE
)

# Set the graphical theme
ggplot2::theme_set(ggplot2::theme_light())

# Load required packages
library(tidyr)
library(purrr)

Prerequisites

For this chapter we’ll use the following packages:

# Helper packages
library(dplyr)       # for data manipulation
library(ggplot2)     # for data visualization
library(stringr)     # for string functionality

# Modeling packages
library(cluster)     # for general clustering algorithms
library(factoextra)  # for visualizing cluster results

To illustrate k-means concepts we’ll use the mnist and my_basket data sets:

mnist <- dslabs::read_mnist()

url <- "https://koalaverse.github.io/homlr/data/my_basket.csv"
my_basket <- readr::read_csv(url)

Distance measures

Figure 20.1:

# generate data
corr_ex <- tibble(
  v = 1:20,
  obs_1 = sample(5:7, 20, replace = TRUE),
  obs_2 = sample(4:10, 20, replace = TRUE)
) %>%
  mutate(obs_3 = obs_2 * 2 + sample(0:1, 1))

corr_ex %>%
  gather(Observation, value, obs_1:obs_3) %>%
  ggplot(aes(v, value, color = Observation)) +
  geom_line(size = 1) +
  scale_colour_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) +
  scale_x_continuous("Variable index") +
  scale_y_continuous("Some arbitrary measure")

Defining clusters

Figure 20.2:

# Generate data
create_data <- function(sd) {
  data_frame(
    x1 = c(rnorm(100, sd = sd), rnorm(100, sd = sd) + 3),
    x2 = c(rnorm(100, sd = sd), rnorm(100, sd = sd) - 2)
  ) %>%
    mutate(`W(Ck)` = case_when(
      sd == 0.5  ~ "Best",
      sd == 0.75 ~ "Better",
      sd == 1   ~ "Good"
    ))
}
df <- map(c(0.5, 0.75, 1), create_data)

# Compute and add cluster info to data
k2 <- map(df, ~ kmeans(.x[, 1:2], 2, nstart = 20))
df <- map2(df, k2, ~ mutate(.x, cluster = .y$cluster)) %>%
  map2_dfr(k2, ~ inner_join(.x, .y$centers %>% 
                          as.data.frame() %>% 
                          mutate(cluster = row_number()), by = "cluster")
       ) %>%
  rename(x1 = x1.x, x2 = x2.x, x_center = x1.y, y_center = x2.y) %>%
  mutate(`W(Ck)` = factor(`W(Ck)`, levels = c("Good", "Better", "Best")))

# Plot results
df %>%
  ggplot(aes(colour = factor(cluster))) +
  facet_wrap(~ `W(Ck)`) +
  geom_segment(aes(x = x1, xend = x_center, y = x2, yend = y_center), lty = "dashed", alpha = .5) +
  geom_point(aes(x_center, y_center), size = 4) +
  geom_point(aes(x1, x2), show.legend = FALSE, alpha = .5) +
  scale_x_continuous(bquote(X[1]), breaks = NULL, labels = NULL) +
  scale_y_continuous(bquote(X[2]), breaks = NULL, labels = NULL) +
  theme(legend.position = "none")

Figure 20.3:

# Generate data
set.seed(111)
obj <- mlbench::mlbench.spirals(200, 1, 0.025)
df <- data.frame(
  x = obj$x[, 1],
  y = obj$x[, 2],
  class = obj$classes
)

# Plot data
p1 <- ggplot(df, aes(x, y)) +
  geom_point() +
  xlab(NULL) +
  ylab(NULL) +
  ggtitle('(A) Original spiral data')

# Run k-means
kmeans_on_spiral <- kmeans(df[, 1:2], 2)
df$kmeans_clusters <- kmeans_on_spiral$cluster
p2 <- ggplot(df, aes(x, y, color = kmeans_clusters)) +
  geom_point(show.legend = FALSE) +
  xlab(NULL) +
  ylab(NULL) +
  ggtitle('(B) k-means clusters')

# Plot results
sc <- kernlab::specc(as.matrix(df[, 1:2]), centers = 2)
df$spec_clusters <- sc@.Data
p3 <- ggplot(df, aes(x, y, color = spec_clusters)) +
  geom_point(show.legend = FALSE) +
  xlab(NULL) +
  ylab(NULL) +
  ggtitle('(C) Spectral clusters')

# Display plots side by side
gridExtra::grid.arrange(p1, p2, p3, nrow = 1)

k-means algorithm

Figure 20.4:

# Generate data
df <- data_frame(
    x1 = c(rnorm(100), rnorm(100) + 3),
    x2 = c(rnorm(100), rnorm(100) - 2)
)

# Compute and plot results
map(1:6, ~ kmeans(df, 3)) %>%
  map2_dfr(1:6, ~ df %>% mutate(
    cluster = .x$cluster,
    name = paste0("Iteration: ", .y, ";  W(Ck): ", round(.x$tot.withinss, 2))
    )) %>%
  ggplot(aes(x1, x2, colour = cluster)) +
  geom_point(show.legend = FALSE, size = 1) +
  facet_wrap(~ name, nrow = 2)

Clustering digits

features <- mnist$train$images

# Use k-means model with 10 centers and 10 random starts
mnist_clustering <- kmeans(features, centers = 10, nstart = 10)

# Print contents of the model output
str(mnist_clustering)
List of 9
 $ cluster     : int [1:60000] 2 9 4 1 7 3 8 2 8 7 ...
 $ centers     : num [1:10, 1:784] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:10] "1" "2" "3" "4" ...
  .. ..$ : NULL
 $ totss       : num 2.06e+11
 $ withinss    : num [1:10] 1.04e+10 1.81e+10 1.51e+10 2.41e+10 1.58e+10 ...
 $ tot.withinss: num 1.53e+11
 $ betweenss   : num 5.27e+10
 $ size        : int [1:10] 5620 6544 4681 8918 5717 7447 8837 5967 3176 3093
 $ iter        : int 10
 $ ifault      : int 0
 - attr(*, "class")= chr "kmeans"
# Extract cluster centers
mnist_centers <- mnist_clustering$centers

# Plot typical cluster digits
par(mfrow = c(2, 5), mar = c(0.5, 0.5, 0.5, 0.5))
layout(matrix(seq_len(nrow(mnist_centers)), 2, 5, byrow = FALSE))
for (i in seq_len(nrow(mnist_centers))) {
  image(matrix(mnist_centers[i, ], 28, 28)[, 28:1], 
        col = gray.colors(12, rev = TRUE), xaxt = "n", yaxt = "n")
}

# Create mode function
mode_fun <- function(x){  
  which.max(tabulate(x))
}

mnist_comparison <- data.frame(
  cluster = mnist_clustering$cluster,
  actual = mnist$train$labels
) %>%
  group_by(cluster) %>%
  mutate(mode = mode_fun(actual)) %>%
  ungroup() %>%
  mutate_all(factor, levels = 0:9)

# Create confusion matrix and plot results
yardstick::conf_mat(
  mnist_comparison, 
  truth = actual, 
  estimate = mode
) %>%
  autoplot(type = 'heatmap')

How many clusters?

fviz_nbclust(
  my_basket, 
  kmeans, 
  k.max = 25,
  method = "wss",
  diss = get_dist(my_basket, method = "spearman")
)

Clustering with mixed data

# Full ames data set --> recode ordinal variables to numeric
ames_full <- AmesHousing::make_ames() %>%
  mutate_if(str_detect(names(.), 'Qual|Cond|QC|Qu'), as.numeric)

# One-hot encode --> retain only the features and not sale price
full_rank  <- caret::dummyVars(Sale_Price ~ ., data = ames_full, 
                               fullRank = TRUE)
ames_1hot <- predict(full_rank, ames_full)

# Scale data
ames_1hot_scaled <- scale(ames_1hot)

# New dimensions
dim(ames_1hot_scaled)
[1] 2930  240
set.seed(123)

fviz_nbclust(
  ames_1hot_scaled, 
  kmeans, 
  method = "wss", 
  k.max = 25, 
  verbose = FALSE
)

# Original data minus Sale_Price
ames_full <- AmesHousing::make_ames() %>% select(-Sale_Price)

# Compute Gower distance for original data
gower_dst <- daisy(ames_full, metric = "gower")
# You can supply the Gower distance matrix to several clustering algos
pam_gower <- pam(x = gower_dst, k = 8, diss = TRUE)
diana_gower <- diana(x = gower_dst, diss = TRUE)
agnes_gower <- agnes(x = gower_dst, diss = TRUE)

Alternative partitioning methods

fviz_nbclust(
  ames_1hot_scaled, 
  pam, 
  method = "wss", 
  k.max = 25, 
  verbose = FALSE
)

# k-means computation time on MNIST data
system.time(kmeans(features, centers = 10))
   user  system elapsed 
 20.437   0.517  20.971 
# CLARA computation time on MNIST data
system.time(clara(features, k = 10))
   user  system elapsed 
 36.450   0.191  36.659 
LS0tCnRpdGxlOiAiQ2hhcHRlciAyMDogSy1tZWFucyBDbHVzdGVyaW5nIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpfX05vdGVfXzogU29tZSByZXN1bHRzIG1heSBkaWZmZXIgZnJvbSB0aGUgaGFyZCBjb3B5IGJvb2sgZHVlIHRvIHRoZSBjaGFuZ2luZyBvZgpzYW1wbGluZyBwcm9jZWR1cmVzIGludHJvZHVjZWQgaW4gUiAzLjYuMC4gU2VlIGh0dHA6Ly9iaXQubHkvMzVEMVNXNyBmb3IgbW9yZQpkZXRhaWxzLiBBY2Nlc3MgYW5kIHJ1biB0aGUgc291cmNlIGNvZGUgZm9yIHRoaXMgbm90ZWJvb2sgW2hlcmVdKGh0dHBzOi8vcnN0dWRpby5jbG91ZC9wcm9qZWN0LzgwMTE4NSkuCgpIaWRkZW4gY2hhcHRlciByZXF1aXJlbWVudHMgdXNlZCBpbiB0aGUgYm9vayB0byBzZXQgdGhlIHBsb3R0aW5nIHRoZW1lIGFuZCBsb2FkCnBhY2thZ2VzIHVzZWQgaW4gaGlkZGVuIGNvZGUgY2h1bmtzOgoKYGBge3Igc2V0dXB9CmtuaXRyOjpvcHRzX2NodW5rJHNldCgKICBtZXNzYWdlID0gRkFMU0UsIAogIHdhcm5pbmcgPSBGQUxTRSwgCiAgY2FjaGUgPSBGQUxTRQopCgojIFNldCB0aGUgZ3JhcGhpY2FsIHRoZW1lCmdncGxvdDI6OnRoZW1lX3NldChnZ3Bsb3QyOjp0aGVtZV9saWdodCgpKQoKIyBMb2FkIHJlcXVpcmVkIHBhY2thZ2VzCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkocHVycnIpCmBgYAoKIyMgUHJlcmVxdWlzaXRlcwoKRm9yIHRoaXMgY2hhcHRlciB3ZeKAmWxsIHVzZSB0aGUgZm9sbG93aW5nIHBhY2thZ2VzOgoKYGBge3Iga21lYW5zLXBrZ3N9CiMgSGVscGVyIHBhY2thZ2VzCmxpYnJhcnkoZHBseXIpICAgICAgICMgZm9yIGRhdGEgbWFuaXB1bGF0aW9uCmxpYnJhcnkoZ2dwbG90MikgICAgICMgZm9yIGRhdGEgdmlzdWFsaXphdGlvbgpsaWJyYXJ5KHN0cmluZ3IpICAgICAjIGZvciBzdHJpbmcgZnVuY3Rpb25hbGl0eQoKIyBNb2RlbGluZyBwYWNrYWdlcwpsaWJyYXJ5KGNsdXN0ZXIpICAgICAjIGZvciBnZW5lcmFsIGNsdXN0ZXJpbmcgYWxnb3JpdGhtcwpsaWJyYXJ5KGZhY3RvZXh0cmEpICAjIGZvciB2aXN1YWxpemluZyBjbHVzdGVyIHJlc3VsdHMKYGBgCgpUbyBpbGx1c3RyYXRlIF9rXy1tZWFucyBjb25jZXB0cyB3ZSdsbCB1c2UgdGhlIGBtbmlzdGAgYW5kIGBteV9iYXNrZXRgIGRhdGEgc2V0czoKCmBgYHtyIGttZWFucy1kYXRhfQptbmlzdCA8LSBkc2xhYnM6OnJlYWRfbW5pc3QoKQoKdXJsIDwtICJodHRwczovL2tvYWxhdmVyc2UuZ2l0aHViLmlvL2hvbWxyL2RhdGEvbXlfYmFza2V0LmNzdiIKbXlfYmFza2V0IDwtIHJlYWRyOjpyZWFkX2Nzdih1cmwpCmBgYAoKIyMgRGlzdGFuY2UgbWVhc3VyZXMKCkZpZ3VyZSAyMC4xOgoKYGBge3IgY29ycmVsYXRpb24tZGlzdGFuY2UtZXhhbXBsZSwgZmlnLndpZHRoPTcsIGZpZy5jYXA9J0NvcnJlbGF0aW9uLWJhc2VkIGRpc3RhbmNlIG1lYXN1cmVzIHdpbGwgY2FwdHVyZSB0aGUgY29ycmVsYXRpb24gYmV0d2VlbiB0d28gb2JzZXJ2YXRpb25zIGJldHRlciB0aGFuIGEgbm9uLWNvcnJlbGF0aW9uLWJhc2VkIGRpc3RhbmNlIG1lYXN1cmU7IHJlZ2FyZGxlc3Mgb2YgbWFnbml0dWRlIGRpZmZlcmVuY2VzLid9CiMgZ2VuZXJhdGUgZGF0YQpjb3JyX2V4IDwtIHRpYmJsZSgKICB2ID0gMToyMCwKICBvYnNfMSA9IHNhbXBsZSg1OjcsIDIwLCByZXBsYWNlID0gVFJVRSksCiAgb2JzXzIgPSBzYW1wbGUoNDoxMCwgMjAsIHJlcGxhY2UgPSBUUlVFKQopICU+JQogIG11dGF0ZShvYnNfMyA9IG9ic18yICogMiArIHNhbXBsZSgwOjEsIDEpKQoKY29ycl9leCAlPiUKICBnYXRoZXIoT2JzZXJ2YXRpb24sIHZhbHVlLCBvYnNfMTpvYnNfMykgJT4lCiAgZ2dwbG90KGFlcyh2LCB2YWx1ZSwgY29sb3IgPSBPYnNlcnZhdGlvbikpICsKICBnZW9tX2xpbmUoc2l6ZSA9IDEpICsKICBzY2FsZV9jb2xvdXJfbWFudWFsKHZhbHVlcyA9IGMoIiMwMEFGQkIiLCAiI0U3QjgwMCIsICIjRkM0RTA3IikpICsKICBzY2FsZV94X2NvbnRpbnVvdXMoIlZhcmlhYmxlIGluZGV4IikgKwogIHNjYWxlX3lfY29udGludW91cygiU29tZSBhcmJpdHJhcnkgbWVhc3VyZSIpCmBgYAoKCiMjIERlZmluaW5nIGNsdXN0ZXJzCgpGaWd1cmUgMjAuMjoKCmBgYHtyIGttZWFucy1jbHVzdGVycy1nb29kLWJldHRlci1iZXN0LCBmaWcuaGVpZ2h0PTMuNSwgZmlnLndpZHRoPTEwLCBmaWcuY2FwPSJUb3RhbCB3aXRoaW4tY2x1c3RlciB2YXJpYXRpb24gY2FwdHVyZXMgdGhlIHRvdGFsIGRpc3RhbmNlcyBiZXR3ZWVuIGEgY2x1c3RlcidzIGNlbnRyb2lkIGFuZCB0aGUgaW5kaXZpZHVhbCBvYnNlcnZhdGlvbnMgYXNzaWduZWQgdG8gdGhhdCBjbHVzdGVyLiBUaGUgbW9yZSBjb21wYWN0IHRoZSB0aGVzZSBkaXN0YW5jZXMsIHRoZSBtb3JlIGRlZmluZWQgYW5kIGlzb2xhdGVkIHRoZSBjbHVzdGVycyBhcmUuIn0KIyBHZW5lcmF0ZSBkYXRhCmNyZWF0ZV9kYXRhIDwtIGZ1bmN0aW9uKHNkKSB7CiAgZGF0YV9mcmFtZSgKICAgIHgxID0gYyhybm9ybSgxMDAsIHNkID0gc2QpLCBybm9ybSgxMDAsIHNkID0gc2QpICsgMyksCiAgICB4MiA9IGMocm5vcm0oMTAwLCBzZCA9IHNkKSwgcm5vcm0oMTAwLCBzZCA9IHNkKSAtIDIpCiAgKSAlPiUKICAgIG11dGF0ZShgVyhDaylgID0gY2FzZV93aGVuKAogICAgICBzZCA9PSAwLjUgIH4gIkJlc3QiLAogICAgICBzZCA9PSAwLjc1IH4gIkJldHRlciIsCiAgICAgIHNkID09IDEgICB+ICJHb29kIgogICAgKSkKfQpkZiA8LSBtYXAoYygwLjUsIDAuNzUsIDEpLCBjcmVhdGVfZGF0YSkKCiMgQ29tcHV0ZSBhbmQgYWRkIGNsdXN0ZXIgaW5mbyB0byBkYXRhCmsyIDwtIG1hcChkZiwgfiBrbWVhbnMoLnhbLCAxOjJdLCAyLCBuc3RhcnQgPSAyMCkpCmRmIDwtIG1hcDIoZGYsIGsyLCB+IG11dGF0ZSgueCwgY2x1c3RlciA9IC55JGNsdXN0ZXIpKSAlPiUKICBtYXAyX2RmcihrMiwgfiBpbm5lcl9qb2luKC54LCAueSRjZW50ZXJzICU+JSAKICAgICAgICAgICAgICAgICAgICAgICAgICBhcy5kYXRhLmZyYW1lKCkgJT4lIAogICAgICAgICAgICAgICAgICAgICAgICAgIG11dGF0ZShjbHVzdGVyID0gcm93X251bWJlcigpKSwgYnkgPSAiY2x1c3RlciIpCiAgICAgICApICU+JQogIHJlbmFtZSh4MSA9IHgxLngsIHgyID0geDIueCwgeF9jZW50ZXIgPSB4MS55LCB5X2NlbnRlciA9IHgyLnkpICU+JQogIG11dGF0ZShgVyhDaylgID0gZmFjdG9yKGBXKENrKWAsIGxldmVscyA9IGMoIkdvb2QiLCAiQmV0dGVyIiwgIkJlc3QiKSkpCgojIFBsb3QgcmVzdWx0cwpkZiAlPiUKICBnZ3Bsb3QoYWVzKGNvbG91ciA9IGZhY3RvcihjbHVzdGVyKSkpICsKICBmYWNldF93cmFwKH4gYFcoQ2spYCkgKwogIGdlb21fc2VnbWVudChhZXMoeCA9IHgxLCB4ZW5kID0geF9jZW50ZXIsIHkgPSB4MiwgeWVuZCA9IHlfY2VudGVyKSwgbHR5ID0gImRhc2hlZCIsIGFscGhhID0gLjUpICsKICBnZW9tX3BvaW50KGFlcyh4X2NlbnRlciwgeV9jZW50ZXIpLCBzaXplID0gNCkgKwogIGdlb21fcG9pbnQoYWVzKHgxLCB4MiksIHNob3cubGVnZW5kID0gRkFMU0UsIGFscGhhID0gLjUpICsKICBzY2FsZV94X2NvbnRpbnVvdXMoYnF1b3RlKFhbMV0pLCBicmVha3MgPSBOVUxMLCBsYWJlbHMgPSBOVUxMKSArCiAgc2NhbGVfeV9jb250aW51b3VzKGJxdW90ZShYWzJdKSwgYnJlYWtzID0gTlVMTCwgbGFiZWxzID0gTlVMTCkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikKYGBgCgpGaWd1cmUgMjAuMzoKCmBgYHtyIG5vbi1saW5lYXItYm91bmRhcmllcywgZmlnLmNhcD0nVGhlIGFzc3VtcHRpb25zIG9mIGstbWVhbnMgbGVuZHMgaXQgaW5lZmZlY3RpdmUgaW4gY2FwdHVyaW5nIGNvbXBsZXggZ2VvbWV0cmljIGdyb3VwaW5nczsgaG93ZXZlciwgc3BlY3RyYWwgY2x1c3RlcmluZyBhbGxvd3MgeW91IHRvIGNsdXN0ZXIgZGF0YSB0aGF0IGlzIGNvbm5lY3RlZCBidXQgbm90IG5lY2Vzc2FyaWx5IGNsdXN0ZXJlZCB3aXRoaW4gY29udmV4IGJvdW5kYXJpZXMuJ30KIyBHZW5lcmF0ZSBkYXRhCnNldC5zZWVkKDExMSkKb2JqIDwtIG1sYmVuY2g6Om1sYmVuY2guc3BpcmFscygyMDAsIDEsIDAuMDI1KQpkZiA8LSBkYXRhLmZyYW1lKAogIHggPSBvYmokeFssIDFdLAogIHkgPSBvYmokeFssIDJdLAogIGNsYXNzID0gb2JqJGNsYXNzZXMKKQoKIyBQbG90IGRhdGEKcDEgPC0gZ2dwbG90KGRmLCBhZXMoeCwgeSkpICsKICBnZW9tX3BvaW50KCkgKwogIHhsYWIoTlVMTCkgKwogIHlsYWIoTlVMTCkgKwogIGdndGl0bGUoJyhBKSBPcmlnaW5hbCBzcGlyYWwgZGF0YScpCgojIFJ1biBrLW1lYW5zCmttZWFuc19vbl9zcGlyYWwgPC0ga21lYW5zKGRmWywgMToyXSwgMikKZGYka21lYW5zX2NsdXN0ZXJzIDwtIGttZWFuc19vbl9zcGlyYWwkY2x1c3RlcgpwMiA8LSBnZ3Bsb3QoZGYsIGFlcyh4LCB5LCBjb2xvciA9IGttZWFuc19jbHVzdGVycykpICsKICBnZW9tX3BvaW50KHNob3cubGVnZW5kID0gRkFMU0UpICsKICB4bGFiKE5VTEwpICsKICB5bGFiKE5VTEwpICsKICBnZ3RpdGxlKCcoQikgay1tZWFucyBjbHVzdGVycycpCgojIFBsb3QgcmVzdWx0cwpzYyA8LSBrZXJubGFiOjpzcGVjYyhhcy5tYXRyaXgoZGZbLCAxOjJdKSwgY2VudGVycyA9IDIpCmRmJHNwZWNfY2x1c3RlcnMgPC0gc2NALkRhdGEKcDMgPC0gZ2dwbG90KGRmLCBhZXMoeCwgeSwgY29sb3IgPSBzcGVjX2NsdXN0ZXJzKSkgKwogIGdlb21fcG9pbnQoc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIHhsYWIoTlVMTCkgKwogIHlsYWIoTlVMTCkgKwogIGdndGl0bGUoJyhDKSBTcGVjdHJhbCBjbHVzdGVycycpCgojIERpc3BsYXkgcGxvdHMgc2lkZSBieSBzaWRlCmdyaWRFeHRyYTo6Z3JpZC5hcnJhbmdlKHAxLCBwMiwgcDMsIG5yb3cgPSAxKQpgYGAKCgojIyBfa18tbWVhbnMgYWxnb3JpdGhtCgpGaWd1cmUgMjAuNDoKCmBgYHtyIHJhbmRvbS1zdGFydHMsIGZpZy5oZWlnaHQ9NiwgZmlnLndpZHRoPTEwLCBmaWcuY2FwPSdFYWNoIGFwcGxpY2F0aW9uIG9mIHRoZSBrLW1lYW5zIGFsZ29yaXRobSBjYW4gYWNoaWV2ZSBzbGlnaHQgZGlmZmVyZW5jZXMgaW4gdGhlIGZpbmFsIHJlc3VsdHMgYmFzZWQgb24gdGhlIHJhbmRvbSBzdGFydC4nfQojIEdlbmVyYXRlIGRhdGEKZGYgPC0gZGF0YV9mcmFtZSgKICAgIHgxID0gYyhybm9ybSgxMDApLCBybm9ybSgxMDApICsgMyksCiAgICB4MiA9IGMocm5vcm0oMTAwKSwgcm5vcm0oMTAwKSAtIDIpCikKCiMgQ29tcHV0ZSBhbmQgcGxvdCByZXN1bHRzCm1hcCgxOjYsIH4ga21lYW5zKGRmLCAzKSkgJT4lCiAgbWFwMl9kZnIoMTo2LCB+IGRmICU+JSBtdXRhdGUoCiAgICBjbHVzdGVyID0gLngkY2x1c3RlciwKICAgIG5hbWUgPSBwYXN0ZTAoIkl0ZXJhdGlvbjogIiwgLnksICI7ICBXKENrKTogIiwgcm91bmQoLngkdG90LndpdGhpbnNzLCAyKSkKICAgICkpICU+JQogIGdncGxvdChhZXMoeDEsIHgyLCBjb2xvdXIgPSBjbHVzdGVyKSkgKwogIGdlb21fcG9pbnQoc2hvdy5sZWdlbmQgPSBGQUxTRSwgc2l6ZSA9IDEpICsKICBmYWNldF93cmFwKH4gbmFtZSwgbnJvdyA9IDIpCmBgYAoKIyMgQ2x1c3RlcmluZyBkaWdpdHMKCmBgYHtyIG1uaXN0LWttZWFuc30KZmVhdHVyZXMgPC0gbW5pc3QkdHJhaW4kaW1hZ2VzCgojIFVzZSBrLW1lYW5zIG1vZGVsIHdpdGggMTAgY2VudGVycyBhbmQgMTAgcmFuZG9tIHN0YXJ0cwptbmlzdF9jbHVzdGVyaW5nIDwtIGttZWFucyhmZWF0dXJlcywgY2VudGVycyA9IDEwLCBuc3RhcnQgPSAxMCkKCiMgUHJpbnQgY29udGVudHMgb2YgdGhlIG1vZGVsIG91dHB1dApzdHIobW5pc3RfY2x1c3RlcmluZykKYGBgCgpgYGB7ciBwbG90LWttZWFucy1tbmlzdC1jZW50ZXJzLCBmaWcuaGVpZ2h0PTQsIGZpZy53aWR0aD0xMiwgZmlnLmNhcD0nQ2x1c3RlciBjZW50ZXJzIGZvciB0aGUgMTAgY2x1c3RlcnMgaWRlbnRpZmllZCBpbiB0aGUgTU5JU1QgdHJhaW5pbmcgZGF0YS4nfQojIEV4dHJhY3QgY2x1c3RlciBjZW50ZXJzCm1uaXN0X2NlbnRlcnMgPC0gbW5pc3RfY2x1c3RlcmluZyRjZW50ZXJzCgojIFBsb3QgdHlwaWNhbCBjbHVzdGVyIGRpZ2l0cwpwYXIobWZyb3cgPSBjKDIsIDUpLCBtYXIgPSBjKDAuNSwgMC41LCAwLjUsIDAuNSkpCmxheW91dChtYXRyaXgoc2VxX2xlbihucm93KG1uaXN0X2NlbnRlcnMpKSwgMiwgNSwgYnlyb3cgPSBGQUxTRSkpCmZvciAoaSBpbiBzZXFfbGVuKG5yb3cobW5pc3RfY2VudGVycykpKSB7CiAgaW1hZ2UobWF0cml4KG1uaXN0X2NlbnRlcnNbaSwgXSwgMjgsIDI4KVssIDI4OjFdLCAKICAgICAgICBjb2wgPSBncmF5LmNvbG9ycygxMiwgcmV2ID0gVFJVRSksIHhheHQgPSAibiIsIHlheHQgPSAibiIpCn0KYGBgCgpgYGB7ciBtbmlzdC1jbHVzdGVyaW5nLWNvbmZ1c2lvbi1tYXRyaXgsIGZpZy5jYXA9J0NvbmZ1c2lvbiBtYXRyaXggaWxsdXN0cmF0aW5nIGhvdyB0aGUgay1tZWFucyBhbGdvcml0aG0gY2x1c3RlcmVkIHRoZSBkaWdpdHMgKHgtYXhpcykgYW5kIHRoZSBhY3R1YWwgbGFiZWxzICh5LWF4aXMpLid9CiMgQ3JlYXRlIG1vZGUgZnVuY3Rpb24KbW9kZV9mdW4gPC0gZnVuY3Rpb24oeCl7ICAKICB3aGljaC5tYXgodGFidWxhdGUoeCkpCn0KCm1uaXN0X2NvbXBhcmlzb24gPC0gZGF0YS5mcmFtZSgKICBjbHVzdGVyID0gbW5pc3RfY2x1c3RlcmluZyRjbHVzdGVyLAogIGFjdHVhbCA9IG1uaXN0JHRyYWluJGxhYmVscwopICU+JQogIGdyb3VwX2J5KGNsdXN0ZXIpICU+JQogIG11dGF0ZShtb2RlID0gbW9kZV9mdW4oYWN0dWFsKSkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZV9hbGwoZmFjdG9yLCBsZXZlbHMgPSAwOjkpCgojIENyZWF0ZSBjb25mdXNpb24gbWF0cml4IGFuZCBwbG90IHJlc3VsdHMKeWFyZHN0aWNrOjpjb25mX21hdCgKICBtbmlzdF9jb21wYXJpc29uLCAKICB0cnV0aCA9IGFjdHVhbCwgCiAgZXN0aW1hdGUgPSBtb2RlCikgJT4lCiAgYXV0b3Bsb3QodHlwZSA9ICdoZWF0bWFwJykKYGBgCgojIyBIb3cgbWFueSBjbHVzdGVycz8gCgpgYGB7ciBlbGJvdy1tZXRob2QsIGZpZy5jYXA9IlVzaW5nIHRoZSBlbGJvdyBtZXRob2QgdG8gaWRlbnRpZnkgdGhlIHByZWZlcnJlZCBudW1iZXIgb2YgY2x1c3RlcnMgaW4gdGhlIG15IGJhc2tldCBkYXRhIHNldC4ifQpmdml6X25iY2x1c3QoCiAgbXlfYmFza2V0LCAKICBrbWVhbnMsIAogIGsubWF4ID0gMjUsCiAgbWV0aG9kID0gIndzcyIsCiAgZGlzcyA9IGdldF9kaXN0KG15X2Jhc2tldCwgbWV0aG9kID0gInNwZWFybWFuIikKKQpgYGAKCiMjIENsdXN0ZXJpbmcgd2l0aCBtaXhlZCBkYXRhCgpgYGB7cn0KIyBGdWxsIGFtZXMgZGF0YSBzZXQgLS0+IHJlY29kZSBvcmRpbmFsIHZhcmlhYmxlcyB0byBudW1lcmljCmFtZXNfZnVsbCA8LSBBbWVzSG91c2luZzo6bWFrZV9hbWVzKCkgJT4lCiAgbXV0YXRlX2lmKHN0cl9kZXRlY3QobmFtZXMoLiksICdRdWFsfENvbmR8UUN8UXUnKSwgYXMubnVtZXJpYykKCiMgT25lLWhvdCBlbmNvZGUgLS0+IHJldGFpbiBvbmx5IHRoZSBmZWF0dXJlcyBhbmQgbm90IHNhbGUgcHJpY2UKZnVsbF9yYW5rICA8LSBjYXJldDo6ZHVtbXlWYXJzKFNhbGVfUHJpY2UgfiAuLCBkYXRhID0gYW1lc19mdWxsLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZ1bGxSYW5rID0gVFJVRSkKYW1lc18xaG90IDwtIHByZWRpY3QoZnVsbF9yYW5rLCBhbWVzX2Z1bGwpCgojIFNjYWxlIGRhdGEKYW1lc18xaG90X3NjYWxlZCA8LSBzY2FsZShhbWVzXzFob3QpCgojIE5ldyBkaW1lbnNpb25zCmRpbShhbWVzXzFob3Rfc2NhbGVkKQpgYGAKCmBgYHtyIGttZWFucy1zaWxob3VldHRlLW1peGVkLCBmaWcud2lkdGg9NywgZmlnLmhlaWdodD00LCBmaWcuY2FwPSJTdWdnZXN0ZWQgbnVtYmVyIG9mIGNsdXN0ZXJzIGZvciBvbmUtaG90IGVuY29kZWQgQW1lcyBkYXRhIHVzaW5nIGstbWVhbnMgY2x1c3RlcmluZyBhbmQgdGhlIGVsYm93IGNyaXRlcmlvbi4ifQpzZXQuc2VlZCgxMjMpCgpmdml6X25iY2x1c3QoCiAgYW1lc18xaG90X3NjYWxlZCwgCiAga21lYW5zLCAKICBtZXRob2QgPSAid3NzIiwgCiAgay5tYXggPSAyNSwgCiAgdmVyYm9zZSA9IEZBTFNFCikKYGBgCgpgYGB7cn0KIyBPcmlnaW5hbCBkYXRhIG1pbnVzIFNhbGVfUHJpY2UKYW1lc19mdWxsIDwtIEFtZXNIb3VzaW5nOjptYWtlX2FtZXMoKSAlPiUgc2VsZWN0KC1TYWxlX1ByaWNlKQoKIyBDb21wdXRlIEdvd2VyIGRpc3RhbmNlIGZvciBvcmlnaW5hbCBkYXRhCmdvd2VyX2RzdCA8LSBkYWlzeShhbWVzX2Z1bGwsIG1ldHJpYyA9ICJnb3dlciIpCmBgYAoKYGBge3IgZ293ZXItYmFzZWQtY2x1c3RlcmluZywgZXZhbD1GQUxTRX0KIyBZb3UgY2FuIHN1cHBseSB0aGUgR293ZXIgZGlzdGFuY2UgbWF0cml4IHRvIHNldmVyYWwgY2x1c3RlcmluZyBhbGdvcwpwYW1fZ293ZXIgPC0gcGFtKHggPSBnb3dlcl9kc3QsIGsgPSA4LCBkaXNzID0gVFJVRSkKZGlhbmFfZ293ZXIgPC0gZGlhbmEoeCA9IGdvd2VyX2RzdCwgZGlzcyA9IFRSVUUpCmFnbmVzX2dvd2VyIDwtIGFnbmVzKHggPSBnb3dlcl9kc3QsIGRpc3MgPSBUUlVFKQpgYGAKCiMjIEFsdGVybmF0aXZlIHBhcnRpdGlvbmluZyBtZXRob2RzCgpgYGB7ciBwYW0sIGZpZy53aWR0aD03LCBmaWcuaGVpZ2h0PTQsIGZpZy5jYXA9IlRvdGFsIHdpdGhpbiBzdW0gb2Ygc3F1YXJlcyBmb3IgMS0yNSBjbHVzdGVycyB1c2luZyBQQU0gY2x1c3RlcmluZy4ifQpmdml6X25iY2x1c3QoCiAgYW1lc18xaG90X3NjYWxlZCwgCiAgcGFtLCAKICBtZXRob2QgPSAid3NzIiwgCiAgay5tYXggPSAyNSwgCiAgdmVyYm9zZSA9IEZBTFNFCikKYGBgCgpgYGB7ciBjbGFyYX0KIyBrLW1lYW5zIGNvbXB1dGF0aW9uIHRpbWUgb24gTU5JU1QgZGF0YQpzeXN0ZW0udGltZShrbWVhbnMoZmVhdHVyZXMsIGNlbnRlcnMgPSAxMCkpCgojIENMQVJBIGNvbXB1dGF0aW9uIHRpbWUgb24gTU5JU1QgZGF0YQpzeXN0ZW0udGltZShjbGFyYShmZWF0dXJlcywgayA9IDEwKSkKYGBg