library(tidyverse)
library(vegan)
library(cluster)
library(factoextra)
library(fpc)

# Unlike hierarchical clustering, partitioning does not require dissimilarity
# matrices.
# We are going to use non-ecological data in this exercise to illustrate the
# different types of data that can be incorporated into this type of analysis.
# Load the USArrests data set and print a compact overview of its columns.
data("USArrests")
glimpse(USArrests)
# Recorded console output (kept for reference):
#> Observations: 50
#> Variables: 4
#> $ Murder   <dbl> 13.2, 10.0, 8.1, 8.8, 9.0, 7.9, 3.3, 5.9, 15.4, 17.4, 5.3, 2.6, 10...
#> $ Assault  <int> 236, 263, 294, 190, 276, 204, 110, 238, 335, 211, 46, 120, 249, 11...
#> $ UrbanPop <int> 58, 48, 80, 50, 91, 78, 77, 72, 80, 60, 83, 54, 83, 65, 57, 66, 52...
#> $ Rape     <dbl> 21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9, 25.8, 20.2, ...

# Let's scale the data.
# Standardize every column with scale() (default arguments) so that variables
# measured on different scales contribute comparably to the distances below.
# The result is a numeric matrix that keeps the state names as row names.
arrest.scale <- scale(USArrests)
head(arrest.scale)
# Recorded console output (kept for reference):
#>                Murder   Assault   UrbanPop         Rape
#> Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
#> Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
#> Arizona    0.07163341 1.4788032  0.9989801  1.042878388
#> Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
#> California 0.27826823 1.2628144  1.7589234  2.067820292
#> Colorado   0.02571456 0.3988593  0.8608085  1.864967207

# Let's convert this to a distance matrix using the factoextra::get_dist()
# function.
# Pairwise distances between the scaled observations (rows).
# `upper` and `diag` are passed on by get_dist(); they are assumed to affect
# only how the dist object prints, not the stored values -- TODO confirm
# against the factoextra / stats::dist documentation.
arrest.dist <- arrest.scale %>%
  get_dist(upper = TRUE, diag = TRUE)

# Visualizing the distance matrix
# Expand the dist object into a full square matrix, convert it to a data
# frame, and copy the row names into a regular column so they survive the
# reshape.
arrest.dist.df <- as.data.frame(as.matrix(arrest.dist))
arrest.dist.df$row <- rownames(arrest.dist.df)

# Long format: one record per (row, col) pair with its distance in `value`.
# pivot_longer() replaces the superseded gather(); the record order differs
# from gather(), which does not matter for the raster drawn below.
arrest_long <- arrest.dist.df %>%
  pivot_longer(cols = -row, names_to = "col", values_to = "value")

# Heatmap of the pairwise distances: one tile per pair of observations,
# coloured on a diverging red-white-blue scale centred on a distance of 3.
ggplot(data = arrest_long) +
  geom_raster(aes(x = col, y = row, fill = value)) +
  coord_equal(expand = FALSE) +
  scale_fill_gradient2(
    low = "red", mid = "white", high = "blue", midpoint = 3
  ) +
  theme_classic() +
  theme(
    # Rotate the x labels so the names stay readable.
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )