Difference between Transformations and Standardizations

Create some data

# Toy abundance matrix: 5 rows by 6 columns, filled row by row.
# Rows are left unnamed; columns are labelled species_A ... species_F.
rawdata <- matrix(
  c(
     1,  1,  1,  3,  3, 1,
     2,  2,  4,  6,  6, 0,
    10, 10, 20, 30, 30, 0,
     3,  3,  2,  1,  1, 0,
     0,  0,  0, 20,  0, 0
  ),
  nrow = 5,
  byrow = TRUE,
  dimnames = list(NULL, paste0("species_", LETTERS[1:6]))
)
rawdata
     species_A species_B species_C species_D species_E species_F
[1,]         1         1         1         3         3         1
[2,]         2         2         4         6         6         0
[3,]        10        10        20        30        30         0
[4,]         3         3         2         1         1         0
[5,]         0         0         0        20         0         0

Calculating row and column statistics

Rows

# Row sums
rowSums(rawdata)
[1]  10  20 100  10  20
apply(rawdata, 1, sum)
[1]  10  20 100  10  20
# Max values
apply(rawdata, 1, max)
[1]  3  6 30  3 20

Columns

# Sums
apply(rawdata, 2, sum)
species_A species_B species_C species_D species_E species_F 
       16        16        27        60        40         1 
colSums(rawdata)
species_A species_B species_C species_D species_E species_F 
       16        16        27        60        40         1 
# Max
apply(rawdata, 2, max)
species_A species_B species_C species_D species_E species_F 
       10        10        20        30        30         1 

Monotonic transformations

Log transformations

  • Useful for when you have a wide spread in data values

  • It is important that you add 1 to values to account for zeros: log10(x+1)

# log10(x + 1) transform of every cell of rawdata. Adding 1 keeps zero counts
# finite (log10(0 + 1) is 0). log10() is vectorized, so the whole matrix is
# transformed in one call and keeps its dimensions and column names.
logdata <- log10(rawdata + 1)
library(tidyverse)
# Read hemlock_cover.csv straight from GitHub.
hemlock <- read_csv(
  "https://raw.githubusercontent.com/chrischizinski/SNR_R_Group/master/data/hemlock_cover.csv"
)

# Add a log10(x + 1) version of the Tsuga.canadensis column.
hemlock[["logTsuga"]] <- log10(hemlock[["Tsuga.canadensis"]] + 1)

# Compact overview of the columns and their types.
glimpse(hemlock)
Observations: 98
Variables: 3
$ Site             <int> 1001, 1002, 1003, 1004, 1005, 1006, 1011, 1102, 1108, 1110, 1112, 1201, 1203, 1206, 1305, 1306, 1307, 1...
$ Tsuga.canadensis <dbl> 0.1, 34.0, 42.0, 21.0, 37.0, 18.0, 9.0, 0.0, 0.0, 0.0, 43.0, 8.0, 0.0, 4.0, 45.0, 25.0, 0.0, 0.0, 0.0, ...
$ logTsuga         <dbl> 0.04139269, 1.54406804, 1.63346846, 1.34242268, 1.57978360, 1.27875360, 1.00000000, 0.00000000, 0.00000...
# Histogram of the untransformed Tsuga.canadensis values in bins of width 5.
# The y-axis is fixed to 0-30 with no padding around the limits.
ggplot(data = hemlock) + 
  geom_histogram(aes(Tsuga.canadensis), binwidth = 5, colour = "black", fill = "dodgerblue") + 
  coord_cartesian(ylim = c(0, 30), expand = FALSE) +
  theme_bw()

# Histogram of the log10(x + 1)-transformed values (logTsuga) using 30 bins.
# The y-axis is fixed to 0-30 with no padding around the limits.
ggplot(data = hemlock) + 
  geom_histogram(aes(logTsuga), bins = 30, colour = "black", fill = "red") + 
  coord_cartesian(ylim = c(0, 30), expand = FALSE) +
  theme_bw()

Power transformations

  • Square root transformation is most often used for Poisson-type data (count data)
  • Greater the power, the greater the compression of the data
  • Flexible for a wide range of data
  • Applied when the data is > 0
Write power function
#' Root (power) transformation
#'
#' Raises every strictly positive element of `x` to the power `1 / trans`,
#' i.e. takes its `trans`-th root. Elements that are not greater than zero
#' are returned as 0.
#'
#' @param x Numeric vector (or matrix) of values to transform.
#' @param trans Root to take, e.g. 2 for a square root, 3 for a cube root.
#' @return The transformed values, in the same shape as `x`.
pwr_trans <- function(x, trans) {
  ifelse(test = x > 0, yes = x^(1 / trans), no = 0)
}
pwr_trans(25,2)
[1] 5
pwr_trans(0,2)
[1] 0

Display the effect of the power function

# Evaluate the cube root and the 10th root over 0..100 so the two curves can
# be compared. local() keeps the helper vector out of the workspace.
newdata <- local({
  x <- 0:100
  data.frame(
    x = x,
    cubic = pwr_trans(x = x, trans = 3),
    power10 = pwr_trans(x = x, trans = 10)
  )
})
head(newdata)
# Plot both root transformations against x: cube root in blue, 10th root in
# red. Axis limits are fixed with no padding; `expand` uses FALSE rather than
# the reassignable shorthand F.
ggplot(data = newdata) +
  geom_line(aes(x = x, y = cubic), size = 1, colour = "blue") +
  geom_line(aes(x = x, y = power10), size = 1, colour = "red") +
  labs(y = "Value") +
  coord_cartesian(xlim = c(0, 100.5), ylim = c(0, 5), expand = FALSE) +
  theme_classic()

NA

Presence absence transformation

  • Transforms quantitative data to non-quantitative (binary)
  • Applicable to species data
  • Most useful when there is not a lot of quantitative data available (i.e., LOTS of zeros)
  • Severe transformation (i.e., lose lots of information)
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.4-3
decostand(rawdata, method = "pa")
     species_A species_B species_C species_D species_E species_F
[1,]         1         1         1         1         1         1
[2,]         1         1         1         1         1         0
[3,]         1         1         1         1         1         0
[4,]         1         1         1         1         1         0
[5,]         0         0         0         1         0         0
attr(,"decostand")
[1] "pa"

Arcsine transformation

Please NOTE: The arcsine is asinine: the analysis of proportions in ecology

  • Transformations on proportion data (0-1)
  • Useful when you have a positive skew in data
    • Spreads the end of the scale while compressing the middle

Standardizations

Sums

  • Can be applied to any range of x
  • Output will range 0 - 1
  • Converts values to a relative value (equalizes the area under the curve)
  • Useful when you have large difference in total abundance

Rows

# Row totals of rawdata.
ttl_species <- rowSums(rawdata)
# Divide every value by the total of its own row (row-wise proportions).
rowprop_data <- sweep(rawdata, MARGIN = 1, STATS = ttl_species, FUN = "/")
  
rowprop_data
     species_A species_B species_C species_D species_E species_F
[1,]       0.1       0.1       0.1       0.3       0.3       0.1
[2,]       0.1       0.1       0.2       0.3       0.3       0.0
[3,]       0.1       0.1       0.2       0.3       0.3       0.0
[4,]       0.3       0.3       0.2       0.1       0.1       0.0
[5,]       0.0       0.0       0.0       1.0       0.0       0.0
decostand(rawdata, margin = 1, method = "total")
     species_A species_B species_C species_D species_E species_F
[1,]       0.1       0.1       0.1       0.3       0.3       0.1
[2,]       0.1       0.1       0.2       0.3       0.3       0.0
[3,]       0.1       0.1       0.2       0.3       0.3       0.0
[4,]       0.3       0.3       0.2       0.1       0.1       0.0
[5,]       0.0       0.0       0.0       1.0       0.0       0.0
attr(,"decostand")
[1] "total"

Columns

# Divide every value by the total of its own column (column-wise proportions).
# sweep() keeps the species column names, which a matrix product with an
# unnamed diag() matrix would drop.
colprop_data <- sweep(rawdata, MARGIN = 2, STATS = colSums(rawdata), FUN = "/")
LS0tCnRpdGxlOiAiSW50cm8gdG8gTXVsdGl2YXJpYXRlIFN0YXRzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCmBgYAoKIyMgRGlmZmVyZW5jZSBiZXR3ZWVuIFRyYW5zZm9ybWF0aW9ucyBhbmQgU3RhbmRhcmRpemF0aW9ucwoKLSBUcmFuc2Zvcm1hdGlvbnMgYXJlIGFwcGxpZWQgdG8gZWFjaCBlbGVtZW50IGluIGEgbWF0cml4Ci0gU3RhbmRhcmRpemF0aW9uIGFkanVzdCBlbGVtZW50cyBpbiBhIG1hdHJpeCBieSBhIHJvdyBvciBjb2x1bW4gc3RhdGlzdGljIAoKIyMjIENyZWF0ZSBzb21lIGRhdGEKCmBgYHtyfQoKcmF3ZGF0YSA8LSBtYXRyaXgoYygxLDEsMSwzLDMsMSwKICAgICAgICAgICAgICAgICAgICAyLDIsNCw2LDYsMCwKICAgICAgICAgICAgICAgICAgICAxMCwxMCwyMCwzMCwzMCwwLAogICAgICAgICAgICAgICAgICAgIDMsMywyLDEsMSwwLAogICAgICAgICAgICAgICAgICAgIDAsMCwwLDIwLDAsMCksIG5jb2wgPSA2LCBieXJvdyA9IFRSVUUpCmNvbG5hbWVzKHJhd2RhdGEpIDwtIHBhc3RlKCJzcGVjaWVzIix0b3VwcGVyKGxldHRlcnNbMTo2XSksIHNlcCA9ICJfIikKCnJhd2RhdGEKYGBgCgojIyMgQ2FsY3VsYXRpbmcgcm93IGFuZCBjb2x1bW4gc3RhdGlzdGljcwoKIyMjIyBSb3dzCgpgYGB7cn0KIyBSb3cgc3Vtcwpyb3dTdW1zKHJhd2RhdGEpCgphcHBseShyYXdkYXRhLCAxLCBzdW0pCgojIE1heCB2YWx1ZXMKYXBwbHkocmF3ZGF0YSwgMSwgbWF4KQpgYGAKCiMjIyMgQ29sdW1ucyAKCmBgYHtyfQojIFN1bXMKYXBwbHkocmF3ZGF0YSwgMiwgc3VtKQpjb2xTdW1zKHJhd2RhdGEpCgojIE1heAphcHBseShyYXdkYXRhLCAyLCBtYXgpCmBgYAoKIyMjIE1vbm90b25pYyB0cmFuc2Zvcm1hdGlvbnMgCgojIyMjIExvZyB0cmFuc2Zvcm1hdGlvbnMgCgotIFVzZWZ1bCBmb3Igd2hlbiB5b3UgaGF2ZSBhIHdpZGUgc3ByZWFkIGluIGRhdGEgdmFsdWVzCgotIElyIGlzIGltcG9ydGFudCB0aGF0IHlvdSBhZGQgMSB0byB2YWx1ZXMgdG8gYWNjb3VudCBmb3IgemVyb3MgYGxvZzEwKHgrMSlgCgpgYGB7cn0KbG9nZGF0YSA8LSBhcHBseShyYXdkYXRhICwgYygxLDIpLCBmdW5jdGlvbih4KSBsb2cxMCh4ICsgMSkpCmBgYAoKYGBge3IgbWVzc2FnZSA9IEZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCmhlbWxvY2sgPC0gcmVhZF9jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9jaHJpc2NoaXppbnNraS9TTlJfUl9Hcm91cC9tYXN0ZXIvZGF0YS9oZW1sb2NrX2NvdmVyLmNzdiIpCgoKCmhlbWxvY2skbG9nVHN1Z2E8LSBsb2cxMChoZW1sb2NrJFRzdWdhLmNhbmFkZW5zaXMgKzEpICMgbG9nIHRyYW5zZm9ybSAKCmdsaW1wc2UoaGVtbG9jaykKCmBgYAoKYGBge3J9CmdncGxvdChkYXRhID0g
aGVtbG9jaykgKyAKICBnZW9tX2hpc3RvZ3JhbShhZXMoVHN1Z2EuY2FuYWRlbnNpcyksIGJpbndpZHRoID0gNSwgY29sb3VyID0gImJsYWNrIiwgZmlsbCA9ICJkb2RnZXJibHVlIikgKyAKICBjb29yZF9jYXJ0ZXNpYW4oeWxpbSA9IGMoMCwgMzApLCBleHBhbmQgPSBGQUxTRSkgKwogIHRoZW1lX2J3KCkKYGBgCgpgYGB7cn0KZ2dwbG90KGRhdGEgPSBoZW1sb2NrKSArIAogIGdlb21faGlzdG9ncmFtKGFlcyhsb2dUc3VnYSksIGJpbnMgPSAzMCwgY29sb3VyID0gImJsYWNrIiwgZmlsbCA9ICJyZWQiKSArIAogIGNvb3JkX2NhcnRlc2lhbih5bGltID0gYygwLCAzMCksIGV4cGFuZCA9IEZBTFNFKSArCiAgdGhlbWVfYncoKQpgYGAKCiMjIyMgUG93ZXIgdHJhbmZvcm1hdGlvbnMKCi0gU3F1YXJlIHJvb3QgdHJhbnNmb3JtYXRpb24gaXMgbW9zdCBvZnRlbiB1c2VkIGZvciBQb2lzc29uIHR5cGUgZGF0ZSAoY291bnQgZGF0YSkKLSBHcmVhdGVyIHRoZSBwb3dlciwgdGhlIGdyZWF0ZXIgdGhlIGNvbXByZXNzaW9uIG9mIHRoZSBkYXRhCi0gRmxleGlibGUgZm9yIGEgd2lkZSByYW5nZSBvZiBkYXRhCi0gQXBwbGllZCB3aGVuIHRoZSBkYXRhIGlzID4gMAoKIyMjIyMgV3JpdGUgcG93ZXIgZnVuY3Rpb24KCmBgYHtyfQpwd3JfdHJhbnMgPC0gZnVuY3Rpb24oeCwgdHJhbnMpewogIHggPC0gaWZlbHNlKHg+MCx4XigxL3RyYW5zKSwwKQogIHJldHVybih4KQp9Cgpwd3JfdHJhbnMoMjUsMikKCnB3cl90cmFucygwLDIpCgpgYGAKCiMjIyMgRGlzcGxheSB0aGUgZWZmZWN0IG9mIHRoZSBwb3dlciBmdW5jdGlvbgoKYGBge3J9Cm5ld2RhdGEgPC0gZGF0YS5mcmFtZSh4ID0gMDoxMDAsIAogICAgICAgICAgICAgICAgICAgICAgY3ViaWMgPSBwd3JfdHJhbnMoeD0wOjEwMCwgdHJhbnMgPSAzKSwKICAgICAgICAgICAgICAgICAgICAgIHBvd2VyMTAgPSBwd3JfdHJhbnMoeD0wOjEwMCwgdHJhbnMgPSAxMCkpCgpoZWFkKG5ld2RhdGEpCmBgYAoKYGBge3J9CmdncGxvdChkYXRhID0gbmV3ZGF0YSkgKwogIGdlb21fbGluZShhZXMoeCA9IHgsIHkgPSBjdWJpYyksIHNpemUgPSAxLCBjb2xvdXIgPSAiYmx1ZSIpICsKICBnZW9tX2xpbmUoYWVzKHggPSB4LCB5ID0gcG93ZXIxMCksIHNpemUgPSAxLCBjb2xvdXIgPSAicmVkIikgKwogIGxhYnMoeSA9ICJWYWx1ZSIpICsKICBjb29yZF9jYXJ0ZXNpYW4oeGxpbSA9IGMoMCwxMDAuNSksIHlsaW0gPSBjKDAsNSksIGV4cGFuZCA9IEYpKwogIHRoZW1lX2NsYXNzaWMoKQogIApgYGAKCgojIyMjIFByZXNlbmNlIGFic2VuY2UgdHJhbnNmb3JtYXRpb24KCi0gVHJhbnNmb3JtcyBxdWFudGl0YXRpdmUgZGF0YSB0byBub24tcXVhbml0YXRpdmUgKGJpbmFyeSkKLSBBcHBsaWNhYmxlIHRvIHNwZWNpZXMgZGF0YQotIE1vc3QgdXNlZnVsIHdoZW4gdGhlcmUgaXMgbm90IGEgbG90IG9mIHF1YW50aXRhdGl2ZSBkYXRhIGF2YWlsYWJsZSAoaS5lLiwgTE9UUyBvZiB6ZXJvcykKLSBTZXZlcmUgdHJhbnNmb3JtYXRp
b24gKGkuZS4sIGxvb3NlIGxvdHMgb2YgaW5mb3JtYXRpb24pIAoKYGBge3J9CmxpYnJhcnkodmVnYW4pCmRlY29zdGFuZChyYXdkYXRhLCBtZXRob2QgPSAicGEiKQpgYGAKCiMjIyMgQXJjc2luZSB0cmFuc2Zvcm1hdGlvbgoKUGxlYXNlIE5PVEU6IFtUaGUgYXJjc2luZSBpcyBhc2luaW5lOiB0aGUgYW5hbHlzaXMgb2YgcHJvcG9ydGlvbnMgaW4gZWNvbG9neV0oaHR0cDovL29ubGluZWxpYnJhcnkud2lsZXkuY29tL2RvaS8xMC4xODkwLzEwLTAzNDAuMS9hYnN0cmFjdCkKCi0gVHJhbnNmb3JtYXRpb25zIG9uIHByb3BvcnRpb24gZGF0YSAoMC0xKQotIFVzZWZ1bCB3aGVuIHlvdSBoYXZlIGEgcG9zaXRpdmUgc2tldyBpbiBkYXRhCiAgICAtIFNwcmVhZHMgdGhlIGVuZCBvZiB0aGUgc2NhbGUgd2hpbGUgY29tcHJlc3NpbmcgdGhlIG1pZGRsZSAKCiMjIyBTdGFuZGFyZGl6YXRpb25zCgojIyMgU3VtcyAKLSBDYW4gYmUgYXBwbGllZCB0byBhbnkgcmFuZ2Ugb2YgeAotIE91dHB1dCB3aWxsIHJhbmdlIDAgLSAxCi0gQ29udmVydHMgdmFsdWVzIHRvIGEgcmVsYXRpdmUgdmFsdWUgKGVxdWFsaXplcyB0aGUgYXJlYSB1bmRlciB0aGUgY3VydmUpIAotIFVzZWZ1bCB3aGVuIHlvdSBoYXZlIGxhcmdlIGRpZmZlcmVuY2UgaW4gdG90YWwgYWJ1bmRhbmNlCgojIyMjIFJvd3MKCmBgYHtyfQp0dGxfc3BlY2llcyA8LSBhcHBseShyYXdkYXRhLCAxLCBzdW0pCnJvd3Byb3BfZGF0YSA8LSByYXdkYXRhIC8gdHRsX3NwZWNpZXMKICAKcm93cHJvcF9kYXRhCgpkZWNvc3RhbmQocmF3ZGF0YSwgbWFyZ2luID0gMSwgbWV0aG9kID0gInRvdGFsIikKYGBgCgojIyMjIENvbHVtbnMKCmBgYHtyfQpjb2xwcm9wX2RhdGEgPC0gcmF3ZGF0YSAlKiUgZGlhZygxL2FwcGx5KHJhd2RhdGEsMixzdW0pKQpgYGAKCg==