# 03 Visual Perception: R code
# Jerzy Wieczorek
# 9/8/15
# 36-721 Statistical Graphics and Visualization

# Set working directory to the lecture folder, if it exists on this machine.
# The path below is specific to one Windows setup; on any other computer
# an unguarded setwd() would stop the script with
# "cannot change working directory".
# When the folder is missing we stay in the current working directory,
# which must then contain nhanes.csv for the read.csv() call below.
lectureDir <- "D:\\Dropbox\\CMU\\36-721 Dataviz F'15\\Lecture 3"
if (isTRUE(file.info(lectureDir)$isdir)) {
  setwd(lectureDir)
} else {
  message("Lecture folder not found; staying in ", getwd())
}


#### READ AND PREP DATA ####

# Read in the data.
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0, read.csv()
# leaves text columns as character by default, but the rest of this script
# relies on GENDER being a factor (levels() for legends, and its integer
# codes for indexing colors and plot symbols).
nhanes <- read.csv("nhanes.csv", stringsAsFactors = TRUE)

# Average WEIGHT_KG within every (MONTHS, GENDER) combination.
# tapply() gives a months-by-gender table, which we turn into a data frame
# so that each gender's column can be pulled out by name.
MeanWeights <- as.data.frame(
  tapply(nhanes$WEIGHT_KG, list(nhanes$MONTHS, nhanes$GENDER), mean)
)
# Print the table of means
MeanWeights
##     Female     Male
## 0 4.345455 4.872727
## 1 5.170588 5.823077
## 2 6.053846 6.621429
## 3 6.310000 7.433333
## 4 7.284000 8.038462
## 5 7.750000 7.716667
## 6 7.614286 8.687500
#### CHOOSING COLOR, POINT SYMBOL, LINE TYPE ####

# Baseline scatterplot with gender shown by color only.
# Passing the GENDER factor as col makes R use its integer codes,
# i.e. the first two default colors (col = 1:2),
# together with the default plotting symbol (pch = 1).
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG, col = GENDER,
          ylab = "Weight (kg)", xlab = "Age (months)"))
legend("topleft", pch = 1, col = 1:2, legend = levels(nhanes$GENDER))

# Next, pick our own colors and symbols:
# a red cross for Female and a blue triangle for Male.

# GENDER is a "factor": its values are stored as integers 1:2,
# and each integer is mapped to a label ("Female" or "Male").
# Inspect the label order first, because the palettes below
# are indexed by those integers.
levels(nhanes$GENDER)
## [1] "Female" "Male"
# So 1 = Female and 2 = Male.

# Color palette, in level order (Female, Male):
myColors <- c("Red", "Blue")
# Plot symbols, looked up on the cheat sheet at
# http://www.statmethods.net/advgraphs/parameters.html
# 3 is a cross and 2 is a triangle:
mySymbols <- c(3, 2)
# Redraw the scatterplot, indexing both palettes by each row's gender
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          pch = mySymbols[GENDER], col = myColors[GENDER],
          ylab = "Weight (kg)", xlab = "Age (months)"))
legend("topleft", pch = mySymbols, col = myColors,
       legend = levels(nhanes$GENDER))

# Overlay the mean-weight curves, one line type (lty) per gender;
# we simply take the default types lty = 1:2
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          pch = mySymbols[GENDER], col = myColors[GENDER],
          ylab = "Weight (kg)", xlab = "Age (months)"))
with(MeanWeights, {
  lines(0:6, Female, col = myColors[1], lty = 1)
  lines(0:6, Male, col = myColors[2], lty = 2)
})
# The legend can combine the line type and the plot symbol in one key
legend("topleft", lty = 1:2, pch = mySymbols, col = myColors,
       legend = levels(nhanes$GENDER))

#### text ####

# When groups are distinguished by color,
# or form well-separated spatial clusters,
# a legend can be replaced by direct labels
# drawn on the plot with the text() function

with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          pch = mySymbols[GENDER], col = myColors[GENDER],
          ylab = "Weight (kg)", xlab = "Age (months)"))
# One label per gender level, placed at (4, 5) and (2, 9),
# each drawn in that gender's color
text(labels = levels(nhanes$GENDER), col = myColors,
     x = c(4, 2), y = c(5, 9))

#### matplot ####

# When several y-variables share the same x,
# matplot() can draw them all in a single call,
# provided the y variables are columns of a matrix or data frame:
matplot(0:6, MeanWeights,
        ylab = "Mean weight (kg)", xlab = "Age (months)")

# The default is points only (no lines),
# with a different color and number for each group.

# Redo it with our earlier choices:
# type = 'p' gives points, 'l' gives lines, 'b' gives both.
# Here col and pch are matched to columns rather than rows,
# so we pass mySymbols itself
# instead of mySymbols[nhanes$GENDER], etc.
matplot(0:6, MeanWeights, type = "b",
        lty = 1:2, pch = mySymbols, col = myColors,
        ylab = "Mean weight (kg)", xlab = "Age (months)")
legend("topleft", lty = 1:2, pch = mySymbols, col = myColors,
       legend = levels(nhanes$GENDER))

#### RColorBrewer ####

# The RColorBrewer package provides the color palettes
# from the Color Brewer website
# http://colorbrewer2.org/

# It has to be installed once, before first use:
## install.packages("RColorBrewer")

# After that, each new R session only needs to load it
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.1.3
# Show every available palette
display.brewer.all()

# We want a qualitative palette containing blue and red:
# ask for the first 2 colors of the palette 'Set1'
display.brewer.pal(2, "Set1")
## Warning in display.brewer.pal(2, "Set1"): minimal value for n is 3, displaying requested palette with 3 different levels

# The warning says at least 3 color levels must be requested,
# so request 3 and keep only the first two: red, then blue,
# the same order as our earlier palette
cbQualColors <- head(brewer.pal(3, "Set1"), 2)
# They are stored as character strings of hex values
cbQualColors
## [1] "#E41A1C" "#377EB8"
# Draw the mean-weight plot again with the new colors
matplot(0:6, MeanWeights, type = "b",
        lty = 1:2, pch = mySymbols, col = cbQualColors,
        ylab = "Mean weight (kg)", xlab = "Age (months)")
legend("topleft", lty = 1:2, pch = mySymbols, col = cbQualColors,
       legend = levels(nhanes$GENDER))

# Now let's color by age, and only symbol by gender,
# so that we can plot LENGTH_CM on x-axis instead of MONTHS.
# Use a sequential color scheme for the 7 MONTHS values;
# first few seem too light, so request more colors and only use later ones
display.brewer.pal(9, 'YlGn')

cbSeqColors <- brewer.pal(9, 'YlGn')[3:9]
# Plot with color mapped to MONTHS + 1,
# since MONTHS are numbers 0:6
# but we need to index the colors in cbSeqColors at 1:7.
# The x-axis shows LENGTH_CM, so it is labeled as length (not age).
plot(nhanes$LENGTH_CM, nhanes$WEIGHT_KG,
     col = cbSeqColors[nhanes$MONTHS + 1], pch = mySymbols[nhanes$GENDER],
     xlab = 'Length (cm)', ylab = 'Weight (kg)')
# Key for the plot symbols (gender)
legend('topleft', legend = levels(nhanes$GENDER), pch = mySymbols)
# Key for the colors (age), since age is no longer shown on an axis;
# entries 0:6 line up with the 7 colors in cbSeqColors
legend('bottomright', legend = 0:6, fill = cbSeqColors,
       title = 'Age (months)')

#### mfrow, layout, mtext ####

# par(mfrow = c(A, B)) splits one figure into a grid of plots
# with A rows and B columns.

# Keep a copy of the current settings so they can be restored afterwards;
# no.readonly = TRUE leaves out the read-only ones,
# which avoids warnings later when we restore them
oldPar <- par(no.readonly = TRUE)

# Female and Male side by side (1 row, 2 columns),
# with ylim set manually from all the data so the two scales match.
par(mfrow = c(1, 2))
local({
  # local() keeps these helper variables out of the workspace
  sharedYlim <- range(nhanes$WEIGHT_KG)
  for (panelGender in c("Female", "Male")) {
    with(subset(nhanes, GENDER == panelGender),
         plot(jitter(MONTHS), WEIGHT_KG, ylim = sharedYlim,
              main = panelGender,
              ylab = "Weight (kg)", xlab = "Age (months)"))
  }
})
# Put par back the way it was
par(oldPar)

# mtext() writes a joint title
# for the whole set of these small-multiples plots;
# experiment with the side and line settings
# until the joint title sits where you want it.
mtext("Weight vs Age, by Gender", line = 2.5, cex = 2)

# For an arrangement that is not a simple grid,
# the layout() function is the tool to use.
# Here: the 2 per-gender subplots on the top row,
# and a third, combined plot spanning the bottom row.
layout(rbind(c(1, 2),
             c(3, 3)))
# Preview where the 3 panels will go
layout.show(3)

local({
  # local() keeps these helper variables out of the workspace
  sharedYlim <- range(nhanes$WEIGHT_KG)
  for (panelIndex in 1:2) {
    panelGender <- c("Female", "Male")[panelIndex]
    with(subset(nhanes, GENDER == panelGender),
         plot(jitter(MONTHS), WEIGHT_KG, ylim = sharedYlim,
              pch = mySymbols[panelIndex], col = myColors[panelIndex],
              main = panelGender,
              ylab = "Weight (kg)", xlab = "Age (months)"))
  }
})
# Bottom panel: both genders together, with the default y-axis limits
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          pch = mySymbols[GENDER], col = myColors[GENDER],
          main = "Combined",
          ylab = "Weight (kg)", xlab = "Age (months)"))

# If a layout setting ever gets stuck
# and you want the basic one-plot layout back,
# running dev.off() resets the plot window and its settings
## dev.off()