# 03 Visual Perception: R code
# Jerzy Wieczorek
# 9/8/15
# 36-721 Statistical Graphics and Visualization

# Set working directory to the lecture folder.
# The path below is specific to the original author's machine, so only
# switch to it when it actually exists; otherwise keep the current
# working directory (with a warning) instead of halting the whole script.
lectureDir = "D:\\Dropbox\\CMU\\36-721 Dataviz F'15\\Lecture 3"
if (file.exists(lectureDir)) {
  setwd(lectureDir)
} else {
  warning("Lecture directory not found; keeping working directory: ", getwd())
}

#### READ AND PREP DATA ####

# NOTE(review): `nhanes` is not created anywhere in the visible part of this
# script; it must already be loaded before this point -- TODO confirm source.

# Compute mean weights, by time and gender
MeanWeights = tapply(X = nhanes$WEIGHT_KG,
                     INDEX = list(nhanes$MONTHS, nhanes$GENDER),
                     FUN = mean)
# tapply returns a MONTHS-by-GENDER matrix; convert it so that the
# columns can be accessed later as MeanWeights$Female / MeanWeights$Male
MeanWeights = as.data.frame(MeanWeights)
# Look at these mean values
MeanWeights
##     Female     Male
## 0 4.345455 4.872727
## 1 5.170588 5.823077
## 2 6.053846 6.621429
## 3 6.310000 7.433333
## 4 7.284000 8.038462
## 5 7.750000 7.716667
## 6 7.614286 8.687500

#### CHOOSING COLOR, POINT SYMBOL, LINE TYPE ####

# Default scatterplot colored by gender:
# uses default plotting symbol (pch=1)
# and first two default colors (col=1:2)
plot(jitter(nhanes$MONTHS), nhanes$WEIGHT_KG, col = nhanes$GENDER,
     xlab = 'Age (months)', ylab = 'Weight (kg)')
# Legend for the default scatterplot: default colors 1:2 and symbol pch = 1
legend('topleft', legend = levels(nhanes$GENDER), col = 1:2, pch = 1)

# Now, let's choose our own set of colors and symbols,
# such as red cross for Female and blue triangle for Male.
# First, check the order of levels in the GENDER variable
# (it is a "factor" variable,
# so it stores values as integers 1:2
# and maps each integer to a label "Female" or "Male")
levels(nhanes$GENDER)
## [1] "Female" "Male"
# We see that 1 = Female, 2 = Male.

# Build the color palette in factor-level order
# (1st entry is used for Female, 2nd for Male):
myColors = c("Red", "Blue")
# Pick plotting symbols from the pch cheat sheet, in the same order:
# 3 is a cross and 2 is a triangle.
mySymbols = c(3, 2)
# Redraw the scatterplot; indexing each vector by the GENDER factor
# selects the color and symbol for every observation.
plot(
  x = jitter(nhanes$MONTHS),
  y = nhanes$WEIGHT_KG,
  col = myColors[nhanes$GENDER],
  pch = mySymbols[nhanes$GENDER],
  xlab = "Age (months)",
  ylab = "Weight (kg)"
)
# Legend for the scatterplot with custom colors and symbols
legend('topleft', legend = levels(nhanes$GENDER),
       col = myColors, pch = mySymbols)

# Let's also add lines with different line types (lty)
# and just use default lty = 1:2
plot(jitter(nhanes$MONTHS), nhanes$WEIGHT_KG,
     col = myColors[nhanes$GENDER], pch = mySymbols[nhanes$GENDER],
     xlab = 'Age (months)', ylab = 'Weight (kg)')
# Overlay the mean weight at each month (0:6), one line per gender
lines(0:6, MeanWeights$Female, lty = 1, col = myColors[1])
lines(0:6, MeanWeights$Male, lty = 2, col = myColors[2])
# We can show both the line type and plot symbol in the legend
legend('topleft', legend = levels(nhanes$GENDER),
       col = myColors, pch = mySymbols, lty = 1:2)

#### text ####

# If we use colors to group,
# or our groups are well-separated spatial clusters,
# we can label the groups directly on the plot instead of using a legend,
# by using the text() function

plot(jitter(nhanes$MONTHS), nhanes$WEIGHT_KG,
     col = myColors[nhanes$GENDER], pch = mySymbols[nhanes$GENDER],
     xlab = 'Age (months)', ylab = 'Weight (kg)')
# Write each group's label inside the plot region, in that group's color
text(x = c(4, 2), y = c(5, 9), labels = levels(nhanes$GENDER), col = myColors)

#### matplot ####

# If we are plotting several y-variables against the same x,
# we can use matplot to plot them all at once,
# if y variables are columns of a matrix or data frame:
matplot(0:6, MeanWeights, xlab = 'Age (months)', ylab = 'Mean weight (kg)')
# By default it will plot points (no lines),
# with different colors and numbers for each group.

# Let's revise it to use our choices from before:
# type = 'p' for points, 'l' for lines, or 'b' for both.
# Here the col and pch map to columns, not to rows,
# so we only need to give
# mySymbols instead of mySymbols[nhanes$GENDER], etc.
matplot(0:6, MeanWeights, xlab = 'Age (months)', ylab = 'Mean weight (kg)',
        type = 'b', pch = mySymbols, col = myColors, lty = 1:2)
legend('topleft', legend = levels(nhanes$GENDER),
       col = myColors, pch = mySymbols, lty = 1:2)

#### RColorBrewer ####

# Install R package for the color palettes
# that match the Color Brewer website
# http://colorbrewer2.org/

# Install package the first time you do this:
## install.packages("RColorBrewer")
# Once it's been installed, you only need to load it
# in future R sessions
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.1.3

# See all the palettes available
display.brewer.all()

# Choose a qualitative color palette with blue and red:
# Request first 2 colors from the palette 'Set1'
display.brewer.pal(2, 'Set1')
## Warning in display.brewer.pal(2, "Set1"): minimal value for n is 3, displaying requested palette with 3 different levels
# Warning tells us we need to request 3+ color levels;
# Just save the first two levels: first red, then blue,
# to match the order from before
cbQualColors = brewer.pal(3, 'Set1')[1:2]
# See that they are saved as character strings of hex values
cbQualColors
## [1] "#E41A1C" "#377EB8"

# Redo the plot with these new colors
matplot(0:6, MeanWeights, xlab = 'Age (months)', ylab = 'Mean weight (kg)',
        type = 'b', pch = mySymbols, col = cbQualColors, lty = 1:2)
legend('topleft', legend = levels(nhanes$GENDER),
       col = cbQualColors, pch = mySymbols, lty = 1:2)

# Now let's color by age, and only symbol by gender,
# so that we can plot LENGTH_CM on x-axis instead of MONTHS.
# Use a sequential color scheme for the 7 MONTHS values;
# first few seem too light, so request more colors and only use later ones
display.brewer.pal(9, 'YlGn')

# Keep the 7 darkest of the 9 'YlGn' levels (one per MONTHS value)
cbSeqColors = brewer.pal(9, 'YlGn')[3:9]
# Plot with color mapped to MONTHS + 1,
# since MONTHS are numbers 0:6
# but we need to index the colors in cbSeqColors at 1:7.
# The x-axis shows LENGTH_CM here, so it is labeled as length, not age.
plot(nhanes$LENGTH_CM, nhanes$WEIGHT_KG,
     col = cbSeqColors[nhanes$MONTHS + 1], pch = mySymbols[nhanes$GENDER],
     xlab = 'Length (cm)', ylab = 'Weight (kg)')
# Legend for the symbols only (color encodes age in the plot above)
legend('topleft', legend = levels(nhanes$GENDER), pch = mySymbols)

#### mfrow, layout, mtext ####

# We can arrange multiple plots in the same figure
# as a grid using par(mfrow = c(A, B))
# where the plots will be arranged in A rows and B columns.

# Before modifying par, save the old version so we can reset it;
# don't bother saving read-only arguments, to avoid warnings later
oldPar = par(no.readonly = TRUE)

# Now let's plot Female and Male in side-by-side plots.
# Set ylim manually to ensure matching scales.
par(mfrow = c(1, 2))
with(subset(nhanes, GENDER == 'Female'),
     plot(jitter(MONTHS), WEIGHT_KG, ylim = range(nhanes$WEIGHT_KG),
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Female')
)
# Second panel of the side-by-side pair; ylim uses the full data range
# so that both panels share the same y scale
with(subset(nhanes, GENDER == 'Male'),
     plot(jitter(MONTHS), WEIGHT_KG, ylim = range(nhanes$WEIGHT_KG),
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Male')
)
# Restore original settings of par
par(oldPar)

# Use mtext() function to write joint titles
# for the whole set of these small-multiples plots;
# play around with side and line settings
# to position the joint title where you want it.
mtext("Weight vs Age, by Gender", cex = 2, line = 2.5)

# If you want a layout that is not a simple grid,
# use layout() function instead.
# Let's plot 2 subplots by gender on top,
# and a third joint plot (combined) on bottom.
layout(matrix(c(1, 2, 3, 3), 2, 2, byrow = TRUE))
# See the result
layout.show(3)
with(subset(nhanes, GENDER == 'Female'),
     plot(jitter(MONTHS), WEIGHT_KG, ylim = range(nhanes$WEIGHT_KG),
          col = myColors[1], pch = mySymbols[1],
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Female')
)
# Top-right panel of the layout: Male only, on the shared y scale
with(subset(nhanes, GENDER == 'Male'),
     plot(jitter(MONTHS), WEIGHT_KG, ylim = range(nhanes$WEIGHT_KG),
          col = myColors[2], pch = mySymbols[2],
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Male')
)
# Bottom panel of the layout: both genders together,
# with color and symbol chosen per observation by GENDER
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          col = myColors[nhanes$GENDER], pch = mySymbols[nhanes$GENDER],
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Combined')
)

# If you get stuck in a layout setting
# and want to revert to the basic one-plot layout,
# you can run dev.off() to reset the plot window and settings
## dev.off()