# 03 Visual Perception: R code
# Jerzy Wieczorek
# 9/8/15
# 36-721 Statistical Graphics and Visualization

# Set working directory
# (this path is specific to the original author's machine;
# point it at the folder that contains nhanes.csv)
setwd("D:\\Dropbox\\CMU\\36-721 Dataviz F'15\\Lecture 3")


#### READ AND PREP DATA ####

# Read in the data.
# stringsAsFactors = TRUE keeps text columns such as GENDER as factors.
# Since R 4.0.0, read.csv() leaves them as character vectors by default,
# and then col = nhanes$GENDER and levels(nhanes$GENDER) below
# no longer work as described.
nhanes <- read.csv("nhanes.csv", stringsAsFactors = TRUE)

# Compute mean weights, by time and gender:
# the result has one row per MONTHS value and one column per GENDER level
MeanWeights <- tapply(X = nhanes$WEIGHT_KG,
                      INDEX = list(nhanes$MONTHS, nhanes$GENDER),
                      FUN = mean)
MeanWeights <- as.data.frame(MeanWeights)
# Look at these mean values
MeanWeights


#### CHOOSING COLOR, POINT SYMBOL, LINE TYPE ####

# Default scatterplot colored by gender:
# uses default plotting symbol (pch=1)
# and first two default colors (col=1:2)
plot(jitter(nhanes$MONTHS), nhanes$WEIGHT_KG,
     col = nhanes$GENDER,
     xlab = 'Age (months)', ylab = 'Weight (kg)')
legend('topleft', legend = levels(nhanes$GENDER),
       col = 1:2, pch = 1)

# Now, let's choose our own set of colors and symbols,
# such as red cross for Female and blue triangle for Male.
# First, check the order of levels in the GENDER variable
# (it is a "factor" variable,
# so it stores values as integers 1:2
# and maps each integer to a label "Female" or "Male")
levels(nhanes$GENDER)
# We see that 1 = Female, 2 = Male.
# Now set up color palette:
myColors <- c("Red", "Blue")
# Next look up plot symbols on the cheat sheet:
# http://www.statmethods.net/advgraphs/parameters.html
# We want symbols 3 (cross) and 2 (triangle):
mySymbols <- c(3, 2)

# Finally, plot again.
# Indexing the 2-element vectors by the GENDER factor
# picks element 1 for Female rows and element 2 for Male rows.
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          col = myColors[GENDER],
          pch = mySymbols[GENDER],
          xlab = 'Age (months)', ylab = 'Weight (kg)'))
legend('topleft', legend = levels(nhanes$GENDER),
       col = myColors, pch = mySymbols)

# Let's also add lines with different line types (lty)
# and just use default lty = 1:2
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          col = myColors[GENDER],
          pch = mySymbols[GENDER],
          xlab = 'Age (months)', ylab = 'Weight (kg)'))
# One mean-weight line per gender, drawn over ages 0 to 6 months
with(MeanWeights, {
  lines(0:6, Female, lty = 1, col = myColors[1])
  lines(0:6, Male, lty = 2, col = myColors[2])
})
# We can show both the line type and plot symbol in the legend
legend('topleft', legend = levels(nhanes$GENDER),
       col = myColors, pch = mySymbols, lty = 1:2)


#### text ####

# If we use colors to group,
# or our groups are well-separated spatial clusters,
# we can add direct labels instead of a legend
# by using the text() function
with(nhanes,
     plot(jitter(MONTHS), WEIGHT_KG,
          col = myColors[GENDER],
          pch = mySymbols[GENDER],
          xlab = 'Age (months)', ylab = 'Weight (kg)'))
# Labels are placed at (4, 5) and (2, 9), in the order of the factor levels
text(x = c(4, 2), y = c(5, 9),
     labels = levels(nhanes$GENDER),
     col = myColors)


#### matplot ####

# If we are plotting several y-variables against the same x,
# we can use matplot to plot them all at once,
# if y variables are columns of a matrix or data frame:
matplot(0:6, MeanWeights,
        xlab = 'Age (months)', ylab = 'Mean weight (kg)')
# By default it will plot points (no lines),
# with different colors and numbers for each group.
# Let's revise it to use our choices from before:
# type = 'p' for points, 'l' for lines, or 'b' for both.
# Here the col and pch map to columns, not to rows,
# so we only need to give
# mySymbols instead of mySymbols[nhanes$GENDER], etc.
matplot(0:6, MeanWeights,
        type = 'b',
        col = myColors, pch = mySymbols, lty = 1:2,
        xlab = 'Age (months)', ylab = 'Mean weight (kg)')
legend('topleft',
       legend = levels(nhanes$GENDER),
       col = myColors, pch = mySymbols, lty = 1:2)


#### RColorBrewer ####

# Install R package for the color palettes
# that match the Color Brewer website
# http://colorbrewer2.org/
# Install package the first time you do this:
## install.packages("RColorBrewer")
# Once it's been installed, you only need to load it
# in future R sessions
library(RColorBrewer)

# See all the palettes available
display.brewer.all()

# Choose a qualitative color palette with blue and red:
# Request first 2 colors from the palette 'Set1'
display.brewer.pal(2, 'Set1')
# Warning tells us we need to request 3+ color levels;
# Just save the first two levels: first red, then blue,
# to match the order from before
cbQualColors <- head(brewer.pal(3, 'Set1'), 2)
# See that they are saved as character strings of hex values
cbQualColors

# Redo the plot with these new colors
matplot(0:6, MeanWeights,
        type = 'b',
        col = cbQualColors, pch = mySymbols, lty = 1:2,
        xlab = 'Age (months)', ylab = 'Mean weight (kg)')
legend('topleft',
       legend = levels(nhanes$GENDER),
       col = cbQualColors, pch = mySymbols, lty = 1:2)

# Now let's color by age, and only symbol by gender,
# so that we can plot LENGTH_CM on x-axis instead of MONTHS.
# Use a sequential color scheme for the 7 MONTHS values;
# first few seem too light, so request more colors and only use later ones
display.brewer.pal(9, 'YlGn')
cbSeqColors <- brewer.pal(9, 'YlGn')[3:9]

# Plot with color mapped to MONTHS + 1,
# since MONTHS are numbers 0:6
# but we need to index the colors in cbSeqColors at 1:7.
# The x-axis shows LENGTH_CM here, so it is labelled as length
# (it was previously mislabelled 'Age (months)').
plot(nhanes$LENGTH_CM, nhanes$WEIGHT_KG,
     col = cbSeqColors[nhanes$MONTHS + 1],
     pch = mySymbols[nhanes$GENDER],
     xlab = 'Length (cm)', ylab = 'Weight (kg)')
legend('topleft', legend = levels(nhanes$GENDER),
       pch = mySymbols)


#### mfrow, layout, mtext ####

# We can arrange multiple plots in the same figure
# as a grid using par(mfrow = c(A, B))
# where the plots will be arranged in A rows and B columns.

# Before modifying par, save the old version so we can reset it;
# don't bother saving read-only arguments, to avoid warnings later
oldPar <- par(no.readonly = TRUE)

# Now let's plot Female and Male in side-by-side plots.
# Set ylim manually to ensure matching scales:
# both panels use the weight range of the full dataset.
par(mfrow = c(1, 2))
with(subset(nhanes, GENDER == 'Female'),
     plot(jitter(MONTHS), WEIGHT_KG,
          ylim = range(nhanes$WEIGHT_KG),
          xlab = 'Age (months)', ylab = 'Weight (kg)',
          main = 'Female')
)
with(subset(nhanes, GENDER == 'Male'),
     plot(jitter(MONTHS), WEIGHT_KG,
          ylim = range(nhanes$WEIGHT_KG),
          xlab = 'Age (months)', ylab = 'Weight (kg)',
          main = 'Male')
)
# Restore original settings of par
par(oldPar)

# Use mtext() function to write joint titles
# for the whole set of these small-multiples plots;
# play around with side and line settings
# to position the joint title where you want it.
# NOTE(review): this runs after par(oldPar), so the text is presumably
# positioned relative to the restored single-plot layout rather than
# the last panel -- adjust `line` if it collides with the panel titles.
mtext("Weight vs Age, by Gender", cex = 2, line = 2.5)

# If you want a layout that is not a simple grid,
# use layout() function instead.
# Let's plot 2 subplots by gender on top,
# and a third joint plot (combined) on bottom.
layout(matrix(c(1, 2, 3, 3), nrow = 2, ncol = 2, byrow = TRUE))
# See the result
layout.show(3)

# Top row: one panel per gender, Female first and then Male,
# each with its own color and symbol and a shared y-axis range
for (i in 1:2) {
  genderName <- c('Female', 'Male')[i]
  oneGender <- subset(nhanes, GENDER == genderName)
  plot(jitter(oneGender$MONTHS), oneGender$WEIGHT_KG,
       ylim = range(nhanes$WEIGHT_KG),
       col = myColors[i], pch = mySymbols[i],
       xlab = 'Age (months)', ylab = 'Weight (kg)',
       main = genderName)
}

# Bottom row: both genders together in the third (wide) panel
plot(jitter(nhanes$MONTHS), nhanes$WEIGHT_KG,
     col = myColors[nhanes$GENDER],
     pch = mySymbols[nhanes$GENDER],
     xlab = 'Age (months)', ylab = 'Weight (kg)',
     main = 'Combined')

# If you get stuck in a layout setting
# and want to revert to the basic one-plot layout,
# you can run dev.off() to reset the plot window and settings
## dev.off()