# 04 Grammar of Graphics: R code
# Jerzy Wieczorek
# 9/10/15
# 36-721 Statistical Graphics and Visualization

# Set working directory
# (this path is specific to the instructor's machine, so only switch to it
#  when it actually exists; otherwise stay where we are instead of aborting.
#  In that case, run the script from the folder that contains nhanes.csv.)
lectureDir = "/home/jerzy/Downloads/36-721 Dataviz F15/Lecture 4/"
if (dir.exists(lectureDir)) {
  setwd(lectureDir)
} else {
  message("Lecture folder not found; staying in ", getwd())
}


#### LOAD GGPLOT2 ####

# Load ggplot2 package:
# Install it once if you haven't
## install.packages("ggplot2")
# Then you only need to load it once per R session
library(ggplot2)
# Replace the default gray theme with the black-and-white one
# for every plot drawn later in this session
# (the figures read better on the classroom projector)
theme_set(new = theme_bw())


#### SIMPLE EXAMPLES FROM SLIDES ####

# Subset the diamonds dataset that comes with ggplot2
# (a random sample of 100 rows, so the plots differ from run to run)
dsmall = diamonds[sample(nrow(diamonds), 100), ]

# The three plots below share the same geom and stat;
# only the aesthetic mapping and the coordinate system change.
# Because x is discrete here, the stat has to be "count":
# since ggplot2 2.0.0, stat = "bin" only accepts a continuous x
# and stops with an error on a factor.

# Bar chart
ggplot(data = dsmall, aes(x = cut, fill = cut)) +
  geom_bar(stat = "count") + coord_cartesian()

# Pie chart
# (this is a bad idea in practice; just showing GoG's flexibility)
# A single stacked bar whose height is wrapped around the circle
ggplot(data = dsmall, aes(x = factor(1), fill = cut)) +
  geom_bar(stat = "count") + coord_polar(theta = "y")

# Race track plot
# (this is a terrible idea! again, just demo'ing GoG's flexibility)
# Same spec as the bar chart, but in polar coordinates
ggplot(data = dsmall, aes(x = cut, fill = cut)) +
  geom_bar(stat = "count") + coord_polar(theta = "y")

#### READ NHANES DATASET ####

# Load the NHANES data from a CSV file in the working directory
# (the first row of the file holds the column names)
nhanes = read.csv(file = "nhanes.csv", header = TRUE)


#### COMPARE MANUAL LAYOUT TO ggplot2 FACETS ####

# Base R:
# Deal with finicky par(),
# subset by Gender repeatedly,
# repeatedly enter labels,
# set ylim manually to ensure matching scales...
# (na.rm = TRUE keeps ylim finite even if some weights are missing;
#  without it, a single NA makes plot() fail)

# Save all settable graphics parameters so they can be restored afterwards
oldPar = par(no.readonly = TRUE)
# One row, two columns: one panel per gender
par(mfrow = c(1, 2))
with(subset(nhanes, GENDER == 'Female'),
     plot(jitter(MONTHS), WEIGHT_KG,
          ylim = range(nhanes$WEIGHT_KG, na.rm = TRUE),
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Female')
)
with(subset(nhanes, GENDER == 'Male'),
     plot(jitter(MONTHS), WEIGHT_KG,
          ylim = range(nhanes$WEIGHT_KG, na.rm = TRUE),
          xlab = 'Age (months)', ylab = 'Weight (kg)', main = 'Male')
)

# Put the graphics parameters back the way they were
par(oldPar)

# ggplot2:
# A single declarative specification replaces the manual base R layout:
# jittered points, one panel per GENDER, shared scales and axis labels
ggplot(data = nhanes, mapping = aes(x = MONTHS, y = WEIGHT_KG)) +
  geom_point(position = position_jitter(width = 0.2)) +
  facet_grid(. ~ GENDER) +
  labs(x = "Age (months)", y = "Weight (kg)")

#### ggplot2: aes, geom, stat ####

# Start a plot object that holds the dataset and the aesthetics
# shared by every layer added later (length on x, weight on y,
# color by gender), and reuse the same ColorBrewer palette as last time
p = ggplot(data = nhanes,
           mapping = aes(x = LENGTH_CM, y = WEIGHT_KG, colour = GENDER)) +
  scale_colour_brewer(palette = "Set1")

# Try plotting it?
## p
# No layers yet, so there's no data to draw...
# (NOTE(review): older ggplot2 releases reportedly raise an error here,
#  while newer ones draw an empty panel -- confirm with your version)

# Add a geom_point layer, making a scatterplot
p + geom_point()

# Add transparency, setting alpha to 0.5 (50% opacity),
# to help with overplotting
p + geom_point(alpha = 0.5)

# Could plot a line instead
# (by default it connects the points,
#  although that is not appropriate here:
#  each point is a different person,
#  not the same one repeated over time)
p + geom_line()