# 02 Legible Graphics: R code
# Jerzy Wieczorek
# 9/3/15
# 36-721 Statistical Graphics and Visualization

# Set working directory
# NOTE(review): this absolute path is specific to one machine. The relative
# file names used later in this script (e.g. "nhanes.csv") are resolved
# against it, so point it at the folder that holds your copy of the data.
setwd("/home/jerzy/Downloads/36-721 Dataviz F15/Lecture 2/")


#### READ DATA, MAKE A PLOT, GET HELP ####

# Read in the data.
# stringsAsFactors = TRUE keeps the text columns (GENDER, RACETH) as factors.
# Since R 4.0.0 read.csv() leaves them as character by default, which would
# change the summary() output below and break the later code that colors
# points by GENDER and uses levels(nhanes$GENDER).
nhanes <- read.csv("nhanes.csv", stringsAsFactors = TRUE)
# See a quick overview of the dataset:
# all columns and their content
# (numeric columns get a five-number summary plus the mean,
#  factor columns get a count per level)
summary(nhanes)
##        ID           GENDER        MONTHS                     RACETH  
##  Min.   :62207   Female:101   Min.   :0.000   Hispanic          :83  
##  1st Qu.:64577   Male  :108   1st Qu.:1.000   Non-Hispanic Black:53  
##  Median :66783                Median :3.000   Non-Hispanic White:73  
##  Mean   :67057                Mean   :2.909                          
##  3rd Qu.:69557                3rd Qu.:5.000                          
##  Max.   :71910                Max.   :6.000                          
##    WEIGHT_KG        LENGTH_CM       HEAD_CM     
##  Min.   : 3.600   Min.   :48.3   Min.   :34.60  
##  1st Qu.: 5.400   1st Qu.:58.5   1st Qu.:39.40  
##  Median : 6.800   Median :62.3   Median :41.50  
##  Mean   : 6.689   Mean   :62.3   Mean   :41.27  
##  3rd Qu.: 7.900   3rd Qu.:66.6   3rd Qu.:43.10  
##  Max.   :10.800   Max.   :86.6   Max.   :48.40
# Pull out one column by name and average it
mean(nhanes[["LENGTH_CM"]])
## [1] 62.29522
# Scatterplot with every setting left at its default:
# weight on the x-axis, length on the y-axis.
# (With no xlab / ylab given, the axis labels are the
#  argument expressions exactly as typed here.)
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM)

# Open the help page for plot
# (long form of typing ?plot):
help("plot")

# Run the usage examples from that help page:
example("plot")

# See also the Quick-R website's sections on Basic Graphs and Advanced Graphs.

#### SAVE PLOTS ####

# Write the default scatterplot to a bitmap (png) file:
# open the file device, draw the plot, then close the device
png(filename = "SimplePNG.png")
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM)
dev.off()

# Same plot, now choosing the image size and the font size
# (width and height are in pixels unless units says otherwise)
png(filename = "NicerPNG.png", pointsize = 16, width = 500, height = 500)
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM)
dev.off()

# Same plot again, with the size given in inches
# and an explicit resolution (res = pixels per inch)
png(filename = "NicestPNG.png", units = "in", width = 5, height = 5,
    res = 300, pointsize = 16)
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM)
dev.off()

# Write the default scatterplot to a vector (pdf) file:
# open the file device, draw the plot, then close the device
pdf(file = "SimplePDF.pdf")
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM)
dev.off()

# Same plot, now choosing the page size (in inches) and the font size
pdf(file = "NicerPDF.pdf", pointsize = 12, width = 5.4, height = 5.4)
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM)
dev.off()

# Vector images have no pixel grid, so there is no resolution to set
#### LABEL AND ANNOTATE PLOTS ####

# Replace the default axis labels with readable ones
# (xlab = X-axis LABel, ylab = Y-axis LABel)
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM,
     ylab = "Length (cm)", xlab = "Weight (kg)")

# Same plot, plus a main title above it
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM,
     main = "Length vs weight for babies 0-6 months old",
     ylab = "Length (cm)", xlab = "Weight (kg)")

# Same plot, with the tick labels turned horizontal
# (las = LAbel Style; 1 = always horizontal)
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM,
     las = 1,
     main = "Length vs weight for babies 0-6 months old",
     ylab = "Length (cm)", xlab = "Weight (kg)")

# Color points by GENDER
# (col = COLor)
# and break main title into two lines
# (\n = newline)
# as.factor() makes this work whether GENDER was read in as a factor or as
# plain text (the read.csv default since R 4.0.0); a character vector passed
# straight to col would be treated as color names and fail.
gender <- as.factor(nhanes$GENDER)
plot(nhanes$WEIGHT_KG, nhanes$LENGTH_CM,
     xlab = "Weight (kg)", ylab = "Length (cm)",
     main = "Length vs weight for babies 0-6 months old\ncolored by gender",
     # integer level codes (1, 2, ...) pick colors from the current palette
     las = 1, col = as.integer(gender))

# Add a legend to existing plot:
# must specify plotting symbol to show points,
# or line type / width to show lines
# (pch = Plotting CHaracter,
#  lty = Line TYpe, lwd = Line WiDth)
# Legend colors are derived from the factor levels, so they always
# line up with the level codes used for the points above.
legend("topleft", legend = levels(gender),
       col = seq_along(levels(gender)), pch = 1)

# Take manual control of tick positions and tick labels.
# Step 1: draw the scatterplot with both axes suppressed
# (xaxt = X-AXis Type, yaxt = Y-AXis Type; "n" = draw none)
# NOTE(review): las = 1 is passed to plot() only, so it does not appear to
# apply to the separate axis() calls below - confirm whether that is intended.
plot(x = nhanes$WEIGHT_KG, y = nhanes$LENGTH_CM,
     main = "Length vs weight for babies 0-6 months old",
     ylab = "Length (cm)", xlab = "Weight (kg)",
     yaxt = "n", xaxt = "n", las = 1)
# Step 2: add the x-axis (side = 1) to the existing graph,
# with ticks only at the chosen positions
axis(at = c(4, 6, 8, 10), side = 1)
# Step 3: add the y-axis (side = 2) to the existing graph,
# with custom text in place of the numeric tick labels
axis(at = c(50, 80), labels = c("Short", "Tall"), side = 2)