# 1 Scatterplots: Intro
# Basics, customizing axes, and manual scales
# Postdoctoral researcher
# arzbecker.1 (at) osu (dot) edu | lianarzb (at) buffalo (dot) edu
# Communicative Disorders and Sciences, University at Buffalo
# Basics, customizing axes, and manual scales
# Set working directory
# The data file is read by relative path ("dogs.csv"), so the script switches
# to the folder that holds it. That folder is machine-specific: only change
# into it when it exists, so the script can also be run from any directory
# that already contains the data file instead of aborting on this line.
data_dir <- "C:/Users/lianjarzbecker/Downloads"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Directory ", data_dir, " not found; staying in ", getwd())
}
# List of packages to install and load
packages <- c("expss", "ggplot2", "ggalt")
# Function to install and load packages if not already installed
#
# package_name: name of one package, as a character string.
#
# Emits a message saying whether the package is being installed or is already
# installed, then attaches it. Messages and warnings raised by
# install.packages() and library() are suppressed. Returns the value of the
# library() call.
install_and_load <- function(package_name) {
  # system.file() returns "" when the package cannot be found; this avoids
  # building the whole installed.packages() matrix just to test one name.
  is_installed <- nzchar(system.file(package = package_name))
  if (!is_installed) {
    message(paste("Installing package", package_name))
    suppressMessages(suppressWarnings(install.packages(package_name)))
  } else {
    message(paste("Package", package_name, "is already installed"))
  }
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  suppressMessages(
    suppressWarnings(library(package_name, character.only = TRUE))
  )
}
# Install (when needed) and attach every package named in `packages`;
# invisible() keeps the list returned by lapply() from being printed
invisible(lapply(X = packages, FUN = install_and_load))
# Import the data from the CSV file; empty strings and "NA" count as missing
dogs <- read.csv(file = "dogs.csv", na.strings = c("", "NA"))
# Keep only the rows without missing values
dogs2 <- na.omit(dogs)
# Attach a descriptive variable label to each column
dogs2 <- apply_labels(
  dogs2,
  breed       = "Breed",
  group       = "Group",
  height      = "Height (in)",
  weight      = "Weight (lb)",
  life_expect = "Life span (yr)",
  affection   = "Affectionate",
  kids        = "Good with kids",
  dogs        = "Good with other dogs",
  shedding    = "Shedding level",
  grooming    = "Coat grooming frequency",
  drooling    = "Drooling level",
  coatT       = "Coat type",
  coatL       = "Coat length",
  strangers   = "Stranger openness",
  playful     = "Playfulness",
  protec      = "Protectiveness",
  adapt       = "Adaptability",
  train       = "Trainability",
  energy      = "Energy level",
  bark        = "Barking level",
  stim        = "Mental stimulation needs"
)
# Build factor versions of the group and barking-level columns
dogs2[["groupF"]] <- as.factor(dogs2[["group"]])
# Barking level is coded 1-5; give each code a descriptive label
dogs2[["barkF"]] <- factor(
  dogs2[["bark"]],
  levels = 1:5,
  labels = c("Only to alert", "Rarely", "Sometimes", "Often", "Very vocal")
)
# Create plot0: height against weight with all default settings
library(ggplot2)
plot0 <- ggplot(data = dogs2, mapping = aes(x = weight, y = height)) +
  geom_point() + # One point per row of dogs2
  ggtitle("plot0") # Title above the panel
# Print plot0
print(plot0)
# Create plot1: same scatterplot with custom axis names, limits, and breaks
plot1 <- ggplot(data = dogs2, mapping = aes(x = weight, y = height)) +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    breaks = seq(from = 0, to = 175, by = 25),
    limits = c(0, 180)
  ) +
  scale_y_continuous(
    name = "Height (in)",
    breaks = seq(from = 0, to = 35, by = 5),
    limits = c(0, 36)
  ) +
  ggtitle("plot1")
print(plot1)
# Create plot2: wider axis limits and coarser breaks than plot1
plot2 <- ggplot(data = dogs2, mapping = aes(x = weight, y = height)) +
  geom_point() +
  scale_x_continuous(
    name = "Weight in pounds",
    breaks = seq(from = 0, to = 175, by = 50),
    limits = c(0, 200)
  ) +
  scale_y_continuous(
    name = "Height in inches",
    breaks = seq(from = 0, to = 40, by = 10),
    limits = c(0, 50)
  ) +
  ggtitle("plot2")
print(plot2)
# Create plot3 (plotting discrete variables with scatterplots is oft ill-advised)
# Weight on a continuous x axis against breed group on a discrete y axis
plot3 <- ggplot(dogs2, aes(x = weight, y = groupF)) +
  ggtitle("plot3") +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 175),
    breaks = seq(0, 175, by = 25)
  ) +
  scale_y_discrete(
    name = "Group",
    # limits fixes which groups are shown and their order along the axis
    limits = c(
      "Working", "Sporting", "Hound",
      "Herding", "FSS", "Miscellaneous",
      "Non-sporting", "Terrier", "Toy"
    ),
    # Named labels are matched to the breaks by name. Every group is named
    # explicitly (in the same order as limits) so no entry depends on being
    # silently skipped; "FSS" and "Toy" keep their full text.
    labels = c(
      "Working" = "Wor", "Sporting" = "Spo", "Hound" = "Hou",
      "Herding" = "Her", "FSS" = "FSS", "Miscellaneous" = "Mis",
      "Non-sporting" = "N-S", "Terrier" = "Ter", "Toy" = "Toy"
    )
  )
print(plot3)
# Create plot4 (exact same as plot3, just adding jitter)
# Jitter adds "noise" to the point plotting but
# still not ideal for these variables...a boxplot would be more suited
plot4 <- ggplot(dogs2, aes(x = weight, y = groupF)) +
  ggtitle("plot4") +
  # Shift each point by a random amount of up to .25 in both directions
  geom_point(position = position_jitter(width = .25, height = .25)) +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 175),
    breaks = seq(0, 175, by = 25)
  ) +
  scale_y_discrete(
    name = "Group",
    # limits fixes which groups are shown and their order along the axis
    limits = c(
      "Working", "Sporting", "Hound",
      "Herding", "FSS", "Miscellaneous",
      "Non-sporting", "Terrier", "Toy"
    ),
    # Named labels are matched to the breaks by name. Every group is named
    # explicitly (in the same order as limits) so no entry depends on being
    # silently skipped; "FSS" and "Toy" keep their full text.
    labels = c(
      "Working" = "Wor", "Sporting" = "Spo", "Hound" = "Hou",
      "Herding" = "Her", "FSS" = "FSS", "Miscellaneous" = "Mis",
      "Non-sporting" = "N-S", "Terrier" = "Ter", "Toy" = "Toy"
    )
  )
print(plot4)
# Turn the 1-5 energy and shedding codes into labelled factors
dogs2[["energyF"]] <- factor(
  dogs2[["energy"]],
  levels = 1:5,
  labels = c("Couch potato", "Low", "Moderate", "Active", "High")
)
dogs2[["sheddingF"]] <- factor(
  dogs2[["shedding"]],
  levels = 1:5,
  labels = c("None", "Low", "Moderate", "High", "Hair everywhere")
)
# Create plot5
# Mapping this many aesthetics at once is not best practice for this kind
# of dataset; it is done here purely for illustration
plot5 <- ggplot(
  data = dogs2,
  mapping = aes(
    x = weight,
    y = height,
    # Extra aesthetics beyond position
    shape = energyF,
    color = barkF,
    fill = sheddingF
  )
) +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    breaks = seq(from = 0, to = 175, by = 25),
    limits = c(0, 175)
  ) +
  scale_y_continuous(
    name = "Height (in)",
    breaks = seq(from = 5, to = 30, by = 5),
    limits = c(5, 30)
  ) +
  ggtitle("plot5")
print(plot5)
# Create plot6
# Same mapping and axes as plot5, plus manual scales and retitled legends
plot6 <- ggplot(
  data = dogs2,
  mapping = aes(
    x = weight,
    y = height,
    color = barkF,
    fill = sheddingF,
    shape = energyF
  )
) +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    breaks = seq(from = 0, to = 175, by = 25),
    limits = c(0, 175)
  ) +
  scale_y_continuous(
    name = "Height (in)",
    breaks = seq(from = 5, to = 30, by = 5),
    limits = c(5, 30)
  ) +
  # Manual scales need one value per factor level (five levels each here)
  scale_shape_manual(values = c(21, 22, 23, 24, 25)) +
  scale_color_manual(
    values = c("gray", "blue", "orange", "red", "black")
  ) +
  # Fill colors given as hex codes
  scale_fill_manual(
    values = c("#09d95f", "#7109d9", "#827448", "#21a6b0", "#fa00af")
  ) +
  labs(
    color = "Barking level: color",
    fill = "Shedding level: fill",
    shape = "Energy level: shape"
  ) +
  # The legend keys default to a solid circle, so the color and fill
  # legends are overridden to draw shape 21 instead
  guides(
    color = guide_legend(override.aes = list(shape = 21)),
    fill = guide_legend(override.aes = list(shape = 21))
  ) +
  ggtitle("plot6")
print(plot6)