Lian Arzbecker

Postdoctoral researcher


Curriculum vitae


arzbecker.1 (at) osu (dot) edu | lianarzb (at) buffalo (dot) edu


Motor Speech Disorders Lab

Communicative Disorders and Sciences, University at Buffalo



1 Scatterplots: Intro


Basics, customizing axes, and manual scales


Table of contents

  1. 🏠️Setting up the workspace
  2. ⚫️Creating a basic ggplot2 scatterplot
  3. ✏️Customizing continuous axes
  4. 🖌️Customizing discrete axes (🎁Adding jitter)
  5. 🎨Point shape, color, and fill (🎁Renaming legend items)
⬇️
Click here to download the example CSV file. Be sure to save it as "dogs.csv".
🏠️

1. Setting up the workspace

The code in this step is explained in Setting up your RStudio workspace
# Set working directory
# Relative paths used later (e.g. "dogs.csv") are resolved against this folder.
# The path below is specific to one machine: replace it with the folder where
# you saved dogs.csv.
setwd("C:/Users/lianjarzbecker/Downloads")


# List of packages to install and load
# Each name is passed to install_and_load() below.
# NOTE(review): no ggalt function is called in the code shown on this page --
# presumably it is needed elsewhere in the tutorial series; confirm.
packages <- c("expss", "ggplot2","ggalt")

# Install a package if it is not already installed, then attach it.
#
# Args:
#   package_name: Name of the package, as a single character string.
#
# Returns:
#   The value of library(): a character vector naming the attached packages.
#
# Side effects:
#   Emits a message saying whether the package had to be installed, may call
#   install.packages(), and attaches the package to the search path. If the
#   package still cannot be attached (e.g. the installation failed), library()
#   raises an error.
install_and_load <- function(package_name) {
  # system.file() returns "" when the package is not installed. It checks this
  # one package only, instead of building the full installed.packages() table
  # on every call.
  is_installed <- nzchar(system.file(package = package_name))

  if (!is_installed) {
    message("Installing package ", package_name)
    # Installation chatter is silenced; a failed install surfaces as an error
    # from library() below.
    suppressMessages(suppressWarnings(install.packages(package_name)))
  } else {
    message("Package ", package_name, " is already installed")
  }

  # TRUE rather than T: T is an ordinary variable that can be reassigned
  suppressMessages(
    suppressWarnings(library(package_name, character.only = TRUE))
  )
}

# Install (if necessary) and attach every package named in `packages`;
# invisible() keeps lapply()'s return value from being printed
invisible(lapply(X = packages, FUN = install_and_load))

# Import the CSV; empty strings and "NA" are both read as missing values
dogs <- read.csv(file = "dogs.csv", na.strings = c("", "NA"))

# Keep only the rows that have no missing values
dogs2 <- na.omit(dogs)

# Attach a descriptive label to each variable
dogs2 <- apply_labels(
  dogs2,
  breed = "Breed",
  group = "Group",
  height = "Height (in)",
  weight = "Weight (lb)",
  life_expect = "Life span (yr)",
  affection = "Affectionate",
  kids = "Good with kids",
  dogs = "Good with other dogs",
  shedding = "Shedding level",
  grooming = "Coat grooming frequency",
  drooling = "Drooling level",
  coatT = "Coat type",
  coatL = "Coat length",
  strangers = "Stranger openness",
  playful = "Playfulness",
  protec = "Protectiveness",
  adapt = "Adaptability",
  train = "Trainability",
  energy = "Energy level",
  bark = "Barking level",
  stim = "Mental stimulation needs"
)

# Convert variables to factors
# Breed group: levels are taken from the values present in the data
dogs2$groupF <- as.factor(dogs2$group)

# Barking level: codes 1 to 5 become labelled categories, in this order
dogs2$barkF <- factor(
  dogs2$bark,
  levels = 1:5,
  labels = c("Only to alert", "Rarely", "Sometimes", "Often", "Very vocal")
)
⚫️

2. Creating a basic ggplot2 scatterplot

  • Functions "ggplot", "ggtitle", and "geom_point"
  • Parameters:
    • Data frame name
    • Aesthetic mappings, specifying which variable to plot on which axis
    • Plot title as a character vector
  • Purpose: Creates a scatterplot to visualize the data
# Syntax usage, any text in bold is user input
plot <- ggplot(data, aes(x = x, y = y)) + ggtitle("title") + geom_point()
# Create plot0: height against weight, with default axes
library(ggplot2)
plot0 <- ggplot(data = dogs2, mapping = aes(x = weight, y = height)) +
  ggtitle(label = "plot0") + # Title shown above the plot
  geom_point() # Draw one point per row of dogs2

# Display plot0
print(plot0)
✏️

3. Customizing continuous axes

  • Functions "scale_x_continuous" and "scale_y_continuous"
  • Parameters:
    • Name as a character vector
    • Limits to define the range of the axis
    • Breaks to define where tick marks and labels appear along the axis
  • Purpose: Customizes axes to improve readability and interpretation
# Syntax usage
plot <- ggplot(data, aes(x = x, y = y)) + ggtitle("title") + geom_point() +
  scale_x_continuous(name = "x-axis name", limits = c(0, 10),
                     breaks = seq(1, 9, by = 1)) +
  scale_y_continuous(name = "y-axis name", limits = c(0, 10),
                     breaks = seq(1, 9, by = 1))
# Create plot1: same data as plot0, with named axes, fixed ranges and
# tick marks every 25 lb / 5 in
plot1 <- ggplot(data = dogs2, mapping = aes(x = weight, y = height)) +
  ggtitle(label = "plot1") +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 180),
    breaks = seq(from = 0, to = 175, by = 25)
  ) +
  scale_y_continuous(
    name = "Height (in)",
    limits = c(0, 36),
    breaks = seq(from = 0, to = 35, by = 5)
  )
print(plot1)


# Create plot2: same data again, with different axis names, wider ranges and
# sparser tick marks (every 50 lb / 10 in)
plot2 <- ggplot(data = dogs2, mapping = aes(x = weight, y = height)) +
  ggtitle(label = "plot2") +
  geom_point() +
  scale_x_continuous(
    name = "Weight in pounds",
    limits = c(0, 200),
    breaks = seq(from = 0, to = 175, by = 50)
  ) +
  scale_y_continuous(
    name = "Height in inches",
    limits = c(0, 50),
    breaks = seq(from = 0, to = 40, by = 10)
  )
print(plot2)
🖌️

4. Customizing discrete axes

  • Functions "scale_x_discrete" and "scale_y_discrete"
  • Parameters:
    • Name as a character vector
    • Limits to define which items appear on the axis and in what order
    • Labels to define the text displayed for each item on the axis
  • Purpose: Customizes axes to improve readability and interpretation
# Syntax usage
plot <- ggplot(data, aes(x = x, y = y)) + ggtitle("title") + geom_point() +
  scale_x_discrete(name = "x-axis name",
                   limits = c("First group", "Second group", "Third group"),
                   labels = c("First group" = "First",
                              "Second group" = "Second",
                              "Third group" = "Third")) +
  scale_y_discrete(name = "y-axis name",
                   limits = c("First group", "Second group", "Third group"),
                   labels = c("First group" = "First",
                              "Second group" = "Second",
                              "Third group" = "Third"))
🎁
Bonus: Adding jitter
  • Position "position_jitter()"
  • Parameters: jitter width and height
  • Purpose: Improves readability and interpretation
# Syntax usage
plot <- ggplot(data, aes(x = x, y = y)) + ggtitle("title") +
  geom_point(position = position_jitter(width = 1, height = 1))
# Create plot3: continuous weight on the x-axis, discrete breed group on the
# y-axis (plotting discrete variables with scatterplots is often ill-advised)
plot3 <- ggplot(data = dogs2, mapping = aes(x = weight, y = groupF)) +
  ggtitle(label = "plot3") +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 175),
    breaks = seq(from = 0, to = 175, by = 25)
  ) +
  scale_y_discrete(
    name = "Group",
    # Groups to show, in axis order
    limits = c(
      "Working", "Sporting", "Hound",
      "Herding", "FSS", "Miscellaneous",
      "Non-sporting", "Terrier", "Toy"
    ),
    # Named entries pair a group with its abbreviation; "Toy" and "FSS" are
    # supplied without a name
    labels = c(
      "Toy",
      "Terrier" = "Ter",
      "Non-sporting" = "N-S",
      "Miscellaneous" = "Mis",
      "Hound" = "Hou",
      "Herding" = "Her",
      "FSS",
      "Sporting" = "Spo",
      "Working" = "Wor"
    )
  )
print(plot3)


# Create plot4: identical to plot3 except that the points are jittered
# Jitter adds random "noise" to each point's position so overlapping points
# separate; it is still not ideal for these variables -- a boxplot would be
# better suited
plot4 <- ggplot(data = dogs2, mapping = aes(x = weight, y = groupF)) +
  ggtitle(label = "plot4") +
  geom_point(position = position_jitter(width = 0.25, height = 0.25)) +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 175),
    breaks = seq(from = 0, to = 175, by = 25)
  ) +
  scale_y_discrete(
    name = "Group",
    # Groups to show, in axis order
    limits = c(
      "Working", "Sporting", "Hound",
      "Herding", "FSS", "Miscellaneous",
      "Non-sporting", "Terrier", "Toy"
    ),
    # Named entries pair a group with its abbreviation; "Toy" and "FSS" are
    # supplied without a name
    labels = c(
      "Toy",
      "Terrier" = "Ter",
      "Non-sporting" = "N-S",
      "Miscellaneous" = "Mis",
      "Hound" = "Hou",
      "Herding" = "Her",
      "FSS",
      "Sporting" = "Spo",
      "Working" = "Wor"
    )
  )
print(plot4)
🎨

5. Point shape, color, and fill

  • Functions "scale_shape_manual", "scale_color_manual", and "scale_fill_manual"
  • Parameters:
    • Shape code as a numeric, see below
    • Color and fill as character vectors of either color names or hex color codes
  • Purpose: Customizes point aesthetics
# Syntax usage
plot <- ggplot(data, aes(x = x, y = y, shape = factor1,
                         color = factor2, fill = factor3)) + ggtitle("title") +
  geom_point() +
  scale_shape_manual(values = c(shape number code)) +
  scale_color_manual(values = c("color name1" OR "#hex color code")) +
  scale_fill_manual(values = c("color name2", "color name3"))
🎁
Bonus: Renaming legend items
  • Functions "labs" and "guides"
  • Parameters: legend names as a character vector
  • Purpose: Customizes legend names and order
# Syntax usage
+ labs(aesthetic1 = "Aesthetic 1",
       aesthetic2 = "Aesthetic 2") +
  guides(aesthetic1 = guide_legend(override.aes = list(shape = 19)))
# Convert the 1-5 energy and shedding codes to labelled factors
# Energy level, from lowest (1) to highest (5)
dogs2$energyF <- factor(
  dogs2$energy,
  levels = 1:5,
  labels = c("Couch potato", "Low", "Moderate", "Active", "High")
)

# Shedding level, from lowest (1) to highest (5)
dogs2$sheddingF <- factor(
  dogs2$shedding,
  levels = 1:5,
  labels = c("None", "Low", "Moderate", "High", "Hair everywhere")
)


# Create plot5: height against weight with three extra aesthetics
# Mapping this many aesthetics is not best practice for a dataset like this;
# it is done here for illustrative purposes only
plot5 <- ggplot(
  data = dogs2,
  mapping = aes(
    x = weight,
    y = height,
    shape = energyF, # Additional aesthetics, each mapped to a factor
    color = barkF,
    fill = sheddingF
  )
) +
  ggtitle(label = "plot5") +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 175),
    breaks = seq(from = 0, to = 175, by = 25)
  ) +
  scale_y_continuous(
    name = "Height (in)",
    limits = c(5, 30),
    breaks = seq(from = 5, to = 30, by = 5)
  )
print(plot5)


# Create plot6: plot5 plus manual shape/color/fill scales, renamed legends
# and a reorganized legend
plot6 <- ggplot(
  data = dogs2,
  mapping = aes(
    x = weight,
    y = height,
    color = barkF,
    fill = sheddingF,
    shape = energyF
  )
) +
  ggtitle(label = "plot6") +
  geom_point() +
  scale_x_continuous(
    name = "Weight (lb)",
    limits = c(0, 175),
    breaks = seq(from = 0, to = 175, by = 25)
  ) +
  scale_y_continuous(
    name = "Height (in)",
    limits = c(5, 30),
    breaks = seq(from = 5, to = 30, by = 5)
  ) +
  # Every manual scale needs one value per factor level (five each here)
  scale_shape_manual(values = c(21, 22, 23, 24, 25)) +
  scale_color_manual(
    values = c("gray", "blue", "orange", "red", "black") # Colors by name
  ) +
  scale_fill_manual(
    values = c("#09d95f", "#7109d9", "#827448", "#21a6b0", "#fa00af") # Hex codes
  ) +
  # Legend titles, one per aesthetic
  labs(
    color = "Barking level: color",
    fill = "Shedding level: fill",
    shape = "Energy level: shape"
  ) +
  # The legend keys default to a solid circle, so the color and fill legends
  # are overridden to draw their keys with shape 21 instead
  guides(
    color = guide_legend(override.aes = list(shape = 21)),
    fill = guide_legend(override.aes = list(shape = 21))
  )
print(plot6)