# 0 Setting up your RStudio workspace
# Preparing the environment and loading data files
# Postdoctoral researcher
# arzbecker.1 (at) osu (dot) edu | lianarzb (at) buffalo (dot) edu
# Communicative Disorders and Sciences, University at Buffalo
# Preparing the environment and loading data files
# Point R at the folder that holds the data files read later in this script
setwd(dir = "C:/Users/lianjarzbecker/Downloads")
# Print the current working directory to confirm the change took effect
getwd()
# Names of the add-on packages this script installs (if needed) and loads
packages <- c("readxl", "expss")
### No need to edit anything below this triple comment symbol
# Install a package if it is missing, then attach it with library().
#
# Args:
#   package_name: Name of the package as a single character string.
#
# Returns:
#   The value of the final library() call (the attached-package list that
#   library() returns invisibly). Called mainly for its side effects: a status
#   message, an optional installation, and attaching the package.
install_and_load <- function(package_name) {
  # system.file() returns "" when the package cannot be found. This is the
  # lightweight check R's documentation recommends instead of scanning every
  # installed package with installed.packages().
  is_installed <- nzchar(system.file(package = package_name))
  if (!is_installed) {
    message(paste("Installing package", package_name))
    suppressMessages(suppressWarnings(install.packages(package_name)))
  } else {
    message(paste("Package", package_name, "is already installed"))
  }
  # Spell out TRUE: the shorthand T is an ordinary variable that can be
  # reassigned, which would make library() look for a package literally
  # called "package_name".
  suppressMessages(suppressWarnings(
    library(package_name, character.only = TRUE)
  ))
}
# Run install_and_load() once for every entry in `packages`; invisible()
# keeps the list returned by lapply() from being printed
invisible(lapply(X = packages, FUN = install_and_load))
### No need to edit anything above this triple comment symbol
# The backward arrow "<-" names the imported data & saves it as a data frame
dogs <- read.csv("dogs.csv")
# Handling missing data: empty cells and the text "NA" are both read as NA
dogs <- read.csv("dogs.csv", na.strings = c("", "NA"))
# Verify structure & first few rows of data
str(dogs)
head(dogs)
# Same data, just in different file formats
# read_excel() reads both .xlsx and .xls files (it detects the format);
# read_xlsx() on its own would only handle .xlsx
library(readxl)
dogs_excel <- read_excel("dogs.xlsx")
# Function "read.table" assumes a whitespace delimiter & no header, so we
# need to indicate the separator is a tab and there is a header
# (TRUE is spelled out because the shorthand T can be overwritten)
dogs3_text <- read.table("dogs.txt", sep = "\t", header = TRUE)
# Attach a descriptive label to every column. It might seem redundant now,
# but future you will appreciate it!
library(expss)
dogs <- apply_labels(
  dogs,
  # Breed identity and size
  breed       = "Breed",
  group       = "Group",
  height      = "Height (in)",
  weight      = "Weight (lb)",
  life_expect = "Life span (yr)",
  # Sociability
  affection   = "Affectionate",
  kids        = "Good with kids",
  dogs        = "Good with other dogs",
  # Coat and drool
  shedding    = "Shedding level",
  grooming    = "Coat grooming frequency",
  drooling    = "Drooling level",
  coatT       = "Coat type",
  coatL       = "Coat length",
  # Temperament and needs
  strangers   = "Stranger openness",
  playful     = "Playfulness",
  protec      = "Protectiveness",
  adapt       = "Adaptability",
  train       = "Trainability",
  energy      = "Energy level",
  bark        = "Barking level",
  stim        = "Mental stimulation needs"
)
# Inspect the data frame again to confirm the labels were assigned
str(dogs)
# Add a "groupF" column to the "dogs" data frame.
# "$" picks out a single column, here "group", from the data frame.
# "levels" lists the values to recognise (and their order); "labels" gives
# the name shown for each level, position by position.
dogs$groupF <- factor(
  dogs$group,
  levels = c(
    "Herding", "Hound", "Toy", "Non-sporting", "Sporting",
    "Terrier", "Working", "Miscellaneous", "FSS"
  ),
  labels = c(
    "Herding", "Hound", "Toy", "Non-sporting", "Sporting",
    "Terrier", "Working", "Miscellaneous", "FSS"
  )
)
# When levels and labels are identical, "as.factor" is the shorter route.
# Note: it builds the levels from the sorted unique values (base R behaviour
# for text data), so this line replaces the level order chosen above.
dogs$groupF <- as.factor(dogs$group)

# More factorizing examples below

# Example 1: Group recognition by AKC (Yes or No)
# The first seven groups map to "Yes", the last two to "No"; levels that
# share a label are merged into one.
dogs$groupR <- factor(
  dogs$group,
  levels = c(
    "Herding", "Hound", "Toy", "Non-sporting", "Sporting",
    "Terrier", "Working", "Miscellaneous", "FSS"
  ),
  labels = rep(c("Yes", "No"), times = c(7, 2))
)

# Example 2: Barking behavior classification
# (Only to alert, Rarely, Sometimes, Often, Very vocal)
dogs$barkF <- factor(
  dogs$bark,
  levels = 1:5,
  labels = c("Only to alert", "Rarely", "Sometimes", "Often", "Very vocal")
)

# Example 2a: Alternative barking classification
# (Low, Medium, High): scores 1-2 are Low, 3 is Medium, 4-5 are High
dogs$barkF2 <- factor(
  dogs$bark,
  levels = 1:5,
  labels = c("Low", "Low", "Medium", "High", "High")
)
# RStudio workspace setup: work from the folder that holds the data files
setwd(dir = "C:/Users/lianjarzbecker/Downloads")
# Packages to install (if needed) and load
packages <- "expss"
# Install a package if it is not already installed, then attach it.
#
# Args:
#   package_name: Name of the package as a single character string.
#
# Returns:
#   The value of the final library() call (the attached-package list that
#   library() returns invisibly). Called mainly for its side effects: a status
#   message, an optional installation, and attaching the package.
install_and_load <- function(package_name) {
  # system.file() returns "" when the package cannot be found. This is the
  # lightweight check R's documentation recommends instead of scanning every
  # installed package with installed.packages().
  is_installed <- nzchar(system.file(package = package_name))
  if (!is_installed) {
    message(paste("Installing package", package_name))
    suppressMessages(suppressWarnings(install.packages(package_name)))
  } else {
    message(paste("Package", package_name, "is already installed"))
  }
  # Spell out TRUE: the shorthand T is an ordinary variable that can be
  # reassigned, which would make library() look for a package literally
  # called "package_name".
  suppressMessages(suppressWarnings(
    library(package_name, character.only = TRUE)
  ))
}
# Run install_and_load() once for every entry in `packages`; invisible()
# keeps the list returned by lapply() from being printed
invisible(lapply(X = packages, FUN = install_and_load))
# Read the CSV file (empty cells and the text "NA" become NA), then attach a
# descriptive label to every column
dogs <- read.csv("dogs.csv", na.strings = c("", "NA"))
dogs <- apply_labels(
  dogs,
  # Breed identity and size
  breed       = "Breed",
  group       = "Group",
  height      = "Height (in)",
  weight      = "Weight (lb)",
  life_expect = "Life span (yr)",
  # Sociability
  affection   = "Affectionate",
  kids        = "Good with kids",
  dogs        = "Good with other dogs",
  # Coat and drool
  shedding    = "Shedding level",
  grooming    = "Coat grooming frequency",
  drooling    = "Drooling level",
  coatT       = "Coat type",
  coatL       = "Coat length",
  # Temperament and needs
  strangers   = "Stranger openness",
  playful     = "Playfulness",
  protec      = "Protectiveness",
  adapt       = "Adaptability",
  train       = "Trainability",
  energy      = "Energy level",
  bark        = "Barking level",
  stim        = "Mental stimulation needs"
)
# Factorize variables
# "groupF": the "group" column as a factor, with levels taken from the data
dogs$groupF <- as.factor(dogs$group)
# "barkF": barking scores 1-5 given descriptive names
dogs$barkF <- factor(
  dogs$bark,
  levels = 1:5,
  labels = c("Only to alert", "Rarely", "Sometimes", "Often", "Very vocal")
)
# Optional: view the structure and a statistical summary of the data frame
str(dogs)
summary(dogs)