1. Setting up your RStudio workspace
Preparing the environment and loading data files
Postdoctoral researcher
lian (at) arzbecker (dot) com
Communicative Disorders and Sciences, University at Buffalo
Preparing the environment and loading data files
# Point R at the folder that holds the data files
setwd("C:/Users/lianjarzbecker/Downloads")

# Print the current working directory to confirm the change took effect
getwd()

# Packages this walkthrough depends on
packages <- c(
  "readxl",
  "expss"
)
### No need to edit anything below this triple comment symbol
# Attach a package, installing it from CRAN first if it is not available.
#
# pkg: package name as a single character string.
#
# Availability is tested with requireNamespace(), which does not attach the
# package and stays quiet when it is missing. library() then attaches the
# package and stops with an error if it still cannot be found, so a failed
# installation is never silently ignored.
install_and_load <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    # Download over HTTPS rather than plain HTTP
    install.packages(pkg, repos = "https://cran.us.r-project.org")
  }
  library(pkg, character.only = TRUE)
}
# Run the helper over every requested package; invisible() hides the list
# that lapply() would otherwise print
invisible(lapply(X = packages, FUN = install_and_load))
### No need to edit anything above this triple comment symbol
# The assignment arrow stores the imported table as a data frame named "dogs"
dogs <- read.csv(file = "dogs.csv")

# Import again, this time treating blank cells and the text "NA" as missing
dogs <- read.csv(
  file = "dogs.csv",
  na.strings = c("", "NA")
)

# Inspect the column types, then preview the first rows
str(dogs)
head(dogs)
# The same data stored in other file formats

# Excel workbook: read_excel() detects the format itself, so this one call
# handles both .xlsx and .xls files (read_xlsx() accepts .xlsx only)
library(readxl)
dogs_excel <- read_excel("dogs.xlsx")

# Tab-delimited text: read.table() assumes whitespace-separated fields and no
# header row, so state the tab separator and the header explicitly
dogs3_text <- read.table("dogs.txt", sep = "\t", header = TRUE)
# Attach a descriptive label to every variable. It may feel redundant now,
# but think of future you!
library(expss)
dogs <- apply_labels(
  dogs,
  breed = "Breed",
  group = "Group",
  height = "Height (in)",
  weight = "Weight (lb)",
  life_expect = "Life span (yr)",
  affection = "Affectionate",
  kids = "Good with kids",
  dogs = "Good with other dogs",
  shedding = "Shedding level",
  grooming = "Coat grooming frequency",
  drooling = "Drooling level",
  coatT = "Coat type",
  coatL = "Coat length",
  strangers = "Stranger openness",
  playful = "Playfulness",
  protec = "Protectiveness",
  adapt = "Adaptability",
  train = "Trainability",
  energy = "Energy level",
  bark = "Barking level",
  stim = "Mental stimulation needs"
)

# Confirm that the labels were attached
str(dogs)
# Add a "groupF" column to the "dogs" data frame; "$" picks out a single
# column of a data frame, here "group"
dogs$groupF <- factor(
  dogs$group,
  levels = c(
    "Herding", "Hound", "Toy", "Non-sporting", "Sporting",
    "Terrier", "Working", "Miscellaneous", "FSS"
  ),
  labels = c(
    "Herding", "Hound", "Toy", "Non-sporting", "Sporting",
    "Terrier", "Working", "Miscellaneous", "FSS"
  )
)

# When the labels simply repeat the levels, "as.factor" is the shorter route
# (this assignment overwrites the column created just above)
dogs$groupF <- as.factor(dogs$group)
# Further examples of building factors

# Example 1: Group Recognition by AKC (Yes or No)
# The first seven groups map to "Yes", the last two to "No"
dogs$groupR <- factor(
  dogs$group,
  levels = c(
    "Herding", "Hound", "Toy", "Non-sporting", "Sporting",
    "Terrier", "Working", "Miscellaneous", "FSS"
  ),
  labels = rep(c("Yes", "No"), times = c(7, 2))
)

# Example 2: Barking Behavior Classification
# (Only to alert, Rarely, Sometimes, Often, Very vocal)
dogs$barkF <- factor(
  dogs$bark,
  levels = 1:5,
  labels = c("Only to alert", "Rarely", "Sometimes", "Often", "Very vocal")
)

# Example 2a: Alternative Barking Classification
# (Low, Medium, High): scores 1-2 are "Low", 3 is "Medium", 4-5 are "High"
dogs$barkF2 <- factor(
  dogs$bark,
  levels = 1:5,
  labels = rep(c("Low", "Medium", "High"), times = c(2, 1, 2))
)
# Workspace setup: work from the folder that holds the data files
setwd("C:/Users/lianjarzbecker/Downloads")

# Packages to install (if needed) and attach
packages <- "expss"
# Attach a package, installing it from CRAN first if it is not available.
#
# pkg: package name as a single character string.
#
# requireNamespace() checks availability without attaching the package and
# without the warning that require() emits for a missing package. library()
# then attaches it and stops with an error if it still cannot be found, so a
# failed installation is never silently ignored.
install_and_load <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    # Download over HTTPS rather than plain HTTP
    install.packages(pkg, repos = "https://cran.us.r-project.org")
  }
  library(pkg, character.only = TRUE)
}
# Run the helper over every requested package; invisible() hides the list
# that lapply() would otherwise print
invisible(lapply(X = packages, FUN = install_and_load))
# Import the CSV, treating blank cells and the text "NA" as missing values
dogs <- read.csv("dogs.csv", na.strings = c("", "NA"))

# Attach a descriptive label to every variable
dogs <- apply_labels(
  dogs,
  breed = "Breed",
  group = "Group",
  height = "Height (in)",
  weight = "Weight (lb)",
  life_expect = "Life span (yr)",
  affection = "Affectionate",
  kids = "Good with kids",
  dogs = "Good with other dogs",
  shedding = "Shedding level",
  grooming = "Coat grooming frequency",
  drooling = "Drooling level",
  coatT = "Coat type",
  coatL = "Coat length",
  strangers = "Stranger openness",
  playful = "Playfulness",
  protec = "Protectiveness",
  adapt = "Adaptability",
  train = "Trainability",
  energy = "Energy level",
  bark = "Barking level",
  stim = "Mental stimulation needs"
)
# Convert the grouping and barking variables to factors
dogs$groupF <- as.factor(dogs$group)
dogs$barkF <- factor(
  dogs$bark,
  levels = 1:5,
  labels = c("Only to alert", "Rarely", "Sometimes", "Often", "Very vocal")
)

# Optional: inspect the structure and summary statistics of the data frame
str(dogs)
summary(dogs)