An Introduction to Plotly in R

Liis Hantsoo presented the following code for using Plotly on September 22nd:

#PLOTLY DEMONSTRATION

#We will be running examples of basic plots in Plotly.
#Cheat Sheet: https://images.plot.ly/plotly-documentation/images/r_cheat_sheet.pdf

#Install & load packages
#Only packages that are not already installed get installed, so re-running the
#script does not re-download plotly every time.
#hexbin is never attached, but ggplot2's geom_hex() (used in the Texas housing
#example further down) needs it to be installed.
needed_pkgs <- c("plotly", "ggplot2", "hexbin")
missing_pkgs <- needed_pkgs[
  !vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(plotly)
library(ggplot2)

#Smoke test with sample data: if Plotly is working, both plots below render.
#First, a plot built from the volcano matrix passed as z.
plot_ly(z = ~volcano)

#Second, box plots of percollege from the midwest data, coloured by state.
p <- midwest %>%
  plot_ly(x = ~percollege, color = ~state, type = "box")
p

#R-BLOGGERS TUTORIAL: Getting started with Plotly: Basic Plots
#https://www.r-bloggers.com/getting-started-with-plotly-basic-plots/
#Every exercise in this section uses the iris dataset; columns are referenced
#by name here (Sepal.Length is column 1, Sepal.Width column 2, Petal.Length
#column 3).
data(iris)
head(iris)

#1. Histogram of Sepal.Length (the first column of iris) using plot_ly().
plot_ly(x = iris$Sepal.Length, type = "histogram")

#2. The same histogram with 20 bins, requested through nbinsx.
plot_ly(x = iris$Sepal.Length, type = "histogram", nbinsx = 20)

#3.a. Scatter plot of the first two columns: Sepal.Length on the x axis and
#Sepal.Width on the y axis.
plot_ly(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  type = "scatter",
  mode = "markers"
)

#3.b. Colour the points by Species. HINT: the color argument takes a vector
#with one entry per data point holding that point's level.
plot_ly(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  type = "scatter",
  mode = "markers",
  color = iris$Species
)

#4. Additionally scale the markers' size by Petal.Length (the third column)
#through the size argument.
plot_ly(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  type = "scatter",
  mode = "markers",
  color = iris$Species,
  size = iris$Petal.Length
)

#9. Box plot of Sepal.Length using the box trace type.
plot_ly(y = iris$Sepal.Length, type = "box")

#10. Multiple box plots of Sepal.Length, one box per species.
plot_ly(y = iris$Sepal.Length, type = "box", color = iris$Species)

#5. Line plot of 100 draws from a normal distribution with the default mean
#and standard deviation, plotted against their index (1 to 100) on the x axis.
plot_ly(
  x = seq_len(100),
  y = rnorm(n = 100),
  type = "scatter",
  mode = "lines"
)

#6. Store the same kind of line plot in an object p (rnorm() is called again,
#so the values are a fresh random draw), then add a title with layout().
p <- plot_ly(
  x = seq_len(100),
  y = rnorm(n = 100),
  type = "scatter",
  mode = "lines"
)
p %>% layout(title = "Line plot")

#7. Bar plot. First generate the data: 2 "A"s, 4 "B"s and 8 "C"s.
cat <- rep(c("A", "B", "C"), times = c(2, 4, 8))
#table() counts each category and as.data.frame() turns the counts into a data
#frame whose first column is the category (cat) and second the count (Freq).
df <- as.data.frame(table(cat))

#7a. Bar plot of the counts per category.
plot_ly(x = df$cat, y = df$Freq, type = "bar")

#7b. The same bars, coloured by the categorical levels (A,B,C).
plot_ly(x = df$cat, y = df$Freq, type = "bar", color = df$cat)

#8. Pie chart of the same data: categories as labels, counts as values.
plot_ly(labels = df$cat, values = df$Freq, type = "pie")

#SIEVERT'S PLOTLY USER BOOK
#User Book: https://plotly-book.cpsievert.me/
#Plotly basics: The plotly package depends on ggplot2. The plot_ly() function
#transforms DATA into a plotly object, while the ggplotly() function transforms
#a GGPLOT object into a plotly object (Wickham 2009); (Sievert et al. 2016).
#https://plotly-book.cpsievert.me/two-approaches-one-object.html

#1.1 A case study of housing sales in Texas
#Create a ggplot object, p, showing the monthly median house price in Texas.
#x-axis: Time
#y-axis: Home price (1x10^5 = $100,000)
#lines: one per city (group = city)
head(txhousing)
txhousing
p <- ggplot(data = txhousing, mapping = aes(x = date, y = median)) +
  geom_line(mapping = aes(group = city), alpha = 0.2)
p

#With a valid ggplot2 object, p, in hand, ggplotly() converts a ggplot object
#to a plotly object. The 2x2 grid below has the raw data (by city) in the top
#row and 2D binning of the raw data in the bottom row.
subplot(
  p,
  ggplotly(p, tooltip = "city"),
  ggplot(data = txhousing, mapping = aes(x = date, y = median)) + geom_bin2d(),
  ggplot(data = txhousing, mapping = aes(x = date, y = median)) + geom_hex(),
  nrows = 2,
  shareX = TRUE,
  shareY = TRUE,
  titleY = FALSE,
  titleX = FALSE
)

#EXTERNAL DATA
#Datasets available at https://vincentarelbundock.github.io/Rdatasets/datasets.html.
#Dataset on birthweight: https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/MASS/birthwt.csv

#Read the birthweight CSV straight from the website into mydata and preview
#the first rows.
birthwt_url <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/MASS/birthwt.csv"
mydata <- read.csv(file = birthwt_url)
head(mydata)

#Recode the numeric codes into readable labels, stored as character columns.
#race:  1 = White, 2 = Black, 3 = Other (the MASS birthwt documentation
#       defines code 3 as "other", not as a specific group).
#smoke: 0 = Nonsmoker, 1 = Smoker
#Any value outside the listed codes becomes NA.
mydata$racecat <- as.character(
  factor(mydata$race, levels = 1:3, labels = c("White", "Black", "Other"))
)
mydata$smoker <- as.character(
  factor(mydata$smoke, levels = 0:1, labels = c("Nonsmoker", "Smoker"))
)
head(mydata)

#2.1 Scatterplots
#Sievert's Example with mpg data (Fig 2.1).
#Makes 3 plots of the same data with "default" (solid), "alpha" (shaded) and
#"hollow" (circle) datapoints.
#e.g. Hollow circles are specified via symbol = I(1)
#See other markers: https://plot.ly/r/reference/#scatter-marker-symbol
subplot(
  plot_ly(mpg, x = ~cty, y = ~hwy, name = "default"),
  plot_ly(mpg, x = ~cty, y = ~hwy) %>%
    add_markers(alpha = 0.2, name = "alpha"),
  plot_ly(mpg, x = ~cty, y = ~hwy) %>%
    add_markers(symbol = I(1), name = "hollow")
)

#The same three marker styles ("default", "alpha", "hollow") applied to the
#birthwt data.
#x-axis: Mother age
#y-axis: Infant birthweight
subplot(
  plot_ly(data = mydata, x = ~age, y = ~bwt, name = "default"),
  add_markers(
    plot_ly(data = mydata, x = ~age, y = ~bwt),
    alpha = 0.2,
    name = "alpha"
  ),
  add_markers(
    plot_ly(data = mydata, x = ~age, y = ~bwt),
    symbol = I(1),
    name = "hollow"
  )
)

#Breaking it down: start with just one scatterplot, stored as p1.
#x-axis: Mother age
#y-axis: Infant birthweight
p1 <- mydata %>%
  plot_ly(x = ~age, y = ~bwt)
p1

#Now panel by smoker / nonsmoker.
#There is an example of paneling at: https://plot.ly/r/subplots/.
#Each panel is built from only its own group's rows; plotting the full data in
#both panels would just draw the same plot twice.
#add_markers() draws points. add_lines() would join the observations with line
#segments, which is what cluttered the plot and is not meaningful here.
#%in% (rather than ==) keeps rows with a missing smoker value out of both
#panels.
p1 <- mydata[mydata$smoker %in% "Nonsmoker", ] %>%
  plot_ly(x = ~age, y = ~bwt) %>%
  add_markers(name = "Nonsmoker")
p2 <- mydata[mydata$smoker %in% "Smoker", ] %>%
  plot_ly(x = ~age, y = ~bwt) %>%
  add_markers(name = "Smoker")
#shareY = TRUE puts both panels on one birthweight axis so they are comparable.
p <- subplot(p1, p2, shareY = TRUE)
p

#For comparison, the same data in a single ggplot2 panel, colour-coded by
#smoker status.
ggplot(mydata, aes(x = age, y = bwt, color = smoker)) + geom_point()

#Sievert's example: Plot with multiple traces (Fig 2.3).
#A shared base plot p is extended twice: once mapping symbol to cyl as-is, and
#once mapping it to factor(cyl) with black markers.
p <- plot_ly(data = mpg, x = ~cty, y = ~hwy, alpha = 0.3)
subplot(
  p %>% add_markers(symbol = ~cyl, name = "A single trace"),
  p %>% add_markers(symbol = ~factor(cyl), color = I("black"))
)

#Example with birthwt data.
#x-axis: Mother age
#y-axis: Infant birthweight
#subplot() always places each plot it is given in its own panel, which is why
#two plots appear side by side.
#Left panel: the numeric race code is mapped to symbol, mirroring ~cyl in
#Sievert's example, so all points stay in a single trace. Mapping the
#character column racecat here would be treated as discrete and split the
#points into one trace per category, contradicting the trace name.
#Right panel: factor(racecat) gives one trace (and legend entry) per category.
p <- plot_ly(mydata, x = ~age, y = ~bwt, alpha = 0.3)
subplot(
  add_markers(p, symbol = ~race, name = "A single trace"),
  add_markers(p, symbol = ~factor(racecat), color = I("black"))
)

#Sievert's example: Colorbar (Fig 2.4)
#The assignment to p and the subplot() call must be separate statements.
#Left panel: color mapped to cyl as-is, legend hidden and the colorbar titled
#"Viridis". Right panel: color mapped to factor(cyl).
p <- plot_ly(mpg, x = ~cty, y = ~hwy, alpha = 0.5)
subplot(
  add_markers(p, color = ~cyl, showlegend = FALSE) %>%
    colorbar(title = "Viridis"),
  add_markers(p, color = ~factor(cyl))
)

#Example with birthwt data.
#x-axis: Mother age
#y-axis: Infant birthweight
#Color code points by race.
#Left panel: the numeric race code is mapped to color, mirroring ~cyl in
#Sievert's example, so there is a colorbar for colorbar() to title.
#Right panel: factor(racecat) gives one discrete color per category.
#Mapping the character column racecat in both panels would make both of them
#discrete, i.e. two duplicate plots and no colorbar to modify.
p <- plot_ly(mydata, x = ~age, y = ~bwt, alpha = 0.5)
subplot(
  add_markers(p, color = ~race, showlegend = FALSE) %>%
    colorbar(title = "Viridis"),
  add_markers(p, color = ~factor(racecat))
)

#Box plots of infant birthweight (bwt) from the birthwt data.
#First with one box per racecat value.
p <- mydata %>%
  plot_ly(x = ~bwt, color = ~racecat, type = "box")
p

#Then with one box per smoker value.
p <- mydata %>%
  plot_ly(x = ~bwt, color = ~smoker, type = "box")
p

 

Leave a comment