ggplotmovies
'movies_small' is a random sample of 1000 observations from the larger movies dataset, that's inside the ggplot2movies package.
The dataset contains information on movies from IMDB.
The variable votes is the number of IMDB users who have rated a movie and the rating is the average rating for the movie.
library(ggplot2movies)
library(ggplot2)
# Sample 1000 movies from ggplot2movies
set.seed(123)
movies_small <- movies[sample(nrow(movies), 1000), ]
movies_small$rating <- factor(round(movies_small$rating))
# Build a scatter plot with mean and 95% CI
ggplot(movies_small, aes(x = rating, y = votes)) +
geom_point() +
stat_summary(fun.data = "mean_cl_normal",
geom = 'crossbar',
width = 0.2,
col = 'red') +
scale_y_log10()
diamonds
ggplot(diamonds, aes(x = carat, y = price, col = color)) +
geom_point(alpha = 0.5, size = 0.5, shape = 16) +
# To get nice formatting we're using the expression() function for the labels.
scale_x_log10(expression(log[10](Carat)), limits = c(0.1,10)) +
scale_y_log10(expression(log[10](Price)), limits = c(100,100000)) +
scale_color_brewer(palette = "YlOrRd") +
coord_equal() +
theme_classic()
1.5 * IQR
below Q1
or above Q3
.ggplotmovies
# Sample 10000 movies from ggplot2movies
set.seed(123)
movies_small <- movies[sample(nrow(movies), 10000), ]
movies_small$rating <- factor(round(movies_small$rating))
# Add a boxplot geom
d <- ggplot(movies_small, aes(x = rating, y = votes)) +
geom_point() +
geom_boxplot() +
stat_summary(fun.data = "mean_cl_normal",
geom = "crossbar",
width = 0.2,
col = "red")
# Untransformed plot
d
# Transform the scale
d + scale_y_log10()
# Transform the coordinates
d + coord_trans(y = "log10")
scale_y_log10()
& coord_trans(y = 'log10')
for transforming the scale.
scale_
functiion makes transformation before calculating the statistics.coord_
function makes transformation after calculating the statistics.scale_
& coord_
cut_interval(x, n)
makes n
groups from vector x
with equal range.cut_number(x, n)
makes n
groups from vector x
with (approximately) equal numbers of observations.cut_width(x, width)
makes groups of width
from vector x
.diamonds
p <- ggplot(diamonds, aes(x = carat, y = price))
# Use cut_interval
p + geom_boxplot(aes(group = cut_interval(carat, n = 10)))
# Use cut_number
p + geom_boxplot(aes(group = cut_number(carat, n = 10)))
# Use cut_width
p + geom_boxplot(aes(group = cut_width(carat, width = 0.25)))
n
.ggplot(diamonds, aes(x = cut, y = price, col = color)) +
geom_boxplot(varwidth = T) +
facet_grid(.~color)
A sum of “bumps” placed at the observations. The kernel function determines the shape of the bumps while the window width, h, determines their width.
geom_density()
cuts of the minimum & maximum values of our dataset,geom_density()
bw
the smoothing bandwidth to be used.adjust
adjustment of the bandwidth.kernel
kernel used for density estimation: ‘gaussian’, ‘rectangular’, ‘triangular’, ‘epanechnikov’, ‘biweight’, ‘cosine’, ‘optcosine’.test_data <- get(load('D:/Downloads/test_datasets.RData'))
# Calculating density: d
d <- density(test_data$norm)
# Use which.max() to calculate mode
mode <- d$x[which.max(d$y)]
# Finish the ggplot call
ggplot(test_data, aes(x = norm)) +
geom_density() +
geom_rug() +
geom_vline(xintercept = mode, col = "red")
# Combine density plots and histogram
# Arguments you'll need later on
fun_args <- list(mean = mean(test_data$norm), sd = sd(test_data$norm))
# Finish the ggplot
ggplot(test_data, aes(x = norm)) +
# Achieve inner data
geom_histogram(aes(y = ..density..)) +
geom_density(col = 'red') +
# dnorm gives the density
stat_function(fun = dnorm, args = fun_args, col = 'blue')
library(tidyr)
test_data <- get(load('D:/Downloads/test_datasets.RData'))
test_data2 <- gather(test_data, dist, value, 1:2)
# Plot two distributions with test_data2
ggplot(test_data2, aes(x = value, fill = dist, col = dist)) +
geom_rug(alpha = 0.4) +
geom_density(alpha = 0.4)
mammals <- readRDS('D:/Downloads/mammals.RDS')
# With faceting
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3)) +
facet_wrap( ~ vore, nrow = 2)
# Trim each density plot individually
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35, trim = T) +
scale_x_continuous(limits=c(0,24)) +
coord_cartesian(ylim = c(0, 0.3))
trim = TRUE
inside geom_density()
.# Unweighted violin plot
ggplot(mammals, aes(x = vore, y = sleep_total, fill = vore)) +
geom_violin(col = NA)
# Calculate weighting measure
library(dplyr)
mammals2 <- mammals %>%
group_by(vore) %>%
mutate(n = n() / nrow(mammals)) -> mammals
# Weighted density plot
ggplot(mammals2, aes(x = sleep_total, fill = vore)) +
geom_density(aes(weight = n), col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3))
# Weighted violin plot
ggplot(mammals2, aes(x = vore, y = sleep_total, fill = vore)) +
geom_violin(aes(weight = n), col = NA)
stat_density_2D()
h
.h = c(5, 0.5)
library(datasets)
# Base layers
p <- ggplot(faithful, aes(x = waiting, y = eruptions)) +
scale_y_continuous(limits = c(1, 5.5), expand = c(0, 0)) +
scale_x_continuous(limits = c(40, 100), expand = c(0, 0)) +
coord_fixed(60 / 4.5)
# Use geom_density_2d()
p + geom_density_2d()
# Use stat_density_2d() with arguments
p + stat_density_2d(aes(col = ..level..), h = c(5, 0.5))
viridis
viridis
package contains multi-hue color palettes suitable for continuous variables.library(viridis)
# Add viridis color scale
p +
stat_density_2d(geom = "tile", aes(fill = ..density..), h=c(5,.5), contour = FALSE) +
scale_fill_viridis()
# mtcars_fact: categorical variables have been converted into actual factor columns.
mtcars_fact <- mtcars
mtcars_fact$cyl <- factor(mtcars$cyl)
mtcars_fact$vs <- factor(mtcars$vs)
mtcars_fact$am <- factor(mtcars$am)
mtcars_fact$gear <- factor(mtcars$gear)
mtcars_fact$carb <- factor(mtcars$carb)
# ggpairs
library(GGally)
ggpairs(mtcars_fact[1:3])
# pairs
pairs(iris[1:4])
# chart.Correlation
library(PerformanceAnalytics)
chart.Correlation(iris[1:4])
library(ggplot2)
library(reshape2)
cor_list <- function(x) {
L <- M <- cor(x)
M[lower.tri(M, diag = TRUE)] <- NA
M <- melt(M)
names(M)[3] <- "points"
L[upper.tri(L, diag = TRUE)] <- NA
L <- melt(L)
names(L)[3] <- "labels"
merge(M, L)
}
# Calculate xx with cor_list
library(dplyr)
xx <- iris %>%
group_by(Species) %>%
do(cor_list(.[1:4]))
head(xx)
## # A tibble: 6 x 5
## # Groups: Species [1]
## Species Var1 Var2 points labels
## <fct> <fct> <fct> <dbl> <dbl>
## 1 setosa Petal.Length Petal.Length NA NA
## 2 setosa Petal.Length Petal.Width 0.332 NA
## 3 setosa Petal.Length Sepal.Length NA 0.267
## 4 setosa Petal.Length Sepal.Width NA 0.178
## 5 setosa Petal.Width Petal.Length NA 0.332
## 6 setosa Petal.Width Petal.Width NA NA
# Finish the plot
ggplot(xx, aes(x = Var1, y = Var2)) +
geom_point(aes(col = points, size = abs(points)), shape = 16) +
geom_text(aes(col = labels, size = abs(labels), label = round(labels, 2))) +
scale_size(range = c(0, 6)) +
scale_color_gradient2("r", limits = c(-1, 1)) +
scale_y_discrete("", limits = rev(levels(xx$Var1))) +
scale_x_discrete("") +
guides(size = FALSE) +
geom_abline(slope = -1, intercept = nlevels(xx$Var1) + 1) +
coord_fixed() +
facet_grid(. ~ Species) +
theme(axis.text.y = element_text(angle = 45, hjust = 1),
axis.text.x = element_text(angle = 45, hjust = 1),
strip.background = element_blank())
library(dplyr)
africa <- get(load('D:/Downloads/africa.RData'))
africa_sample <- sample_n(africa, size = 50)
# Explore africa
str(africa)
## 'data.frame': 40093 obs. of 3 variables:
## $ Sand: num 24 36 56 52 65 43 42 47 57 51 ...
## $ Silt: num 12 14 18 21 3 14 22 19 15 14 ...
## $ Clay: num 64 50 26 27 32 43 36 34 28 35 ...
str(africa_sample)
## 'data.frame': 50 obs. of 3 variables:
## $ Sand: num 54 40 26 15 46 23 90 26 34 50 ...
## $ Silt: num 30 44 24 15 14 45 5 12 7 9 ...
## $ Clay: num 16 16 50 70 40 32 5 62 59 41 ...
# Add an ID column from the row.names
africa_sample$ID <- row.names(africa_sample)
# Gather africa_sample
library(tidyr)
africa_sample_tidy <- gather(africa_sample, key, value, -ID)
head(africa_sample_tidy)
## ID key value
## 1 40064 Sand 54
## 2 41236 Sand 40
## 3 78765 Sand 26
## 4 41412 Sand 15
## 5 28924 Sand 46
## 6 42684 Sand 23
# Finish the ggplot command
ggplot(africa_sample_tidy, aes(x = factor(ID), y = value, fill = key)) +
geom_col() +
coord_flip()
# Currently not supported by ggplot2 (20180806)
library(ggplot2)
library(ggtern)
# Original plot:
ggtern(data = africa, aes(x = Sand, y = Silt, z = Clay)) +
geom_point(shape = 16, alpha = 0.2)
# Plot 1
ggtern(africa, aes(x = Sand, y = Silt, z = Clay)) +
geom_density_tern()
# Plot 2
ggtern(africa, aes(x = Sand, y = Silt, z = Clay)) +
stat_density_tern(geom = 'polygon', aes(fill = ..level.., alpha = ..level..)) +
guides(fill = FALSE)
library(geomnet)
# Merge edges and vertices
mmnet <- merge(madmen$edges, madmen$vertices,
by.x = "Name1", by.y = "label",
all = TRUE)
head(mmnet)
## Name1 Name2 Gender
## 1 Betty Draper Henry Francis female
## 2 Betty Draper Random guy female
## 3 Don Draper Allison male
## 4 Don Draper Bethany Van Nuys male
## 5 Don Draper Betty Draper male
## 6 Don Draper Bobbie Barrett male
# Plot
ggplot(data = mmnet, aes(from_id = Name1, to_id = Name2)) +
geom_net(aes(col = Gender), size = 6, linewidth = 1, labelon = T, fontsize = 3, labelcolour = 'black')
# Modify
pink_and_blue <- c(female = "#FF69B4", male = "#0099ff")
ggplot(data = mmnet, aes(from_id = Name1, to_id = Name2)) +
geom_net(aes(col = Gender),
size = 6,
linewidth = 1,
labelon = TRUE,
fontsize = 3,
labelcolour = "black",
# Make the graph directed
directed = T) +
# Add manual color scale
scale_color_manual(values = pink_and_blue) +
# Set x-axis limits
xlim(-0.05, 1.05) +
# Set void theme
theme_void()
ggfortify
: An all-purpose plot converter between base
graphics and ggplot2
grid
graphics.lm
# Create linear model: res
res <- lm(Volume ~ Girth, data = trees)
# Plot res (base)
plot(res)
# Import ggfortify and use autoplot()
# ncol = 2 to convert this to a ggplot2 plot.
library(ggfortify)
autoplot(res, ncol =2)
mts
or ts
Time series objects (class mts
or ts
) also have their own methods for plot()
.
ggfortify
can also take advantage of this functionality.
library(vars)
Canda <- data('Canada')
class(Canada)
## [1] "mts" "ts"
# Inspect structure of Canada
str(Canada)
## Time-Series [1:84, 1:4] from 1980 to 2001: 930 930 930 931 933 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "e" "prod" "rw" "U"
# Call plot() on Canada
plot(Canada)
# Call autoplot() on Canada
autoplot(Canada)
dist
cmdscale()
: From the stats package performs Classical Multi-Dimensional Scaling and returns point coodinates as a matrix.# Autoplot + ggplot2 tweaking
autoplot(eurodist) +
coord_fixed()
# Autoplot of MDS
autoplot(cmdscale(eurodist, eig = TRUE),
label = TRUE,
label.size = 3,
size = 0)
kmeans()
, a function in the stats
package, to perform clustering on iris[-5]
with 3 groups.# Perform clustering
iris_k <- kmeans(iris[-5], 3)
# Autoplot: color according to cluster, plus shape according to species
autoplot(iris_k, data = iris, frame = T, shape = 'Species')
geom_polygon()
: Make a choropleths like drawing a polygon.ggplot2::map_data()
# Get the map data
library(ggplot2)
usa<- map_data('usa')
library(ggmap)
# Build the map
ggplot(usa, aes(x = long, y = lat, group = group)) +
geom_polygon() +
coord_map() +
theme_nothing()
readOGR()
to read in the shapefiles and store the data as germany.
dsn
, data source name, the folder of the shapefiles. Should be “shapes”.layer
, the level you are interested in. You want state information, so “DEU_adm1”.# All required packages are available
# Import shape information: germany
library(rgdal)
germany <- readOGR(dsn = 'D:/Downloads/shape_files', layer = 'DEU_adm1')
## OGR data source with driver: ESRI Shapefile
## Source: "D:\Downloads\shape_files", layer: "DEU_adm1"
## with 16 features
## It has 16 fields
# fortify germany: bundes
library(ggfortify)
bundes <- fortify(germany)
# Plot map of germany
library(ggmap)
ggplot(bundes, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = '#DDDDDD', col = 'white') +
coord_map() +
theme_nothing()
# Import unemployment of germany
unemp <- read.table('D:/Downloads/germany_unemployment.txt', header = T)
# re-add state names to bundes
bundes$state <- factor(as.numeric(bundes$id))
levels(bundes$state) <- germany$NAME_1
# Merge bundes and unemp: bundes_unemp
bundes_unemp <- merge(bundes, unemp)
# Update the ggplot call
library(ggthemes)
ggplot(bundes_unemp, aes(x = long, y = lat, group = group, fill = unemployment)) +
geom_polygon() +
coord_map()
#theme_map()
#Cause legend & map overlapping
ggmap::get_map(location = '*', zoom = *, maptype = '*')
library(ggmap)
germany_06 <- get_map(location = 'germany', zoom = 6)
# Plot map and polygon on top:
ggmap(germany_06) +
geom_polygon(data = bundes,
aes(x = long, y = lat, group = group),
fill = NA, col = "red") +
coord_map()
library(devtools)
assignInNamespace("version_info",
c(devtools:::version_info,
list("3.5" = list(version_min = "3.3.0", version_max = "99.99.99", path = "bin"))),
"devtools")
find_rtools() # is TRUE now
n devtools::install_github("thomasp85/tweenr")
devtools::install_github("thomasp85/gganimate")
# Data
library(tidyr)
library(stringr)
res <- read.csv('D:/GitHub/NTU-CS-X/Final Project/Youbike/data/Youbike_res.csv')
map <- get_map(location = c(min(res$lng), min(res$lat), max(res$lng), max(res$lat)), maptype = "toner")
res1 <- read.csv('D:/GitHub/NTU-CS-X/Final Project/Youbike/data/Youbike_res1.csv')
res1_g <- gather(res1, time, per, X7:X24)
res1_g$time <- str_replace(res1_g$time, 'X', '')
library(gganimate)
library(RColorBrewer)
res.stat.map.ani <- ggmap(map, darken = c(0.5, "white")) %+% res1_g + aes(x = lng, y = lat, z = per) +
stat_summary_2d(fun = median, alpha = 0.6) +
scale_fill_gradientn(name = 'Median', colours = brewer.pal(11, "RdYlGn"), space = 'Lab') +
labs(title = 'Monday, {closest_state}', x = "Longitude", y = "Latitude") +
coord_map() +
transition_states(time, transition_length = 1, state_length = 1)
print(res.stat.map.ani)