3 + 3 # addition
## [1] 6
3 - 3 # subtraction
## [1] 0
3 * 3 # multiplication
## [1] 9
3 / 3 # division
## [1] 1
3 %% 3 # modulo (remainder of the division)
## [1] 0
變數種類
class(3.5)
## [1] "numeric"
class(3L) #Capital 'L' after an integer forces it to be stored as an integer.
## [1] "integer"
class(TRUE)
## [1] "logical"
class('R studio')
## [1] "character"
變數運算
3.5 + 3L
## [1] 6.5
3.5 + 'three'
## Error in 3.5 + "three": 二元運算子中有非數值引數
不同型別的變數型態之間的某些操作是不被允許的,例如數字與字串相加。
向量運算
# Element-wise vector arithmetic followed by simple aggregates.
vector1 <- c(100, 300, 500)
vector2 <- c(200, 200, -100)
vector3 <- vector1 + vector2 # vectors are combined element by element
total <- sum(vector3) # sum() adds up every element
# Stored as `average` rather than `mean` so that the base function mean() is
# not masked by a numeric object for the rest of the session.
average <- mean(vector3) # mean() computes the arithmetic mean
vector3
## [1] 300 500 400
total
## [1] 1200
average
## [1] 400
向量命名
names(vector3) <- c('America','England','Japan') #用names替向量命名
vector3
## America England Japan
## 300 500 400
向量選取
# Comparing a vector with a single number yields one logical value per element.
selection <- vector3 > 350
selection # the comparison returns logical values
## America England Japan
## FALSE TRUE TRUE
select_vec1 <- vector3[selection] # square brackets select the elements flagged TRUE
select_vec1
## England Japan
## 500 400
select_vec2 <- vector3[1:2] # elements can also be selected by index position; 1 is the first element
select_vec2
## America England
## 300 500
建立矩陣
# Star Wars box office figures, two values per film
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# Combine the three films into a single vector: box_office
box_office <- c(new_hope, empire_strikes, return_jedi)
# Build star_wars_matrix: a matrix with 3 rows (nrow = 3) whose values are
# filled in row by row (byrow = TRUE). TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
star_wars_matrix <- matrix(box_office, nrow = 3, byrow = TRUE)
star_wars_matrix
## [,1] [,2]
## [1,] 460.998 314.4
## [2,] 290.475 247.9
## [3,] 309.306 165.8
矩陣命名
# Vectors used for naming: region and titles
region <- c("US", "non-US")
titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
# colnames() sets the column names
colnames(star_wars_matrix) <- region
# rownames() sets the row names
rownames(star_wars_matrix) <- titles
star_wars_matrix
## US non-US
## A New Hope 460.998 314.4
## The Empire Strikes Back 290.475 247.9
## Return of the Jedi 309.306 165.8
運算合併
# rowSums() computes the total of each row (colSums() does the same per column)
worldwide <- rowSums(star_wars_matrix)
# cbind() appends columns (rbind() appends rows)
all_wars_matrix <- cbind(star_wars_matrix, worldwide)
all_wars_matrix
## US non-US worldwide
## A New Hope 460.998 314.4 775.398
## The Empire Strikes Back 290.475 247.9 538.375
## Return of the Jedi 309.306 165.8 475.106
矩陣選取:使用 [列, 欄] 來選取元素;連續選取以冒號連結,整列或整欄全選則將該位置留白。
#選出全部電影的非美國票房收入
non_us_all <- all_wars_matrix[,2]
non_us_all
## A New Hope The Empire Strikes Back Return of the Jedi
## 314.4 247.9 165.8
#選出前兩部電影的非美國票房收入
non_us_some <- all_wars_matrix[1:2,2]
non_us_some
## A New Hope The Empire Strikes Back
## 314.4 247.9
因素向量是用來儲存類別型變數的統計資料,類別型變數與連續型變數最主要的差異在於類別型變數有類別個數的上限,而連續型變數則會有無窮多的個數。用factor顯示級別。
#無法在類別之間比較好壞的【名目類別型變數】,如:動物。
animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
factor_animals_vector <- factor(animals_vector)
factor_animals_vector
## [1] Elephant Giraffe Donkey Horse
## Levels: Donkey Elephant Giraffe Horse
# Ordinal categorical variable: the categories have a natural order,
# e.g. temperature.
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
# `ordered = TRUE` is written in full; `order = TRUE` only worked through
# partial argument matching. `levels` lists the categories from lowest to
# highest.
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
factor_temperature_vector
## [1] High Low High Low Medium
## Levels: Low < Medium < High
級別命名
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
# Rename the levels with levels(). The existing levels are sorted
# alphabetically by default ("F" before "M"), so the new names are given in
# that order.
levels(factor_survey_vector) <- c('Female', 'Male')
factor_survey_vector
## [1] Male Female Female Male Male
## Levels: Female Male
#使用summary()產出摘要,注意summary(survey_vector)結果不同
summary(survey_vector)
## Length Class Mode
## 5 character character
summary(factor_survey_vector)
## Female Male
## 2 3
選取比較
# Build an ordered factor with factor(); levels run from slowest to fastest
speed_vector <- c("fast", "slow", "slow", "fast", "insane")
factor_speed_vector <- factor(speed_vector, ordered = TRUE, levels = c("slow", "fast", "insane"))
# Pick out individual observations
da2 <- factor_speed_vector[2]
da5 <- factor_speed_vector[5]
# Is data analyst 2 faster than data analyst 5?
da2 > da5
## [1] FALSE
矩陣中所有的元素都是相同類型的,資料框中將觀測值儲存為列,將變數儲存為欄,通常包含了不同類型的資料。 使用data.frame()將多個等長度向量組成資料框。選取內容方式與矩陣相同,但[]內可直接打列欄名稱,或以$連接。
head(mtcars)#head()用來顯示dataframe的前六項
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
tail(mtcars)#tail()用來顯示dataframe的後六項
## mpg cyl disp hp drat wt qsec vs am gear carb
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.6 1 1 4 2
str(mtcars)#str()用來快速了解dataframe結構
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
mtcars$qsec # `$` followed by a column name extracts that column of the data frame
## [1] 16.46 17.02 18.61 19.44 17.02 20.22 15.84 20.00 22.90 18.30 18.90
## [12] 17.40 17.60 18.00 17.98 17.82 17.42 19.47 18.52 19.90 20.01 16.87
## [23] 17.30 15.41 17.05 18.90 16.70 16.90 14.50 15.50 14.60 18.60
subset(mtcars, subset = qsec > 20)#使用subset()設定篩選條件
## mpg cyl disp hp drat wt qsec vs am gear carb
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
清單可以蒐集多樣性的物件,包含矩陣、向量、資料框甚至清單,這些物件甚至不需要跟彼此相關。 你可以將清單視為一種超級資料類型,基本上你可以將任何資訊都儲存在清單中!
# Use list() to bundle different kinds of objects, naming each component
my_vector <- 1:10
my_matrix <- matrix(1:9, ncol = 3)
my_df <- mtcars[1:3,]
my_list <- list(vec = my_vector, mat = my_matrix, df = my_df)
my_list
## $vec
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $mat
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
##
## $df
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
清單選擇
#運用[[]]選出清單中的特定資料,再使用[]選出該指定資料的內容
my_list[['df']][2,]
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
Logical Operators
TRUE > FALSE # TRUE is treated as 1 and FALSE as 0
## [1] TRUE
c(TRUE, TRUE, FALSE) & c(TRUE, FALSE, FALSE) # element-wise AND
## [1] TRUE FALSE FALSE
c(TRUE, TRUE, FALSE) | c(TRUE, FALSE, FALSE) # element-wise OR
## [1] TRUE TRUE FALSE
!c(TRUE, TRUE, FALSE) # NOT
## [1] FALSE FALSE TRUE
# && and || work on single values only. Older R versions silently used just
# the first element of longer vectors; R >= 4.3 raises an error instead, so
# the first elements are selected explicitly here.
c(TRUE, TRUE, FALSE)[1] && c(TRUE, FALSE, FALSE)[1]
## [1] TRUE
c(TRUE, TRUE, FALSE)[1] || c(TRUE, FALSE, FALSE)[1]
## [1] TRUE
Conditional Statements
注意if對齊,以及if位置層級關係。
# Classify `number` into a size label with nested if / else if / else branches.
number <- 4
if (number < 10) {
  # Below 10: distinguish further between values under 5 and the rest.
  if (number < 5) {
    result <- "extra small"
  } else {
    result <- "small"
  }
} else if (number < 100) {
  result <- "medium"
} else {
  result <- "large"
}
print(result)
## [1] "extra small"
Indefinite Loops
While 迴圈的執行次數並不固定;記得加入跳出迴圈的條件,或是使用 break 結束,以免形成無窮迴圈。
# Print multiples of 3 and stop at the first one that is divisible by 8.
i <- 1
while (i <= 10) {
  print(3 * i)
  # %% binds tighter than *, so the unparenthesised `3*i%%8` is parsed as
  # 3 * (i %% 8); the parentheses make the test apply to the product 3 * i.
  if ((3 * i) %% 8 == 0) {
    break
  }
  i <- i + 1
}
## [1] 3
## [1] 6
## [1] 9
## [1] 12
## [1] 15
## [1] 18
## [1] 21
## [1] 24
Definite Loops
For,兩種寫法。
# A list mixing a number, a character vector and a logical value
nyc <- list(
  pop = 8405837,
  boroughs = c("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island"),
  capital = FALSE
)
# Loop version 1: iterate over the list elements directly
for (p in nyc) {
  print(p)
}
## [1] 8405837
## [1] "Manhattan" "Bronx" "Brooklyn" "Queens"
## [5] "Staten Island"
## [1] FALSE
# Loop version 2: iterate over the positions and extract with [[ ]].
# seq_along() replaces 1:length(), which would yield c(1, 0) for an empty list.
for (i in seq_along(nyc)) {
  print(nyc[[i]])
}
## [1] 8405837
## [1] "Manhattan" "Bronx" "Brooklyn" "Queens"
## [5] "Staten Island"
## [1] FALSE
For loops for Matrix
使用print()、paste()重組句子。
# Build a 3 x 3 tic-tac-toe board, filled row by row; NA marks an empty cell
ttt <- matrix(
  c("O", NA, "X", NA, "O", "O", "X", NA, "X"),
  nrow = 3,
  byrow = TRUE
)
ttt
## [,1] [,2] [,3]
## [1,] "O" NA "X"
## [2,] NA "O" "O"
## [3,] "X" NA "X"
for (i in 1) { # only walk through the first row
  for (j in seq_len(ncol(ttt))) {
    print(paste("On row", i, "and column", j, "the board contains", ttt[i, j]))
  }
}
## [1] "On row 1 and column 1 the board contains O"
## [1] "On row 1 and column 2 the board contains NA"
## [1] "On row 1 and column 3 the board contains X"
抓句中字母
# Count the lowercase "r" characters in a sentence, stopping early if a
# lowercase "u" is encountered.
rquote <- "rSTUDIO needs lots of practice!"
# strsplit() with split = "" breaks the string into single characters
chars <- strsplit(rquote, split = "")[[1]]
rcount <- 0
for (char in chars) {
  if (char == "r") {
    rcount <- rcount + 1
  }
  if (char == "u") {
    break
  }
}
rcount
## [1] 2
# base::mean is named explicitly: when an object called `mean` exists in the
# workspace, args(mean) receives that object and simply returns NULL.
args(base::mean)
## function (x, ...) 
## NULL
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)
# na.rm = TRUE drops the NA values before the mean is computed
mean(abs(linkedin - facebook), na.rm = TRUE)
## [1] 4.8
Writing Functions
# Roll two six-sided dice and return the sum of the two faces.
# Each die is drawn independently, so the result differs from call to call.
two_dice <- function() {
  faces <- 6L
  first_roll <- sample.int(faces, size = 1L) # random face of the first die
  second_roll <- sample.int(faces, size = 1L) # random face of the second die
  first_roll + second_roll
}
two_dice()#每次的結果不同
## [1] 3
R Packages
# Install ggplot2 only when it is missing, then attach it.
# requireNamespace() checks availability without attaching anything, and
# library() stops with an error if the package still cannot be loaded
# (require() would merely return FALSE). The CRAN mirror is reached over HTTPS.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", repos = "https://cran.us.r-project.org")
}
library(ggplot2)
search()
## [1] ".GlobalEnv" "package:ggplot2" "package:stats"
## [4] "package:graphics" "package:grDevices" "package:utils"
## [7] "package:datasets" "package:methods" "Autoloads"
## [10] "package:base"
Lapply
nyc
## $pop
## [1] 8405837
##
## $boroughs
## [1] "Manhattan" "Bronx" "Brooklyn" "Queens"
## [5] "Staten Island"
##
## $capital
## [1] FALSE
lapply(nyc, class)
## $pop
## [1] "numeric"
##
## $boroughs
## [1] "character"
##
## $capital
## [1] "logical"
unlist(lapply(nyc, class))
## pop boroughs capital
## "numeric" "character" "logical"
# Anonymous function with an extra argument; lapply() forwards `multiplier`
# to every call.
lapply(
  list(1, 2, 3),
  function(value, multiplier) value * multiplier,
  multiplier = 3
)
## [[1]]
## [1] 3
##
## [[2]]
## [1] 6
##
## [[3]]
## [1] 9
Sapply
# lapply() returns a list holding one named summary vector per input vector
lapply(
  list(runif(10), runif(10)),
  function(draws) c(min = min(draws), mean = mean(draws), max = max(draws))
)
## [[1]]
## min mean max
## 0.0422843 0.4691537 0.9274780
##
## [[2]]
## min mean max
## 0.03935985 0.58862369 0.98087867
# sapply() simplifies the same per-vector summaries into a matrix with one
# column per input vector. TRUE is spelled out because the shorthand T is an
# ordinary variable that can be reassigned.
sapply(
  list(runif(10), runif(10)),
  function(x) c(min = min(x), mean = mean(x), max = max(x)),
  USE.NAMES = TRUE
)
## [,1] [,2]
## min 0.2321807 0.0414638
## mean 0.5003465 0.4242402
## max 0.7638852 0.8803119
Vapply
# vapply() takes a template describing the result of each call.
# Here the template is numeric(3): "numeric" is the type of the values and
# 3 is the length of the vector each call returns.
vapply(
  list(runif(10), runif(10)),
  function(draws) c(min = min(draws), mean = mean(draws), max = max(draws)),
  numeric(3)
)
## [,1] [,2]
## min 0.01576775 0.0009929538
## mean 0.66297311 0.4849325555
## max 0.99883821 0.9324300122
Useful Functions
Mathematical utilities
Data Utilities
# Chain several vector helpers together.
seq1 <- seq(1, 7, by = 2) # 1 3 5 7
rep1 <- rep(seq1, times = 2) # the sequence repeated twice
sort1 <- sort(rep1, decreasing = TRUE) # largest values first
rev1 <- rev(sort1) # reversed order, smallest values first
append1 <- append(sort1, rev1) # sort1 followed by rev1
append1
## [1] 7 7 5 5 3 3 1 1 1 1 3 3 5 5 7 7
Regular Expressions
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv",
  "kiara@@fakemail.edu"
)
# "@.*\\.edu$" matches an "@", then any characters (".*"), then a literal
# ".edu" ("\\." is an escaped dot) at the very end of the string ("$").
# grep() returns the positions of the matching elements.
hits <- grep("@.*\\.edu$", emails)
emails[hits]
## [1] "john.doe@ivyleague.edu" "quant@bigdatacollege.edu"
## [3] "kiara@@fakemail.edu"
# sub(pattern, replacement, x) replaces the first match of `pattern` in each
# element of x; elements without a match are returned unchanged.
sub("@.*\\.edu$", "@datacamp.edu", emails)
## [1] "john.doe@datacamp.edu" "education@world.gov"
## [3] "dalai.lama@peace.org" "invalid.edu"
## [5] "quant@datacamp.edu" "cookie.monster@sesame.tv"
## [7] "kiara@datacamp.edu"
awards <- c("Won 1 Oscar.",
"Won 1 Oscar. Another 9 wins & 24 nominations.",
"1 win and 2 nominations.",
"2 wins & 3 nominations.",
"Nominated for 2 Golden Globes. 1 more win & 2 nominations.",
"4 wins & 1 nomination.")
# The parenthesised group ([0-9]+) captures the number in front of
# "nomination"; "\\1" replaces the whole string with that captured number.
# Strings that do not match the pattern are returned unchanged.
sub(".*\\s([0-9]+)\\snomination.*$", "\\1", awards)
## [1] "Won 1 Oscar." "24" "2" "3"
## [5] "2" "1"
Times and Dates
str1 <- "2012-03-15"
class(str1)
## [1] "character"
# %Y = four-digit year, %m = month, %d = day of the month
date1 <- as.Date(str1, format = '%Y-%m-%d')
class(date1)
## [1] "Date"
str2 <- "2012-3-12 14:23:08"
class(str2)
## [1] "character"
# %H:%M:%S = hours (24-hour clock), minutes and seconds
time2 <- as.POSIXct(str2, format = '%Y-%m-%d %H:%M:%S')
class(time2)
## [1] "POSIXct" "POSIXt"
# %I = hour on the 12-hour clock, %p = AM/PM indicator as spelled in the current locale
format(time2, '%I:%M%p')
## [1] "02:23下午"
# Calculations with Dates & Times
as.Date("2015-03-12") - as.Date("2015-02-27")
## Time difference of 13 days
# An explicit time zone keeps the result independent of the machine's local
# zone and of any historical daylight-saving changes that zone may have had.
birth <- as.POSIXct("1879-03-14 14:37:23", tz = "UTC")
death <- as.POSIXct("1955-04-18 03:47:12", tz = "UTC")
einstein <- death - birth
einstein
## Time difference of 27792.55 days
Load Packages
# Install gapminder and dplyr only when they are missing, then attach them.
# requireNamespace() checks availability without attaching anything;
# library() stops with an error if a package cannot be loaded, whereas
# require() only returns FALSE. Packages are fetched from CRAN over HTTPS.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  install.packages("gapminder", repos = "https://cran.us.r-project.org")
}
library(gapminder)
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "https://cran.us.r-project.org")
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Filter()
# Keep only the observation for China in the year 2002
gapminder %>%
  filter(
    country == "China",
    year == 2002
  )
## # A tibble: 1 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 2002 72.0 1280400000 3119.
Arrange()
# Observations from 1957, sorted from the largest to the smallest population
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))
## # A tibble: 142 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 1957 50.5 637408000 576.
## 2 India Asia 1957 40.2 409000000 590.
## 3 United States Americas 1957 69.5 171984000 14847.
## 4 Japan Asia 1957 65.5 91563009 4318.
## 5 Indonesia Asia 1957 39.9 90124000 859.
## 6 Germany Europe 1957 69.1 71019069 10188.
## 7 Brazil Americas 1957 53.3 65551171 2487.
## 8 United Kingdom Europe 1957 70.4 51430000 11283.
## 9 Bangladesh Asia 1957 39.3 51365468 662.
## 10 Italy Europe 1957 67.8 49182000 6249.
## # ... with 132 more rows
Mutate()
# For 2007, add a GDP column (population times GDP per capita) and sort the
# rows by it in descending order
gapminder %>%
  filter(year == 2007) %>%
  mutate(GDP = pop * gdpPercap) %>%
  arrange(desc(GDP))
## # A tibble: 142 x 7
## country continent year lifeExp pop gdpPercap GDP
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 United States Americas 2007 78.2 301139947 42952. 1.29e13
## 2 China Asia 2007 73.0 1318683096 4959. 6.54e12
## 3 Japan Asia 2007 82.6 127467972 31656. 4.04e12
## 4 India Asia 2007 64.7 1110396331 2452. 2.72e12
## 5 Germany Europe 2007 79.4 82400996 32170. 2.65e12
## 6 United Kingdom Europe 2007 79.4 60776238 33203. 2.02e12
## 7 France Europe 2007 80.7 61083916 30470. 1.86e12
## 8 Brazil Americas 2007 72.4 190010647 9066. 1.72e12
## 9 Italy Europe 2007 80.5 58147733 28570. 1.66e12
## 10 Mexico Americas 2007 76.2 108700891 11978. 1.30e12
## # ... with 132 more rows
GGPLOT2 for Visualization
library(ggplot2)
# Observations for the year 1952 only
gapminder_1952 <- gapminder %>%
  filter(year == 1952)
# Scatter plot with pop on the x-axis and lifeExp on the y-axis
ggplot(data = gapminder_1952, mapping = aes(x = pop, y = lifeExp)) +
  geom_point()
Log Scales
# Same scatter plot with the x-axis (population) on a log10 scale
ggplot(data = gapminder_1952, mapping = aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
# Compare these two graphs: the points are more spread out along the x-axis here.
# It is now easier to see that there isn't a correlation between population and
# life expectancy.
Additional Aesthetics
# Map continent to point colour and GDP per capita to point size
ggplot(
  data = gapminder_1952,
  mapping = aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
) +
  geom_point() +
  scale_x_log10()
Faceting
# One panel per continent
ggplot(data = gapminder_1952, mapping = aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent)
Summarize()
# Keep 1957, then summarize the median life expectancy and the maximum GDP
# per capita into a single row
gapminder %>%
  filter(year == 1957) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )
## # A tibble: 1 x 2
## medianLifeExp maxGdpPercap
## <dbl> <dbl>
## 1 48.4 113523.
Group_by()
# The same two summaries, computed for every year / continent combination
gapminder %>%
  group_by(year, continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )
## # A tibble: 60 x 4
## # Groups: year [?]
## year continent medianLifeExp maxGdpPercap
## <int> <fct> <dbl> <dbl>
## 1 1952 Africa 38.8 4725.
## 2 1952 Americas 54.7 13990.
## 3 1952 Asia 44.9 108382.
## 4 1952 Europe 65.9 14734.
## 5 1952 Oceania 69.3 10557.
## 6 1957 Africa 40.6 5487.
## 7 1957 Americas 56.1 14847.
## 8 1957 Asia 48.3 113523.
## 9 1957 Europe 67.6 17909.
## 10 1957 Oceania 70.3 12247.
## # ... with 50 more rows
Visualizing Summarized Data
# by_year_continent: median GDP per capita for each continent in each year
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))
by_year_continent
## # A tibble: 60 x 3
## # Groups: year [?]
## year continent medianGdpPercap
## <int> <fct> <dbl>
## 1 1952 Africa 987.
## 2 1952 Americas 3048.
## 3 1952 Asia 1207.
## 4 1952 Europe 5142.
## 5 1952 Oceania 10298.
## 6 1957 Africa 1024.
## 7 1957 Americas 3781.
## 8 1957 Asia 1548.
## 9 1957 Europe 6067.
## 10 1957 Oceania 11599.
## # ... with 50 more rows
# Plot the change in medianGdpPercap over time, one colour per continent;
# expand_limits(y = 0) makes the y-axis include zero
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
Line Plot
# Same data as the previous plot, drawn as lines:
# the change in medianGdpPercap by continent over time
ggplot(
  data = by_year_continent,
  mapping = aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_line() +
  expand_limits(y = 0)
Bar Plot
# by_continent: median GDP per capita of each continent in 1952
# (filtered to a single year, so the grouping is by continent only)
by_continent <- gapminder %>%
  filter(year == 1952) %>%
  group_by(continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))
by_continent
## # A tibble: 5 x 2
## continent medianGdpPercap
## <fct> <dbl>
## 1 Africa 987.
## 2 Americas 3048.
## 3 Asia 1207.
## 4 Europe 5142.
## 5 Oceania 10298.
# Bar plot of medianGdpPercap by continent; geom_col() uses the y values as bar heights
ggplot(data = by_continent, mapping = aes(x = continent, y = medianGdpPercap)) +
  geom_col()
Histogram
# Observations for the year 1952 only
gapminder_1952 <- gapminder %>%
  filter(year == 1952)
# Histogram of population (pop), with the x-axis on a log10 scale
ggplot(data = gapminder_1952, mapping = aes(x = pop)) +
  geom_histogram() +
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Boxplot
# Box plot of GDP per capita per continent on a log10 y-axis;
# ggtitle() adds the plot title
ggplot(data = gapminder_1952, mapping = aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10() +
  ggtitle("Comparing GDP per capita across continents")