# dplyrGroupAndSort.r
# libraries and data
library(tidyverse)
library(dslabs)
data(murders)
# Grouping demo: group_by() followed by summarize().
# Split the heights data by sex, then report the mean and the standard
# deviation of height for each group (one output row per sex).
summarize(
  group_by(heights, sex),
  average = mean(height),
  standard_deviation = sd(height)
)
# Add a murder_rate column (murders per 100,000 people) to the murders data.
murders <- mutate(murders, murder_rate = total / population * 100000)

# Median murder rate within each region (one output row per region).
summarize(
  group_by(murders, region),
  median_rate = median(murder_rate)
)
# Calculate the overall murder rate (per 100,000 people) and extract it as a
# plain numeric value in a single pipe.
# The rate is total murders divided by total population, then scaled by
# 100000; sum(population) is closed before the multiplication so the scaling
# applies to the ratio, not to the denominator.
us_murder_rate <- murders %>%
  summarize(rate = sum(total) / sum(population) * 100000) %>%
  pull(rate)
# Sorting demo: arrange() and top_n().
# Make sure the murders data carries a murder_rate column
# (murders per 100,000 people) before sorting on it.
murders <- mutate(murders, murder_rate = total / population * 100000)
# Each call below sorts the murders data and shows the first rows via head().

# Ascending by population.
head(arrange(murders, population))

# Ascending by murder rate.
head(arrange(murders, murder_rate))

# Descending by murder rate; desc() reverses the sort order.
head(arrange(murders, desc(murder_rate)))

# By region first, then by ascending murder rate within each region.
head(arrange(murders, region, murder_rate))
# show the top 10 states with highest murder rate, not ordered by rate
# (top_n() keeps the rows in their existing order)
murders %>%
  top_n(10, murder_rate)

# show the top 10 states with highest murder rate, ordered by rate
# The ranking column is named explicitly: without it top_n() falls back to
# the last column of the table, which only works while murder_rate happens
# to be last, and it prints a "Selecting by ..." message.
murders %>%
  arrange(desc(murder_rate)) %>%
  top_n(10, murder_rate)