-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy patheda.R
More file actions
87 lines (54 loc) · 1.71 KB
/
eda.R
File metadata and controls
87 lines (54 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
## This is just a scratchpad file so ignore its contents. The application code is located in app.R
# Setup: attach packages and load the data.
# The workspace is deliberately NOT wiped with rm(list = ls()). That call
# only removes objects from the global environment (attached packages and
# options survive), so it does not give a clean session, and it destroys
# anything else the user had in memory. Restart R for a fresh run instead.
library(tidyverse)
library(lubridate)
library(purrr)

# Read the CSV (path is relative to the current working directory) and print
# a compact overview of its columns.
data <- read_csv('data/statsports.csv')
glimpse(data)
# Columns used by the analysis. The commented-out entries are currently
# excluded.
vars <- c(
  "Accelerations Z3 to Z6",
  "Deceleration Z3 to Z6",
  "Distance Per Min",
  "Distance Total",
  # "Dynamic Stress Load",
  # "Sprints",
  "Player Display Name",
  "Type"
)

# Keep only the columns listed in `vars` and the rows whose Type is
# 'Training', then drop Type (it is constant after the filter).
# all_of() makes it explicit that `vars` is an external character vector:
# a bare select(vars) is ambiguous (it would pick a column literally named
# "vars" if one existed) and is deprecated by tidyselect.
data <- data %>%
  select(all_of(vars)) %>%
  filter(Type == 'Training') %>%
  select(-Type)
# One row per athlete: average every remaining column within each
# `Player Display Name`, then standardise each numeric column across athletes.
# scale() returns a one-column matrix; as.numeric() flattens it so the
# tibble holds plain numeric vectors instead of matrix columns.
# across() replaces the superseded summarise_all() / mutate_if() verbs.
data_by_athlete <- data %>%
  group_by(`Player Display Name`) %>%
  summarise(across(everything(), mean)) %>%
  mutate(across(where(is.numeric), function(x) as.numeric(scale(x))))
glimpse(data_by_athlete)
# Cluster the athletes into 3 groups on every column except the name.
# kmeans() starts from randomly chosen centres, so the seed is fixed and
# several random starts are tried (nstart) to make the result reproducible
# and less dependent on a single unlucky initialisation.
set.seed(42)
model <- kmeans(
  data_by_athlete %>% select(-`Player Display Name`),
  centers = 3,
  nstart = 25
)
library(tidyr)

# Cluster centres as a data frame; the matrix row names (the cluster ids)
# become an explicit `cluster` column.
centers_df <- as.data.frame(model$centers) %>%
  rownames_to_column("cluster")

# Heatmap of the centres: one tile per (metric, cluster), filled by the
# centre value. pivot_longer() replaces the superseded gather(), which was
# also being called with a partially matched argument name (`k =`).
centers_df %>%
  pivot_longer(cols = -cluster, names_to = "metric", values_to = "value") %>%
  ggplot(aes(metric, cluster, fill = value)) +
  geom_tile() +
  scale_fill_gradient(high = "green", low = "red")
# Numeric feature table (athlete name removed).
# NOTE: this overwrites the `data` object created earlier in the script.
data <- data_by_athlete %>% select(-`Player Display Name`)

# Principal components of the feature table, printed for inspection.
pcs <- prcomp(data)
summary(pcs)
attributes(pcs)
pcs$x

# Re-run the 3-cluster k-means on the same features, with a fixed seed and
# several random starts so the cluster labels are reproducible.
set.seed(42)
model <- kmeans(data, centers = 3, nstart = 25)

# Combine athlete rows, PC scores and cluster labels. The label is stored as
# a properly named factor column, so the plot maps a column of `dd2` instead
# of reaching back into the global `model` object (the unnamed cbind()
# argument used to create a column literally called "model$cluster").
dd2 <- cbind(data_by_athlete, pcs$x, cluster = factor(model$cluster))

# Athletes in the plane of the first two principal components, coloured by
# cluster and labelled by name.
ggplot(dd2, aes(PC1, PC2, label = `Player Display Name`, color = cluster)) +
  geom_point() +
  geom_text()
# Elbow analysis: total within-cluster sum of squares for a range of k.

# Standardised feature table (athlete name removed). across() replaces the
# superseded mutate_all(); as.numeric() flattens the one-column matrix that
# scale() returns.
scaled_data_for_clustering <- data_by_athlete %>%
  select(-`Player Display Name`) %>%
  mutate(across(everything(), function(x) as.numeric(scale(x))))

# Try up to 10 clusters, but keep k strictly below the number of distinct
# rows: kmeans() stops with an error when it cannot pick that many distinct
# starting centres, which a hard-coded 1:10 triggers on small squads.
max_k <- max(1, min(10, nrow(distinct(scaled_data_for_clustering)) - 1))
k_values <- seq_len(max_k)

# Fixed seed and several random starts per k keep the curve reproducible.
set.seed(42)
tot_withinss <- map_dbl(k_values, function(k) {
  kmeans(scaled_data_for_clustering, centers = k, nstart = 25)$tot.withinss
})

# Plot the curve; the same k_values drive both the data and the axis breaks
# so they cannot drift apart.
data.frame(
  k = k_values,
  tot_withinss = tot_withinss
) %>%
  ggplot(aes(k, tot_withinss)) +
  geom_line() +
  scale_x_continuous(breaks = k_values)