-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.r
More file actions
71 lines (54 loc) · 1.71 KB
/
analysis.r
File metadata and controls
71 lines (54 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# install.packages("RSQLite")
library(DBI)
library(RSQLite)
library(ggplot2)
library(dplyr)
# CONNECTING TO THE DATABASE
# With its default flags RSQLite silently creates a new, empty database when
# the file does not exist; the script would then fail later with a misleading
# missing-table error. Fail early with a clear message instead.
db_path <- "database.db"
if (!file.exists(db_path)) {
  stop("Database file not found: ", db_path, call. = FALSE)
}
con_r <- dbConnect(RSQLite::SQLite(), dbname = db_path)

# READING AND DISPLAYING DATA
# Load the whole "users" table into a data frame (df).
df <- dbReadTable(con_r, "users")
print("--- User Data ---")
print(head(df)) # preview of the first rows only
# BASIC STATISTICAL ANALYSIS
# na.rm = TRUE: if the grade column holds NULLs they are read as NA, and a
# single NA would otherwise turn the reported mean into NA.
grades_mean <- mean(df$grade, na.rm = TRUE)
cat("\nMean Grade of Users: ", grades_mean, "\n")

# Users with a grade of at least 70 are counted as approved. subset() already
# drops rows whose condition evaluates to NA.
approved_users <- subset(df, grade >= 70)
cat("Number of Approved Users: ", nrow(approved_users), "\n")
# SIMPLE GRAPHICAL REPRESENTATION
# Base-graphics histogram of the grade column.
hist(
  df[["grade"]],
  col = "lightblue",
  border = "white",
  main = "Grade Distribution",
  xlab = "Grades"
)
# RUNNING SQL QUERIES
# Name and grade of every user scoring above 80, highest grade first.
above_80 <- DBI::dbGetQuery(
  conn = con_r,
  statement = "SELECT name,
grade FROM users WHERE grade > 80 ORDER BY grade DESC"
)
print(above_80)
# CREATING A BOXPLOT
# Horizontal boxplot of the grades with the mean drawn on top of it.
boxplot(df$grade,
  main = "Grades Summary",
  col = "orange",
  horizontal = TRUE
)
# Red vertical line at the mean. na.rm = TRUE: boxplot() ignores missing
# grades, but mean() would return NA and the reference line would not appear.
abline(v = mean(df$grade, na.rm = TRUE), col = "red", lwd = 2)
# CREATING MULTIPLE COLUMNS WITH DPLYR
# status:   pass/fail flag using the same >= 70 cut-off as the approved count.
# category: four-level label; case_when() takes the first matching branch, so
#           the thresholds must stay in descending order.
df <- df %>%
  mutate(
    status = ifelse(grade >= 70, "Approved", "Not Approved"),
    category = case_when(
      # A missing grade makes every comparison below NA, so it would fall
      # through to the catch-all and be labelled "Needs Improvement" while
      # status stays NA. Keep both columns consistent.
      is.na(grade) ~ NA_character_,
      grade >= 90 ~ "Excellent",
      grade >= 70 ~ "Good",
      grade >= 50 ~ "Average",
      TRUE ~ "Needs Improvement"
    )
  )
# HISTOGRAM WITH GGPLOT2
# Histogram of grades in bins of width 5, with a red vertical line at the mean.
p1 <- ggplot(df, aes(x = grade)) +
  geom_histogram(binwidth = 5, fill = "#4e79a7", color = "white") +
  theme_minimal() +
  geom_vline(
    # na.rm = TRUE so a missing grade does not make the intercept NA.
    xintercept = mean(df$grade, na.rm = TRUE),
    color = "red",
    # `size` for line geoms is deprecated since ggplot2 3.4.0; `linewidth`
    # is the replacement.
    linewidth = 1
  )
print(p1)
# SAVING AND EXPORTING RESULTS AS CSV
# Write the data frame to disk without the row-name column.
utils::write.csv(x = df, file = "user_data.csv", row.names = FALSE)
print("File 'user_data.csv' has been succesfully created.")

# DISCONNECTING FROM THE DATABASE
# Close the SQLite connection opened at the top of the script.
DBI::dbDisconnect(conn = con_r)