# bibliographic_analysis/r code.R at master · LinhLTP/bibliographic_analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# Load required libraries
pacman::p_load(
  rio,            # Import/export data
  here,           # File paths management
  tidyverse,      # Data manipulation and visualisation
  ggplot2,        # Data visualisation
  data.table,     # Data manipulation
  dplyr,          # Data wrangling
  bibliometrix,   # Bibliometric analysis
  readxl,         # Read Excel files
  tidyr           # Data tidying
)

# Combine multiple .bib files into a single file
# "path" is the directory that holds the exported .bib files.
d <- list.files("path", pattern = "\\.bib$", full.names = TRUE)

# The combined file is written into the same directory, so a previous run's
# Bib.bib would match the pattern above and duplicate every record. Skip it.
d <- d[basename(d) != "Bib.bib"]

if (length(d) == 0) {
  stop("No .bib files found in 'path'", call. = FALSE)
}

# Read every file and concatenate the lines into one character vector
Unlist_files <- unlist(lapply(d, readLines))

# Write the combined content, one element per line
writeLines(Unlist_files, con = "path/Bib.bib")

# Read the combined BibTeX export (declared as a Scopus source) into a data frame
file <- "path/Bib.bib"
M <- convert2df(file, format = "bibtex", dbsource = "scopus")
openxlsx::write.xlsx(x = M, file = "M.xlsx", rowNames = TRUE)

# Bibliometric Analysis
# Run the descriptive analysis, summarise it with k = 10 and no pausing,
# then export the summary object to Excel.
results <- biblioAnalysis(M, sep = ";")
S <- summary(results, pause = FALSE, k = 10)
openxlsx::write.xlsx(x = S, file = "S.xlsx", rowNames = TRUE)

## Extract and clean Author's Keywords (DE)
# One output row per keyword: DE is split on ";" and the resulting list
# column is unnested. The cleaning step is applied to the column directly;
# piping KW$DE1 into str_replace_all(KW$DE1, ...) would pass the vector twice
# and fail with an unused-argument error.
KW <- M %>%
  mutate(DE1 = strsplit(as.character(DE), ";")) %>%    # Split keywords by ";"
  unnest(DE1) %>%                                      # Unnest the list into rows
  mutate(
    DE1 = str_replace_all(DE1, "[^a-zA-Z0-9]", " "),   # Non-alphanumerics become spaces
    DE1 = trimws(DE1, "both")                          # Trim leading and trailing spaces
  )

openxlsx::write.xlsx(KW, "Keywords.xlsx", rowNames = TRUE)

## Extract and clean Author (AU)
# One output row per author: AU is split on ";" and the resulting list
# column is unnested. The cleaning step is applied to the column directly;
# piping AU$Author into str_replace_all(AU$Author, ...) would pass the vector
# twice and fail with an unused-argument error.
AU <- M %>%
  mutate(Author = strsplit(as.character(AU), ";")) %>%       # Split authors by ";"
  unnest(Author) %>%                                         # Unnest the list into rows
  mutate(
    Author = str_replace_all(Author, "[^a-zA-Z0-9]", " "),   # Non-alphanumerics become spaces
    Author = trimws(Author, "both")                          # Trim leading and trailing spaces
  )

openxlsx::write.xlsx(AU, "AU author.xlsx", rowNames = TRUE)

## Extract and clean Authors' Affiliations
# The earlier wide-format split (cSplit into Author_Affiliation_1..7) was
# removed: its result was overwritten by the assignment below before being
# used, and its hard-coded 1:7 column range failed whenever the split
# produced fewer columns.

## split to row
# One row per ";"-separated entry of the C1 column
C1 <- M %>%
  mutate(Author_Affiliation = strsplit(as.character(C1), ";")) %>%
  unnest(Author_Affiliation)

# One row per ","-separated component of each entry, then clean the text
Affiliation <- C1 %>%
  mutate(Author_Affiliation = strsplit(as.character(Author_Affiliation), ",")) %>%
  unnest(Author_Affiliation) %>%
  mutate(
    Author_Affiliation = str_replace_all(Author_Affiliation, "[^a-zA-Z0-9]", " "),
    Author_Affiliation = trimws(Author_Affiliation, "both"),
    # Any component containing "THE UNIVERSITY OF GLASGOW" is collapsed to a
    # single canonical spelling; everything else is kept unchanged.
    Author_Affiliation = case_when(
      str_detect(Author_Affiliation, "THE UNIVERSITY OF GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      .default = as.character(Author_Affiliation)
    )
  )

openxlsx::write.xlsx(Affiliation, "Authors Affiliations.xlsx", rowNames = TRUE)

# Network analysis
## Country collaboration
# Add the AU_CO tag to M before building the country network.
# (A stray "." after this call previously made the script unparseable.)
M <- metaTagExtraction(M, Field = "AU_CO", sep = ";")  # collaboration network

NetMatrix <- biblioNetwork(M,
                           analysis = "collaboration",
                           network = "countries",
                           sep = ";")

# Plot every vertex (n = number of rows of NetMatrix), first with the
# "sphere" layout and then with the "circle" layout.
net <- networkPlot(NetMatrix,
                   n = dim(NetMatrix)[1],
                   Title = "Country Collaboration",
                   type = "sphere",
                   size = TRUE,
                   remove.multiple = FALSE,
                   labelsize = 0.8)

net <- networkPlot(NetMatrix,
                   n = dim(NetMatrix)[1],
                   Title = "Country Collaboration",
                   type = "circle",
                   size = TRUE,
                   remove.multiple = FALSE,
                   labelsize = 0.8)

## Affiliation collaboration
# NOTE(review): assumes M already has an AU_UN column at this point - confirm.
# One row per ";"-separated entry of AU_UN
AU_UN <- M %>%
  mutate(university = strsplit(as.character(AU_UN), ";")) %>%   # string split
  unnest(university)

AU_UN$university <- AU_UN$university %>%
  str_replace_all("[^a-zA-Z0-9]", " ") %>%
  trimws("both")

# Map each cleaned name to its canonical spelling (university1)
lookup <- AU_UN %>%                                             # word cleaning
  mutate(
    university1 = case_when(
      str_detect(university, "THE UNIVERSITY OF GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      str_detect(university, "GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      str_detect(university, "SOCIAL POLICY AND CRIMINOLOGY UNIVERSITY OF STIRLING") ~ "UNIVERSITY OF STIRLING",
      .default = as.character(university)
    )
  )

lookup <- lookup[, c("university", "university1")]              # lookup table

# Keep only the rows that actually change something: duplicates, NA/empty
# names and names already in canonical form would add useless (or, for NA,
# harmful) patterns to the sequential replacement below.
replacements <- unique(lookup)
keep <- !is.na(replacements$university) &
  !is.na(replacements$university1) &
  nzchar(replacements$university) &
  replacements$university != replacements$university1
replacements <- replacements[keep, ]

### clean data in original file
# Replace everything except letters, digits and ";" with a space
M$AU_UN <- str_replace_all(M$AU_UN, "[^a-zA-Z0-9\\;]", " ")

if (nrow(replacements) > 0) {
  # vectorize_all = FALSE applies every pattern/replacement pair, in order,
  # to each element of M$AU_UN; matching is case-sensitive.
  M$AU_UN <- stringi::stri_replace_all_regex(
    str = M$AU_UN,
    pattern = paste0("\\b", replacements$university, "\\b"),    # add word boundaries
    replacement = replacements$university1,
    vectorize_all = FALSE,
    opts_regex = stringi::stri_opts_regex(case_insensitive = FALSE)
  )
}

NetMatrix <- biblioNetwork(M,                                   # university collaboration network
                           analysis = "collaboration",
                           network = "universities",
                           sep = ";")

# Plot every vertex (n = number of rows of NetMatrix) in a circle layout
net <- networkPlot(NetMatrix,
                   n = dim(NetMatrix)[1],
                   Title = "University Collaboration",
                   type = "circle",
                   size = TRUE,
                   remove.multiple = FALSE,
                   labelsize = 0.8)

## Author collaboration
# Build the author-level collaboration matrix from M
NetMatrix <- biblioNetwork(
  M,
  network = "authors",
  analysis = "collaboration",
  sep = ";"
)

# Draw all vertices (one per row of NetMatrix) using the sphere layout
net <- networkPlot(
  NetMatrix,
  Title = "Author Collaboration",
  type = "sphere",
  n = nrow(NetMatrix),
  labelsize = 0.8,
  size = TRUE,
  remove.multiple = FALSE
)

# Keywords co-occurrences
# Build the author-keyword co-occurrence matrix from M
NetMatrix <- biblioNetwork(M,
                           analysis = "co-occurrences",
                           network = "author_keywords",
                           sep = ";")

# Plot with n = 40 and association normalisation. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
net <- networkPlot(NetMatrix,
                   normalize = "association",
                   weighted = TRUE,
                   n = 40,
                   Title = "Author Keyword Co-occurrences",
                   type = "sphere",
                   size = TRUE,
                   edgesize = 6,
                   labelsize = 0.7,
                   remove.multiple = TRUE)

# Convert the cluster_res element of the plot result into a data frame
net_groups_kw <- as.data.frame.table(net$cluster_res)