Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
docs
docs/
inst/doc
.DS_Store
74 changes: 37 additions & 37 deletions R/describe_curation.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,44 @@
#' full.dat <- bugsigdbr::importBugSigDB()
#' createTaxonTable(full.dat, n=20)

createTaxonTable <- function(dat, n=10){
createTaxonTable <- function(dat, n = 10, format = "Simple") {
dmap <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")
names(dmap) <- substring(dmap, 1, 1)

output <-
data.frame(getMostFrequentTaxa(dat, sig.type = "both", n = n),
stringsAsFactors = FALSE) %>%
mutate(metaphlan_name = Var1) %>%
separate(
col = Var1,
sep = "\\|",
into = dmap,
fill = "right"
) %>%
separate(col = Var1, sep = "\\|", into = dmap, fill = "right") %>%
mutate(across(kingdom:species, ~ str_replace(., ".__", ""))) %>%
rename(n_signatures = Freq)

output <-
output %>% mutate(n_signatures = sapply(output$metaphlan_name, function(x) {
sum(grepl(
pattern = x,
x = dat$`MetaPhlAn taxon names`,
fixed = TRUE
))
})) %>%
mutate(`Total Signatures` = sapply(metaphlan_name, function(x)
output %>%
mutate(total_signatures = sapply(metaphlan_name, function(x)
.countTaxon(dat = dat, x = x, direction = "both"))) %>%
mutate(`Increased Signatures` = sapply(metaphlan_name, function(x)
mutate(increased_signatures = sapply(metaphlan_name, function(x)
.countTaxon(dat = dat, x = x, direction = "increased"))) %>%
mutate(`Decreased Signatures` = sapply(metaphlan_name, function(x)
mutate(decreased_signatures = sapply(metaphlan_name, function(x)
.countTaxon(dat = dat, x = x, direction = "decreased"))) %>%
mutate(Taxon = gsub(".+\\|", "", output$metaphlan_name))

output %>% separate(col="Taxon", into=c("Taxonomic Level", "Taxon Name"), sep="__") %>%
mutate(Taxon = gsub(".+\\|", "", metaphlan_name)) %>%
select(-n_signatures)

output <- output %>%
separate(col = "Taxon", into = c("Taxonomic Level", "Taxon Name"), sep = "__") %>%
mutate(`Taxonomic Level` = unname(dmap[`Taxonomic Level`])) %>%
rowwise() %>%
mutate( `Binomial Test pval` = .createBinomTestSummary(`Increased Signatures`, `Total Signatures`, wordy = FALSE)) %>%
ungroup() %>%
relocate(`Taxon Name`, `Taxonomic Level`, `Total Signatures`, `Increased Signatures`, `Decreased Signatures`, `Binomial Test pval`)
rowwise() %>%
mutate(`Binomial Test pval` = .createBinomTestSummary(increased_signatures, total_signatures, wordy = FALSE)) %>%
ungroup()

simple_cols <- c("Taxon Name", "Taxonomic Level", "total_signatures",
"increased_signatures", "decreased_signatures", "Binomial Test pval")

if (format == "Simple") {
output %>% select(all_of(simple_cols))
} else {
output %>% relocate(all_of(simple_cols))
}
}

.countTaxon = function(dat, x, direction = c("both", "increased", "decreased")){
Expand Down Expand Up @@ -115,24 +115,24 @@ createStudyTable <- function(bsdb.df, includeAlso = NULL) {
# Core of the change is in how study IDs are generated, see function in
# simple.R. NB: the function also fixes DOI links as side effect, now.

bsdb_with_StudyIDs.df <- .make_unique_study_ID(bsdb.df)
bsdb_with_StudyCodes.df <- .make_unique_study_ID(bsdb.df)

# some dplyr-fu to summarize tables, with more recent syntax
study_table_fixed <- bsdb_with_StudyIDs.df %>%
group_by(`Study Identifier`) %>%
study_table_fixed <- bsdb_with_StudyCodes.df %>%
group_by(`Study code`) %>%
reframe(
Cases = max(`Group 1 sample size`),
Controls = max(`Group 0 sample size`),
MaxCases = max(`Group 1 sample size`),
MaxControls = max(`Group 0 sample size`),
across(
all_of(
c("Study design", "Condition", "PMID", "DOI", "URL", includeAlso)
),
.fns = function(x)
paste(unique(x), collapse = "; ")
),
`Number of signatures` = n()
N_signatures = n()
) %>%
relocate(`Number of signatures`, .after = Condition)
relocate(N_signatures, .after = Condition)

return(study_table_fixed)
}
Expand All @@ -147,10 +147,10 @@ globalVariables(
"Study.Design",
"Taxon Name",
"Binomial Test pval",
"Total Signatures",
"total_signatures",
"Abundance in Group 1",
"Decreased Signatures",
"Increased Signatures",
"decreased_signatures",
"increased_signatures",
"Taxonomic Level",
"metaphlan_name",
"Freq",
Expand All @@ -164,10 +164,10 @@ globalVariables(
"PMID",
"URL",
"uniqueRank",
"Study Identifier",
"Study code",
"Group 0 sample size",
"Group 1 sample size",
"Number of signatures"
"N_signatures"
)
)

16 changes: 12 additions & 4 deletions vignettes/capstoneanalysis_BearklandM.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,18 @@ lapply(unique(clusts), function(i) names(clusts)[clusts == i])

```{r}
#all taxon increased and decreased freq
allfreqs <- bugSigSimple::createTaxonTable(subset.final, n = 500) %>% #could change number
arrange(I(decreased_signatures - increased_signatures))
incfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) > 0)
decfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) < 0)
#allfreqs <- bugSigSimple::createTaxonTable(subset.final, n = 500, format = "Full") %>% #could change number
# mutate(delta = `Increased Signatures` - `Decreased Signatures`) %>%
# arrange(delta)

allfreqs <- bugSigSimple::createTaxonTable(subset.final, n = 500, format = "Simple") %>%
mutate(delta = increased_signatures - decreased_signatures) %>%
arrange(delta)
#arrange(I(decreased_signatures - increased_signatures))
#incfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) > 0)
#decfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) < 0)
incfreqs <- filter(allfreqs, delta > 0)
decfreqs <- filter(allfreqs, delta < 0)
kableExtra::kbl(allfreqs) %>%
kable_paper("hover", full_width = FALSE)
```
Expand Down
18 changes: 12 additions & 6 deletions vignettes/capstoneanalysis_fatima.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,9 @@ table(my.dat.cond[,"Condition"])
## Studies table

```{r}

createStudyTable(my.dat.cond) %>%
kbl() %>%
kable_styling()

```

## Taxa table
Expand Down Expand Up @@ -110,8 +108,12 @@ createStudyTable(my.dat.bd) %>%
## Taxa table

```{r}
createTaxonTable(my.dat.bd, n = 20) %>%
kbl() %>%
#createTaxonTable(my.dat.bd, n = 20) %>%
# kbl() %>%
# kable_styling()

createTaxonTable(my.dat.bd, n = 20) %>%
kbl() %>%
kable_styling()
```

Expand All @@ -128,8 +130,12 @@ table(my.dat.mdd[,"Condition"])

```{r}

createStudyTable(my.dat.mdd) %>%
kbl() %>%
#createStudyTable(my.dat.mdd) %>%
# kbl() %>%
# kable_styling()

createStudyTable(my.dat.mdd) %>%
kbl() %>%
kable_styling()

```
Expand Down
1 change: 1 addition & 0 deletions vignettes/fieldworkanalysis_samara.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ summary(lm(Streptococcus ~ Lactobacillus, data = df))
wilcox.test(df$Streptococcus, df$Lactobacillus)
t.test(df$Streptococcus, df$Lactobacillus)
```

```{r}
secount <-
curatedMetagenomicData::returnSamples(all_healthy_adult_vagina_metadata, dataType = "relative_abundance", counts = TRUE)
Expand Down