# app.R -- MicroPower Plus Shiny app
# (USFOneHealthCodeathon2020/Team1_MicroPowerPlus, master branch)

library(shiny)
library(plotly)
library(tidyverse)
library(shinythemes)

# Shiny rejects uploads above 5 MB by default; allow up to 30 MB (in bytes).
options(shiny.maxRequestSize = 30 * 1024^2)

# Static example data, loaded once at start-up and used by the server to fit
# the effect-size model.
df_sim_data <- read_csv(file = "human_gut_power_simulation_results.csv")

# Build the "Reference Data Sources" block that is shown in the sidebar of
# both the power-estimation tab and the effect-size tab.
#
# Each citation is its own sibling paragraph: <p> elements may not be nested
# inside one another in HTML.
#
# Returns a tag list (heading followed by four paragraphs).
reference_data_sources <- function() {
  tagList(
    h4("Reference Data Sources"),
    p(
      a(href = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3004851/", "1) Charlson et al. (2010)"),
      "Microbiota from the right and left nasopharynx and oropharynx of 29 smoking vs 33 non-smoking healthy adults were compared to determine the microbial configuration and effects of cigarette-smoking."
    ),
    p(
      a(href = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3480531/", "2) Charlson et al. (2012)"),
      "Microbial populations found within the respiratory tract of transplant-patients were compared to non-transplanted control-subjects. Lung-transplant patients had a higher bacterial burden in the bronchoalveolar lavage, a more frequent showing of dominant organisms, an increased distance between communities (signifying more distinct populations), and a smaller respiratory-tract microbial diversity."
    ),
    p(
      a(href = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3564958/", "3) HMP Consortium (2012b)"),
      "Microbiome samples from 18 body sites of 242 healthy, Western adults were compared to describe individual variation."
    ),
    p(
      a(href = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3368382/", "4) Wu et al. (2011)"),
      "The effect size that was studied in this experiment was fecal communities that grouped into enterotypes characterized by various levels of Bacteroides and Prevotella. It was deduced that alternative enterotype states are associated with a long-term diet."
    )
  )
}

# the ui object has all the information for the user-interface
ui <- fluidPage(
  h1("MicroPower Plus"),
  theme = shinytheme("sandstone"),
  tabsetPanel(
    # make the main tab
    tabPanel(
      "Estimate effect size and power",
      br(),
      sidebarLayout(
        sidebarPanel(
          # Input: select Human microbiome-site for example-data
          selectInput(
            "sampleType",
            "Choose a Sample Type:",
            c(
              "Human Gut" = "gut",
              "Oral" = "oral",
              "Lungs" = "lung",
              "Vagina" = "vagina"
            )
          ),

          # Input: choose a pre-computed sample-size (up to 20 for
          # proof-of-concept; may add more later)
          selectInput(
            "sampleSize",
            "Choose a Sample Size:",
            c(
              "Five" = "5",
              "Ten" = "10",
              "Fifteen" = "15",
              "Twenty" = "20"
            )
          ),

          # Input: select a distance-measure to use to calculate effect-size
          # (doesn't work yet, just a concept)
          selectInput(
            "mdistance",
            "Choose a Distance Measure:",
            c(
              "Weighted Jaccard" = "wjac",
              "Weighted UniFrac" = "wfrac"
            )
          ),

          # Input: Slider to select desired power-level
          sliderInput(
            "power",
            "Choose desired power:",
            min = 0,
            max = 1,
            value = 0.8
          ),
          reference_data_sources()
        ),
        mainPanel(plotlyOutput("plot2"))
      )
    ),
    tabPanel(
      "Use your own Data",
      br(),

      # Input: option for user to upload their own OTU/ASV-table (only a
      # concept--not functional yet)
      fileInput(
        "user.otu",
        "Pilot-study OTU or ASV-table",
        placeholder = "Or select example data-set below"
      ),
      h4("Sample Size"),
      fluidRow(
        column(3, numericInput("exp", p("Experimental"), value = 1)),
        column(3, numericInput("control", p("Control"), value = 1))
      ),
      fluidRow(
        column(3, textInput("email", h4("Email"), value = "Enter email.."))
      ),
      p("The results will be emailed to you..")
    ),
    tabPanel(
      "Calculate Effect Size",
      br(),
      a(href = "https://academic.oup.com/bioinformatics/article/31/15/2461/188732#26918939", "Data-sources and example effect-size calculations"),

      # This "parameter glossary" isn't strictly useful on its own without
      # Sherry's markdown tutorial
      h4("Parameter Glossary"),
      p(
        "“adonis” is	a	function	for	the	analysis	and	partitioning	sums	of	squares	using	semimetric
and	metric	distance	matrices."
      ),
      p(
        "Null	hypothesis :	There	is	no	difference	between	these	two	or	more	comparable	groups."
      ),
      p(
        "R-square is	the	important	statistic	for	interpreting	Adonis	as	it	gives	you	the	effect	size.
(For	example:	an	R-squared	of	0.44	means	that	44%	of	the	variation	in	distances	is	explained
by	the	grouping	being	tested.	The	p-value	tells	you	whether	or	not	this	result	was	likely	a
result	of	chance.	A	p-value	of	0.05	means	that	there	is	a	5%	chance	that	you	detected	a
difference	between	groups.)"
      ),
      p(
        "Small	p-value	with	small	R-square :	this	situation	normally	because	of	large	sample	size.
Actually	only	small	part	can	be	explained,	however	large	sample	size	make	the	p-value
small."
      ),
      p(
        "Omega-squared provides	a	less	biased	measure	of	effect	size	for	ANOVA-type
analyses	by	accounting	for	the	mean-squared	error	of	the	observed	samples."
      ),
      # The layout is a sibling of the glossary paragraphs, not a child of one.
      sidebarLayout(
        sidebarPanel(
          # Input: option for user to upload their own OTU/ASV-table (only a
          # concept--not functional yet). The id differs from the upload on
          # the "Use your own Data" tab because Shiny input ids must be unique
          # across the whole page.
          fileInput(
            "effect.otu",
            "Load File",
            placeholder = "Or select example data-set below"
          ),
          h4("Sample Size"),
          fluidRow(
            column(
              6,
              textInput("expName", p("Experimental Name"), value = "Experimental Name")
            ),
            column(
              6,
              textInput("controlName", p("Control Name"), value = "Control Name")
            )
          ),
          reference_data_sources()
        ),
        mainPanel(
          plotlyOutput("plot3")
        )
      )
    ),
    tabPanel(
      "Literature",
      br(),
      h4("Importance of Microbiome"),
      p(
        "The microbiome is the genetic material found within microbes that reside in organisms.  In recent years, there has been an increase in researching the associations between the microbiome and an organism’s overall wellbeing.  There have been connections between health conditions such as asthma, allergies, autoimmune conditions and other various diseases with the microbiome (Debelius, et al., 2016)"
      ),
      h4("Importance of Statistical Power Calculations within Microbiome"),
      p(
        "Statistical Power Calculations aid in figuring out the sample size in order to determine the probability of acquiring significant results from a statistical test when an effect is present.  It is difficult to determine a correlational relationship between certain taxa and disease.  These differences can stem from differing definitions of what a clinical population signifies within different studies, how to handle sample preparation, and the overall bioinformatics and statistical tools (Debelius, et al., 2016). Determining effect size is of crucial importance to aid in determining the differences within community profiling.  Effect size is the quantitative portion of the differences between two or more groups.  For example, power and sample-size estimation along with PERMANOVA has been utilized in order to ensure that effect expected from the interference of interest is detectable (Kelly, et al., 2015)"
      ),
      h4("Limitations of Power Analyses"),
      p(
        "Power analyses do not generalize favorably, in which if one were to alter the methodology to collect the data or even alter the statistical steps to analyze the data then the power analysis will have to be conducted again. For example, a power analysis could suggest an amount of subjects that is inappropriate for the usual statistical procedure, which will make the gathered data less precise (Statistical Consulting Group, 2020).  Moreover, power analyses give the optimum and best case scenario estimates of the fundamental amount of subjects necessary to detect the effect (Statistical Consulting Group, 2020).  Most of the time these estimates are based on assumptions, and if the assumptions are incorrect then one would have less power. As power analyses are based off assumptions then in turn a range of numbers needed is produced not a precise number, thus reducing accuracy of the experiment.
	Despite the recent technological advancements in statistical testing some software packages do not bear in mind certain factors that affect the power. For example, some packages can recommend differing sample sizes rather than the optimal sample size for a procedure (Statistics Solutions, 2019).  This signifies that power analyses can create overall guidelines for the sample size, but it is unable to indicate the complexities that an experiment could possibly have.
	Even gathering the effect sizes of power calculations can cause unforeseen errors.  Researchers gather effect sizes in either an empirical approach or on the basis of goals approach (Gelman & Carlin, 2014). With the empirical method, one presumes that the effect size is equal to the estimate from a preceding study. The basis of goals approach allows for the researcher to infer the effect size that would be the lowest number that is substantively important.  These approaches can cause studies to be too minute thus leading to a misinterpretation of the findings. (Turner & Houle, 2018).  Some researchers and statistical authorities recommend against utilizing power functions as there is an inappropriate use of these power calculations.  (Gelman & Carlin, 2014).  They believe that effect size and power is usually overestimated and many times subsequent analysis after the completed experiment is used to analyze nonsignificant findings.
"
      )
    ),
    tabPanel(
      "Effect Size Method Tutorial",
      br(),
      includeMarkdown("PERMANOVA.Rmd")
    )
  )
)

# Plot fonts: one typeface and colour at three sizes.
# f1 is used for the plot title and the y-axis title, f2 for the x-axis
# title, and f3 for the tick labels.
f1 <- list(family = "Arial, sans-serif", size = 24, color = "black")
f2 <- list(family = "Arial, sans-serif", size = 20, color = "black")
f3 <- list(family = "Arial, sans-serif", size = 16, color = "black")

# Plot margins passed to plotly's layout(); the large top margin leaves room
# for the multi-line title.
m <- list(l = 20, r = 20, b = 10, t = 100, pad = 4)



# Build the bar chart that compares one highlighted effect size against the
# effect sizes reported in the literature.
#
# estimated:  numeric effect size drawn as the first (red) bar.
# plot_title: title string handed to plotly's layout().
# trace_name: name of the bar trace.
#
# Uses the file-level font lists `f1`, `f2`, `f3` and the margin list `m`.
# Returns a plotly object.
build_effect_size_plot <- function(estimated, plot_title, trace_name) {
  # Superscripts point at the numbered "Reference Data Sources" list in the
  # UI: 1 = Charlson 2010, 2 = Charlson 2012, 3 = HMP Consortium (body
  # sites), 4 = Wu et al. (diet / feeding study).
  reference_labels <- c(
    "Oral: Azithromycin vs No Azithromycin<sup>2</sup>",
    "Lung: Azithromycin vs No Azithromycin<sup>2</sup>",
    "Nares: Smoker vs NonSmoker<sup>1</sup>",
    "Oral: Smoker vs NonSmoker<sup>1</sup>",
    "Gut: Before vs After Feeding<sup>4</sup>",
    "Human Anterior Nares vs Stool<sup>3</sup>"
  )
  reference_values <- c(0.099, 0.019, 0.023, 0.024, 0, 0.230)
  # Kept as strings so the bar labels show the values as originally written
  # (e.g. "0.230" rather than "0.23").
  reference_text <- c("0.099", "0.019", "0.023", "0.024", "0", "0.230")

  bar_labels <- c("Estimated Effect Size", reference_labels)
  # Highlight the estimated bar in red; literature bars are grey.
  bar_colors <- c(
    "rgba(222,45,38,0.8)",
    rep("rgba(204,204,204,1)", length(reference_labels))
  )

  plot_ly(
    x = bar_labels,
    y = c(estimated, reference_values),
    text = c(round(estimated, digits = 3), reference_text),
    textposition = "auto",
    name = trace_name,
    type = "bar",
    height = 700,
    marker = list(color = bar_colors)
  ) %>%
    layout(
      title = plot_title,
      margin = m,
      titlefont = f1,
      yaxis = list(
        title = "Effect size",
        titlefont = f1,
        showticklabels = TRUE,
        tickfont = f3,
        range = c(0, 0.3)
      ),
      xaxis = list(
        title = "Microbiome data-set",
        titlefont = f2,
        showticklabels = TRUE,
        tickangle = 45,
        tickfont = f3,
        # Fix the bar order to the order given above instead of letting
        # plotly choose one.
        categoryarray = bar_labels,
        categoryorder = "array"
      )
    )
}

# Shiny server function: wires the inputs to the two effect-size bar charts.
server <- function(input, output, session) {
  # Effect size predicted from the simulation data for the currently selected
  # sample size and power. sampleType and mdistance are passed through, but
  # the model helper does not use them yet.
  effect_size <- reactive({
    get_effect_size_from_sample_size_and_power(
      df_sim_data,
      input$sampleSize,
      input$power,
      input$sampleType,
      input$mdistance
    )
  })

  # Main tab: estimated effect size next to the literature values.
  output$plot2 <- renderPlotly(
    build_effect_size_plot(
      estimated = effect_size(),
      plot_title = "Effect Size<br>(How big would the difference\nbetween groups have to be to be detected?)",
      trace_name = "Effect Size (How big would difference have to be?)"
    )
  )

  # "Calculate Effect Size" tab: 0.03 is a hard-coded placeholder; nothing is
  # computed from an uploaded file here.
  output$plot3 <- renderPlotly(
    build_effect_size_plot(
      estimated = 0.03,
      plot_title = "Calculated Effect Size compared to \neffect sizes reported in the literature",
      trace_name = "Calculated Effect Size compared to effect sizes reported in the literature"
    )
  )
}


# Fit a log-log linear model that predicts effect size (omega-squared) from
# power for one sample size.
#
# df:          data frame with columns `Sample_Size`, `simulated_omega2` and
#              `power`.
# sample_size: sample size to select; compared with `==`, so a numeric value
#              or its string form (as delivered by a Shiny selectInput) both
#              work.
#
# Returns the fitted `lm` object for `log_omega2 ~ log_power` (both log10).
# Stops with an error when no usable rows remain for `sample_size`.
calculate_effect_size_model_for_sample_size <-
  function(df, sample_size) {
    # which() drops rows where the comparison is NA.
    selected <- df[which(df[["Sample_Size"]] == sample_size), , drop = FALSE]

    bp_model <- data.frame(
      log_omega2 = log10(selected[["simulated_omega2"]]),
      log_power = log10(selected[["power"]])
    )

    # log10(0) is -Inf and log10 of a negative value is NaN; lm() cannot fit
    # through non-finite values, so keep only rows finite in both columns.
    usable <- is.finite(bp_model$log_omega2) & is.finite(bp_model$log_power)
    bp_model <- bp_model[usable, , drop = FALSE]

    if (nrow(bp_model) == 0) {
      stop(
        "No usable simulation rows for sample size ", sample_size,
        call. = FALSE
      )
    }

    lm(log_omega2 ~ log_power, data = bp_model)
  }

# Predict the effect size for a given power from a fitted log-log model.
#
# model: fitted model with a `log_power` predictor whose response is on the
#        log10 scale.
# power: numeric power value(s).
#
# Returns the prediction transformed back from log10 to the original scale.
get_effect_size_from_power <- function(model, power) {
  new_obs <- data.frame(log_power = log10(power))
  log_effect <- predict(model, newdata = new_obs)
  10 ^ log_effect
}


# Estimate the effect size for a chosen sample size and power.
#
# df_sim_data: simulation results used to fit the model.
# sample_size: sample size whose simulation rows are used for the fit.
# power:       power at which the effect size is predicted.
# sample_type, mdistance: accepted but not used by this function.
#
# Returns the predicted effect size.
get_effect_size_from_sample_size_and_power <-
  function(df_sim_data,
           sample_size,
           power,
           sample_type,
           mdistance) {
    get_effect_size_from_power(
      calculate_effect_size_model_for_sample_size(df_sim_data, sample_size),
      power
    )
  }


shinyApp(ui, server)