-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunction_correctBetas.r
More file actions
120 lines (106 loc) · 4.1 KB
/
function_correctBetas.r
File metadata and controls
120 lines (106 loc) · 4.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#####======================================================================#####
### Function for correcting Illumina 450/850K beta values for tumor purity
#####======================================================================#####
##Author: Mattias Aine (mattias.aine@med.lu.se)
##Affiliation: Johan Staaf lab @ Lund University / Oncology & Pathology
################################################################################
##load required packages
if(!requireNamespace("flexmix", quietly = TRUE)) {
install.packages("flexmix") }
if(! "flexmix" %in% names(sessionInfo()$otherPkgs) ) {
library("flexmix") }
################################################################################
##main function
##define function with input = betas and purity estimate
##output = line parameters for L1/L2/L3
adjustBeta<-function(methylation=NULL,purity=NULL,snames=NULL,nmax=3,nrep=3,seed=TRUE) {
#grab unique row-seed from vector slot 1 (multicore reproducibility)
#delete seed afterwards
if(seed) {
set.seed( as.integer(methylation[1]) )
methylation<-methylation[-1]
}
#define variables
x<-as.numeric(purity)
x2<-1-as.numeric(purity)
y<-as.numeric(methylation)
#calculate global corr
gl.corr<-suppressWarnings(cor(x,y))
gl.corr[is.na(gl.corr)]<-0
gl.corr<-round(gl.corr,3)
#add small gaussian noise to x - problem with large number of zero samples
y2<-y+rnorm(length(y),mean=0,sd=.005)
#do modeling
model <- stepFlexmix(y2 ~ x,k = 1:nmax, nrep = nrep,verbose = FALSE)
model <- getModel(model, "BIC")
#get clusters
cl<-clusters(model)
#make sure clusters are numbered 1 to 3, odd cases exist where one pop has zero members from flexMix
#can rename because flexmix object not used after this
cl<-as.integer(factor(cl))
##get line parameters for each pop - calculate from data - use original y
res.norm<-unlist(lapply(1:nmax,function(z) {
if(z %in% cl) {
m<-lm(y[cl==z]~x[cl==z])
r<-coefficients(m)[1]+residuals(m)
names(r)<-snames[cl==z]
r
} else { NULL }
}))
res.norm<-res.norm[snames]
##get line parameters for each pop - calculate from data - use original y
res.tum<-unlist(lapply(1:nmax,function(z) {
if(z %in% cl) {
m<-lm(y[cl==z]~x2[cl==z])
r<-coefficients(m)[1]+residuals(m)
names(r)<-snames[cl==z]
r
} else { NULL }
}))
res.tum<-res.tum[snames]
##in very rare instances flexmix calls 3 populations but one groups has zero members. -> has parameters for non-existant pop in output?!
#res.int<-round(as.numeric(unlist(lapply(slot(model,"components"),function(z) slot(z[[1]],"parameters")$coef[1]))),3)
#res.slope<-round(as.numeric(unlist(lapply(slot(model,"components"),function(z) slot(z[[1]],"parameters")$coef[2]))),3)
##get line parameters for each pop - calculate from data - use original y and 1-purity
res.int<-unlist(lapply(1:nmax,function(z) {
if(z %in% cl) {
m<-lm(y[cl==z]~x2[cl==z])
r<-coefficients(m)[1]
round(as.numeric(r),3)
} else { NULL }
}))
##intercept is never zero but if only one member in flexMix population slope will be NA - fix
##if pop only has one member then both tumor and normal estimate will be original beta value
##could fix by assigning to nearest pop with >1 member but not certain this any better..
res.slope<-unlist(lapply(1:nmax,function(z) {
if(z %in% cl) {
m<-lm(y[cl==z]~x2[cl==z])
r<-coefficients(m)[2]
if(is.na(r)) { r<-0 } ##fix for small number of NAs - set slope to zero
round(as.numeric(r),3)
} else { NULL }
}))
##cap at 0 and 1
res.tum[res.tum > 1] <- 1
res.tum[res.tum < 0] <- 0
res.norm[res.norm > 1] <- 1
res.norm[res.norm < 0] <- 0
#round
res.tum<-round(res.tum,3)
res.norm<-round(res.norm,3)
res.orig<-round(methylation,3)
##return some stats
return( list(y.norm=res.norm,
y.tum=res.tum,
y.orig=res.orig,
groups=cl,
n.groups=length(levels(factor(cl))),
med.norm=round(median(res.norm),3),
glob.cor=gl.corr,
avg.betaDiff=round(mean(res.orig-res.tum),3),
model.intercepts=res.int,
model.slopes=res.slope
)
)
}
###END