Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Araformatics group
Araformatics project
Commits
2dbcd896
Commit
2dbcd896
authored
Dec 06, 2021
by
Patino Medina, Laura
Browse files
Upload New File
parent
dd6e6131
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/GOenrichment_proj.R
0 → 100644
View file @
2dbcd896
#!/usr/bin/env Rscript
#BIF30806 Project group: Araformatic
#(2021)
#GO enrichment
#Script to run topGO package for testing GO terms of differential gene
#expression analysis
# Scrip written based on:
# https://ucdavis-bioinformatics-training.github.io/2018-June-RNA-Seq-Workshop/friday/enrichment.html
# Documentation available of topGO usage
# https://www.r-bloggers.com/2015/09/passing-arguments-to-an-r-script-from-command-lines/
#Usage: Rscript GOenrichment_proj.R input_file.txt
# input_file: txt file, genes as rows, leafs as columns and values
# are adjusted p value from DGE
#Take arguments from command line
args
=
commandArgs
(
trailingOnly
=
TRUE
)
# test if there is at least one argument: if not, return an error
if
(
length
(
args
)
==
0
)
{
stop
(
"Argument missing"
,
call.
=
FALSE
)
}
else
if
(
length
(
args
)
==
1
)
{
# default output file
print
(
"Input file specified"
)
}
#Install packages from source.
source
(
"https://bioconductor.org/biocLite.R"
)
biocLite
(
c
(
"topGO"
,
"KEGGREST"
,
"org.At.tair.db"
,
"Rgraphviz"
))
if
(
!
requireNamespace
(
"BiocManager"
,
quietly
=
TRUE
))
install.packages
(
"BiocManager"
)
BiocManager
::
install
(
"topGO"
)
BiocManager
::
install
(
"AnnotationDbi"
)
BiocManager
::
install
(
"org.At.tair.db"
)
BiocManager
::
install
(
"Rgraphviz"
)
BiocManager
::
install
(
"multtest"
)
library
(
topGO
)
library
(
"org.At.tair.db"
)
library
(
KEGGREST
)
library
(
AnnotationDbi
)
library
(
Rgraphviz
)
library
(
multtest
,
verbose
=
FALSE
)
####### ADJUSTED P-VALUE #http://rileylab.org/wp-content/uploads/2016/08/multtest.pdf
#useful info: #multiple_dexp[,c(1,which(colnames(multiple_dexp) == current_leaf))]
#GO enrichment on multiple leafs(samples)
#list with samples names
multiple_dexp
<-
read.delim
(
args
[
1
],
stringsAsFactors
=
F
)
#multiple_dexp<- read.delim("multiple_input.txt",stringsAsFactors = F)
sample_names
<-
colnames
(
multiple_dexp
)
# all names
sample_names
[
2
:
length
(
sample_names
)]
pcutoff
<-
0.05
#gene names from DGE analysis output
genes
<-
multiple_dexp
$
AGI
for
(
leaf
in
sample_names
[
2
:
length
(
sample_names
)]){
current_leaf
=
leaf
#filtering genes.replace to 1 if above threshold, otherwise 0 (significant)
filter_multiple
<-
ifelse
(
multiple_dexp
[,
which
(
colnames
(
multiple_dexp
)
==
current_leaf
)]
<
pcutoff
,
1
,
0
)
# adj.P.Val name of the column
names
(
filter_multiple
)
<-
genes
geneList
<-
filter_multiple
# create topGOdata object
GOdata_sample
<-
new
(
"topGOdata"
,
ontology
=
"BP"
,
allGenes
=
geneList
,
geneSelectionFun
=
function
(
x
)(
x
==
1
),
annot
=
annFUN.org
,
mapping
=
"org.At.tair.db"
)
#Run the Fishers exact Test (raw p-vals)
fisher_sample
<-
runTest
(
GOdata_sample
,
algorithm
=
"elim"
,
statistic
=
"fisher"
)
sample_result
<-
GenTable
(
GOdata_sample
,
raw.p.value
=
fisher_sample
,
topNodes
=
length
(
fisher_sample
@
score
),
numChar
=
120
)
# print(sample_result) #shows some statistics
#multiple testing correction (FDR p value)
#http://www.bioconductor.org/packages/release/bioc/html/multtest.html
raw_pvalues
<-
as.double
(
sample_result
$
raw.p.value
)
cor_pval
<-
mt.rawp2adjp
(
raw_pvalues
,
"BY"
)
adjp
<-
cor_pval
$
adjp
#if want order_adjp <- res_FDR$adjp[order(res_FDR$index), ]
#Add FDR p value to topGO output
FDR_by_sample
<-
sample_result
FDR_by_sample
<-
cbind
(
FDR_by_sample
,
adjp
)
#GOgraph -> with raw pval
par
(
cex
=
0.3
)
showSigOfNodes
(
GOdata_sample
,
score
(
fisher_sample
),
firstSigNodes
=
5
,
useInfo
=
'all'
)
printGraph
(
GOdata_sample
,
fisher_sample
,
firstSigNodes
=
5
,
fn.prefix
=
current_leaf
,
useInfo
=
"def"
,
pdfSW
=
TRUE
)
#termsP.value named vector of p-values.
#dev.off()
#write table with information per leaf
table
=
paste
(
current_leaf
,
"txt"
,
sep
=
"."
)
write.table
(
FDR_by_sample
,
col.names
=
T
,
row.names
=
F
,
file
=
table
,
sep
=
"\t"
,
quote
=
F
)
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment