Commit 2dbcd896 authored by Patino Medina, Laura's avatar Patino Medina, Laura
Browse files

Upload New File

parent dd6e6131
#!/usr/bin/env Rscript
#BIF30806 Project group: Araformatic
#(2021)
#GO enrichment
#Script to run topGO package for testing GO terms of differential gene
#expression analysis
# Scrip written based on:
# https://ucdavis-bioinformatics-training.github.io/2018-June-RNA-Seq-Workshop/friday/enrichment.html
# Documentation available of topGO usage
# https://www.r-bloggers.com/2015/09/passing-arguments-to-an-r-script-from-command-lines/
#Usage: Rscript GOenrichment_proj.R input_file.txt
# input_file: txt file, genes as rows, leafs as columns and values
# are adjusted p value from DGE
#Take arguments from command line
args = commandArgs(trailingOnly = TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("Argument missing", call.=FALSE)
} else if (length(args)==1) {
# default output file
print("Input file specified")
}
#Install packages from source.
source("https://bioconductor.org/biocLite.R")
biocLite(c("topGO", "KEGGREST", "org.At.tair.db", "Rgraphviz"))
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("topGO")
BiocManager::install("AnnotationDbi")
BiocManager::install("org.At.tair.db")
BiocManager::install("Rgraphviz")
BiocManager::install("multtest")
library(topGO)
library("org.At.tair.db")
library(KEGGREST)
library(AnnotationDbi)
library(Rgraphviz)
library(multtest, verbose = FALSE)
####### ADJUSTED P-VALUE #http://rileylab.org/wp-content/uploads/2016/08/multtest.pdf
#useful info: #multiple_dexp[,c(1,which(colnames(multiple_dexp) == current_leaf))]
#GO enrichment on multiple leafs(samples)
#list with samples names
multiple_dexp<- read.delim(args[1],stringsAsFactors = F)
#multiple_dexp<- read.delim("multiple_input.txt",stringsAsFactors = F)
sample_names <-colnames(multiple_dexp)# all names
sample_names[2:length(sample_names)]
pcutoff <- 0.05
#gene names from DGE analysis output
genes <- multiple_dexp$AGI
for(leaf in sample_names[2:length(sample_names)]){
current_leaf = leaf
#filtering genes.replace to 1 if above threshold, otherwise 0 (significant)
filter_multiple <- ifelse(multiple_dexp[,which(colnames(multiple_dexp) == current_leaf)] < pcutoff,1,0) # adj.P.Val name of the column
names(filter_multiple) <- genes
geneList<- filter_multiple
# create topGOdata object
GOdata_sample <- new("topGOdata",
ontology = "BP",
allGenes = geneList,
geneSelectionFun = function(x)(x == 1),
annot = annFUN.org, mapping = "org.At.tair.db")
#Run the Fishers exact Test (raw p-vals)
fisher_sample <- runTest(GOdata_sample, algorithm = "elim", statistic = "fisher")
sample_result <- GenTable(GOdata_sample, raw.p.value = fisher_sample,
topNodes = length(fisher_sample@score), numChar = 120)
# print(sample_result) #shows some statistics
#multiple testing correction (FDR p value)
#http://www.bioconductor.org/packages/release/bioc/html/multtest.html
raw_pvalues <- as.double(sample_result$raw.p.value)
cor_pval <- mt.rawp2adjp(raw_pvalues, "BY")
adjp <-cor_pval$adjp #if want order_adjp <- res_FDR$adjp[order(res_FDR$index), ]
#Add FDR p value to topGO output
FDR_by_sample <- sample_result
FDR_by_sample <- cbind(FDR_by_sample, adjp)
#GOgraph -> with raw pval
par(cex = 0.3)
showSigOfNodes(GOdata_sample, score(fisher_sample), firstSigNodes = 5, useInfo = 'all')
printGraph(GOdata_sample, fisher_sample, firstSigNodes = 5, fn.prefix = current_leaf, useInfo = "def", pdfSW = TRUE)#termsP.value named vector of p-values.
#dev.off()
#write table with information per leaf
table = paste(current_leaf,"txt",sep=".")
write.table(FDR_by_sample, col.names=T, row.names=F, file = table, sep ="\t",
quote = F)
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment