Commit 50ec6ea4 authored by Molemaker, Maureen
Browse files

Merge branch 'main' of git.wur.nl:araformatics-group/araformatics-project into main

parents 6ca0efc9 7e4ea211
# Differential expression analysis of the counts in X_hisatcounts.csv with
# DESeq2: normalisation, dispersion estimation, Wald test, and export.

# Import library and data
library(DESeq2)
data <- read.table("X_hisatcounts.csv", row.names = 1, header = TRUE)

# Create the DESeqDataSet (dds). The first two count columns are labelled
# "reference" and the last two "sample"; "reference" is the first factor level.
condition <- factor(
  c("reference", "reference", "sample", "sample"),
  levels = c("reference", "sample")
)
col_data <- data.frame(condition)
dds <- DESeqDataSetFromMatrix(data, col_data, ~condition)

# Estimate size factors
dds <- estimateSizeFactors(dds)
sizeFactors(dds)

# Normalize data
# NOTE(review): norm_versus_non_norm() is not defined in this file; it must be
# available in the session before this line runs -- confirm where it comes from.
norm_versus_non_norm(dds, 1, 2, left = 2, right = 8)
rld <- rlog(dds)

# Show the effect of the normalization on the first count column
plot(density(assay(dds)[, 1]), main = "counts")
plot(density(assay(rld)[, 1]), main = "log counts")

## Start differential expression analysis
# Estimate dispersions
dds <- estimateDispersions(dds)
plotDispEsts(dds)

# Do the GLM (Wald) test
dds <- nbinomWaldTest(dds)
res <- results(dds)

# Show results
head(res)

# Rows without an adjusted p-value get padj = 1 so they never pass the
# significance filter below (and are exported with padj = 1).
res$padj[is.na(res$padj)] <- 1
res_sub <- res[res$padj < 0.01, ]
dim(res_sub)

# Ten significant rows with the largest log2 fold change
res_sort <- res_sub[order(res_sub$log2FoldChange, decreasing = TRUE), ]
head(res_sort, 10)

# Export the complete (unfiltered) results table
write.table(
  res,
  file = "expressionshisat3.tsv",
  sep = "\t",
  col.names = NA,
  row.names = TRUE
)
# Run a DESeq2 differential expression analysis on one count table.
#
# Args:
#   file_name: path to a count table readable by read.table(), with a header
#     row and the row names in the first column. The design assumes four count
#     columns: two "reference" columns followed by two "sample" columns.
#
# Side effects:
#   Draws density plots of the raw and rlog-transformed counts of the first
#   column, plus the dispersion plot, on the active graphics device, and writes
#   the results table to "<file_name>.tsv" (tab-separated).
run_analysis <- function(file_name) {
  data <- read.table(file_name, row.names = 1, header = TRUE)

  # Four labels, one per count column. The original three-label vector did not
  # match the four-column design used for this count file elsewhere in the
  # file, which makes DESeqDataSetFromMatrix() reject the input.
  condition <- factor(
    c("reference", "reference", "sample", "sample"),
    levels = c("reference", "sample")
  )
  col_data <- data.frame(condition)
  dds <- DESeqDataSetFromMatrix(data, col_data, ~condition)
  dds <- estimateSizeFactors(dds)
  rld <- rlog(dds)

  # Show result of the normalization
  plot(density(assay(dds)[, 1]), main = "counts")
  plot(density(assay(rld)[, 1]), main = "log counts")

  # Estimate dispersions
  dds <- estimateDispersions(dds)
  plotDispEsts(dds)

  # Do the GLM (Wald) test
  dds <- nbinomWaldTest(dds)
  res <- results(dds)

  # Build the output path with paste0(): `+` is not defined for strings in R,
  # and the argument is called file_name (not filename).
  write.table(
    res,
    file = paste0(file_name, ".tsv"),
    sep = "\t",
    col.names = NA,
    row.names = TRUE
  )
}
# Run the analysis on the count table X_hisatcounts.csv
run_analysis("X_hisatcounts.csv")
# Author: Joran Schoorlemmer
# Student nr: 1004586
# Description: Run differential analysis functions
# Usage: Rscript Diff_analysis.R <yourcountfilenames.txt>
# import
library(DESeq2)
# Functions
# Run a DESeq2 differential expression analysis on one count table.
#
# Args:
#   file_name: path to a count table readable by read.table(), with a header
#     row and the row names in the first column. The design assumes four count
#     columns: two "reference" columns followed by two "sample" columns.
#
# Side effects:
#   Draws density plots of the raw and rlog-transformed counts of the first
#   column, plus the dispersion plot, on the active graphics device, and writes
#   the results table to "<file_name>.tsv" (tab-separated).
run_analysis <- function(file_name) {
  data <- read.table(file_name, row.names = 1, header = TRUE)
  condition <- factor(
    c("reference", "reference", "sample", "sample"),
    levels = c("reference", "sample")
  )
  col_data <- data.frame(condition)

  # Init dds
  dds <- DESeqDataSetFromMatrix(data, col_data, ~condition)
  dds <- estimateSizeFactors(dds)
  rld <- rlog(dds)

  # Show result of the normalization
  plot(density(assay(dds)[, 1]), main = "counts")
  plot(density(assay(rld)[, 1]), main = "log counts")

  # Estimate dispersions
  dds <- estimateDispersions(dds)
  plotDispEsts(dds)

  # Do the GLM (Wald) test
  dds <- nbinomWaldTest(dds)
  res <- results(dds)

  # paste0() instead of paste(): paste() joins with a space by default, which
  # produced output names such as "counts.csv .tsv".
  write.table(
    res,
    file = paste0(file_name, ".tsv"),
    sep = "\t",
    col.names = NA,
    row.names = TRUE
  )
}
# Run function
# Expects exactly one trailing command-line argument: a text file whose first
# (tab-separated) column lists the count files to analyse, one per line.
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) != 1) {
  stop(
    "Usage: Rscript Diff_analysis.R <yourcountfilenames.txt>",
    call. = FALSE
  )
}

# stringsAsFactors = FALSE keeps the paths as character strings on R < 4.0 too.
files <- read.delim(cli_args[1], header = FALSE, stringsAsFactors = FALSE)
for (file in files[, 1]) {
  run_analysis(file)
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment