Commit 0ddc7033 authored by Chanchaidechachai, Thanicha's avatar Chanchaidechachai, Thanicha
Browse files

test

parent e368ed39
.Rproj.user
.Rhistory
.RData
.Ruserdata
####Part 1, Profiling (Ron):
## R packages: ggplot2, ggbeeswarm, bench, proftools, profvis, data.table,
## microbenchmark.
## Rprof is not a package: it is the sampling profiler function
## utils::Rprof(), so it must not be installed or attached.
profiling_pkgs <- c(
  "ggplot2", "ggbeeswarm", "bench", "proftools", "profvis",
  "data.table", "microbenchmark"
)
# Install only the packages that are missing, so re-running the script does
# not reinstall everything each time.
profiling_missing <- setdiff(profiling_pkgs, rownames(installed.packages()))
if (length(profiling_missing) > 0) {
  install.packages(profiling_missing)
}
# Attach each package once, in the order the script originally used.
library(ggbeeswarm)
library(bench)
library(proftools)
library(profvis)
library(data.table)
library(ggplot2)
library(microbenchmark)
########################################################
# Task 1: compare the speed of mean(x) with sum(x) / length(x) for different
# lengths of x.
x <- rep(1, 1000000)
# Benchmark 1: mean() against the hand-written sum(x) / length(x).
# Plot it straight away: `compare` is reused below, so plotting later would
# only ever show the second benchmark.
compare <- microbenchmark(mean(x), sum(x) / length(x), times = 1000)
autoplot(compare)
# Benchmark 2: calling mean.default() directly against calling mean().
compare <- microbenchmark(mean.default(x), mean(x), times = 1000)
autoplot(compare)
########################################################
# Task 2: compare the speed of selecting one column in a matrix and in a
# data.frame object. Does the difference depend on the object size?
# Large object: 10000 rows x 3 columns (30000 values).
dat_matrix <- matrix(1, 10000, 3)
dat_df <- data.frame(dat_matrix)
compare <- microbenchmark(dat_df[, 2], dat_matrix[, 2], times = 1000)
autoplot(compare)
# Small object: 100 rows x 3 columns.
dat_matrix <- matrix(1, 100, 3)
dat_df <- data.frame(dat_matrix)
# data.frame() names the columns of an unnamed matrix X1, X2, X3, so
# `dat_df$x.1` evaluates to NULL and would time a failed lookup instead of a
# column selection. Select column 2 exactly as in the large case so the two
# sizes are comparable.
compare <- microbenchmark(dat_df[, 2], dat_matrix[, 2], times = 1000)
autoplot(compare)
# Author's observation: with small data the difference is not very large.
# Repeat the comparison for selecting a single row. What do you notice?
# Objects here are 3 rows x 10000 columns (30000 values).
x <- rep(1, times = 10000)
dat_matrix <- matrix(data = 1, nrow = 3, ncol = 10000)
# Stack the same vector three times to get a 3-row data.frame.
dat_df <- data.frame(rbind(x, x, x))
compare <- microbenchmark(
  dat_df[1, ],
  dat_matrix[1, ],
  times = 100
)
autoplot(compare)
##################################################
### Case study: cross validation
# library() stops with a clear error when pls is not installed; require()
# would only return FALSE and let the lines below fail later instead.
library(pls)
# Run the examples from the plsr help page. The lines below rely on the
# `yarn` data and the `yarn.pls` fit being available afterwards.
example(plsr)
# Differences between the observed density and the stored validation
# predictions, taking index 1 in the second dimension and index 1 (r1) or
# index 2 (r2) in the third dimension of the prediction array.
r1 <- yarn$density - yarn.pls$validation$pred[, 1, 1]
r2 <- yarn$density - yarn.pls$validation$pred[, 1, 2]
#######################################################
## data.table
library(data.table)
# fread() returns a data.table; keep a plain data.frame copy of the same data
# so both formats can be benchmarked side by side.
flights <- fread(
  "https://github.com/Rdatatable/data.table/blob/master/vignettes/flights14.csv?raw=TRUE"
)
flight_df <- as.data.frame(flights)

# Use the flights data to compare the speed of selecting rows and columns in
# the data.table and data.frame formats.

# Row selection: first row of each object.
compare <- microbenchmark(
  flights[1, ],
  flight_df[1, ],
  times = 100
)
autoplot(compare)

# Column selection: four named columns of the data.table against the first
# four columns of the data.frame.
compare <- microbenchmark(
  flights[, .(year, month, day, dep_delay)],
  flight_df[, 1:4],
  times = 100
)
autoplot(compare)
## Author's observation: data.frame is faster for selecting one row, but for
## more complex tasks data.table can perform better.
##################################################
# Find determinants: build the digits 0..9 once as a one-column matrix and
# once as a plain vector, then form all pairwise products.
a <- matrix(0:9, ncol = 1)
b <- 0:9
d <- matrix(0:9, ncol = 1)
e <- 0:9
# (10 x 1) %*% (length-10 vector) gives the 10 x 10 table of products;
# as.vector() flattens it to a plain vector of 100 values.
ab <- as.vector(a %*% b)
de <- as.vector(d %*% e)
# Tabulate a * b - d * e (the 2x2 determinant this section is about) over
# every combination of the supplied values.
#
# Arguments (each defaults to the digits 0:9, so dd.for.c() with no
# arguments still gives the original result):
#   a1, b1  values whose products form the first term a * b
#   d1, e1  values whose products form the second term d * e
# Returns a table() counting how often each value of a * b - d * e occurs.
#
# The function used to take no arguments although the call below passes
# a1/b1/d1/e1, which made that call fail with "unused arguments".
# The nested loops and the c() append are kept on purpose: presumably this is
# the slow baseline that the vectorised dd.for.c2 is compared against.
dd.for.c <- function(a1 = 0:9, b1 = 0:9, d1 = 0:9, e1 = 0:9) {
  val <- NULL
  for (a in a1) {
    for (b in b1) {
      for (d in d1) {
        for (e in e1) {
          val <- c(val, a * b - d * e)
        }
      }
    }
  }
  table(val)
}
a1 <- 0:9
b1 <- 0:9
d1 <- 0:9
e1 <- 0:9
dd.for.c(a1 = a1, b1 = b1, d1 = d1, e1 = e1)
#improve argument
# Vectorised count of a * b - d * e over all digit combinations 0..9.
# Takes no arguments and returns a table() of the differences.
dd.for.c2 <- function() {
  digits <- 0:9
  # All 100 pairwise products, flattened to a plain vector.
  prods <- as.vector(outer(digits, digits))
  # Every product minus every other product: a 100 x 100 matrix.
  val2 <- outer(prods, prods, FUN = "-")
  table(val2)
}
# Same computation as the table() based variant, but counted with
# tabulate(). Takes no arguments and returns an integer vector of counts
# in which position k holds the count for the difference k - 82.
dd.for.c2 <- function() {
  prods <- outer(0:9, 0:9)
  diffs <- outer(prods, prods, FUN = "-")
  # tabulate() only counts positive integers. The differences start at
  # 0 - 81 = -81, so adding 82 moves the smallest one into bin 1.
  shifted <- diffs + 82
  tabulate(shifted)
}
# Open the help page for outer(), which dd.for.c2 uses to build all
# pairwise products and differences.
?outer # The outer product of the arrays X and Y is the array A with dimension c(dim(X), dim(Y))
# Part 2, Big data (Ron):
# R packages: curl, pryr, bigmemory, biganalytics, biglm
bigdata_pkgs <- c("curl", "pryr", "bigmemory", "biganalytics", "biglm")
# Install only the packages that are missing, so re-running the script does
# not reinstall everything each time.
bigdata_missing <- setdiff(bigdata_pkgs, rownames(installed.packages()))
if (length(bigdata_missing) > 0) {
  install.packages(bigdata_missing)
}
library(curl)
library(pryr)
library(bigmemory)
# biganalytics is listed and installed for this part but was never attached.
library(biganalytics)
library(biglm)
# Part 3 and 4, Parallel computing / Machine learning / research (Sven):
# R packages: knitr, doParallel, GGally, ggplot2, numbers, OpenCL,
# parallel, plot3D, randomForest, rgl, Rmpi, rpart, rpart.plot, snow,
# dplyr, tidyverse, hablar
#
# `parallel` is a base package that ships with R, so it is attached below but
# deliberately left out of the install list.
parallel_ml_pkgs <- c(
  "knitr", "doParallel", "GGally", "numbers", "OpenCL", "plot3D",
  "randomForest", "rgl", "Rmpi", "rpart", "rpart.plot", "snow",
  "hablar", "dplyr", "tidyverse"
)
# Install only the packages that are missing, so re-running the script does
# not reinstall everything each time.
parallel_ml_missing <- setdiff(
  parallel_ml_pkgs,
  rownames(installed.packages())
)
if (length(parallel_ml_missing) > 0) {
  install.packages(parallel_ml_missing)
}
# Attach each package once, in the order the script originally used.
library(hablar)
library(doParallel)
library(GGally)
library(numbers)
library(OpenCL)
library(parallel)
library(plot3D)
library(randomForest)
library(rgl)
library(Rmpi)
library(rpart)
library(rpart.plot)
library(snow)
library(dplyr)
library(tidyverse)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: CP874
RnwWeave: Sweave
LaTeX: pdfLaTeX
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment