Open Notebook: December 2019

Friday, December 27, 2019

Useful R tips, 20181227 update

timestamp = format(Sys.Date(), "%Y%m%d")
write.csv(dangtb, paste("DangProteo_MS02Zscore_summary_",timestamp, ".csv",sep='' ), quote =T, row.names = F )

title: "yeast PIN, rls ratio - CR prot"
author: "H Qin"
date: '`r paste( "2018-12-21 ~ ",format(Sys.Date(), "%Y-%B-%d"))`'
output:
pdf_document: default
html_document: default

Read csv is much faster than xlsx.

tb = read.csv(fullFileName, colClasses=c("character",NA, NA, "character", rep("numeric",8 ), NA));
tb = read.table("gene_association.sgd", skip=25, sep="\t", stringsAsFactors=FALSE, quote = "", row.names = NULL)

for( mymethod in mymethods ) {
hd = hclust( dist(ctb2), mymethod);
# plot( hd, main="hamming distance, ward linkage" )
coat.cat = cutree(hd, numclus ) ###<=== change is here
col.palette = c("red","brown","blue","green");
coat.color = col.palette[coat.cat]

library(RColorBrewer);
#hmcol = colorRampPalette(brewer.pal(10,"RdBu"))(256);
hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(16);

#heatmap( ctb2, col=hmcol, scale="none", margins = c(5,10) );
heatmap( ctb2, col=hmcol, scale="none", margins = c(5,10),
RowSideColors=coat.color, ColSideColors = spec.colors,
hclustfun = function(c) hclust( c, method=mymethod),
#distfun = function(c) as.dist(hamming.distance(c)) #Hamming is less pleasant than Euclidean
main = mymethod
);
}

options(echo=TRUE) # if you want see commands in output file
args <- commandArgs(trailingOnly = TRUE)print(args)
# trailingOnly=TRUE means that only your arguments are returned, check:
# print(commandsArgs(trailingOnly=FALSE))i = as.integer(args[1])
j = as.integer(args[2])
x = seq(i, j)
print(x)

R -f R-args.R --args 2 5
Rscript file

#from lower case to upper case

chartr(old, new, x)

tolower(x)

toupper(x)

casefold(x, upper = FALSE)

require(stringr)
conditions$media[r] = str_replace( conditions$media[r], "\\/", "")

tb$AssignmentTotal= apply(tb[, assignments], 1, FUN=function(x){sum(x,na.rm=T)} )

http://cran.r-project.org/doc/FAQ/R-FAQ.html#How-can-I-create-rotated-axis-labels_003f

cumsum()
with()

R -f filename

axis( 2, at=pretty(tbf$s), tcl=0.2, las=2 ) #rotate axis labels

text( tb$G + 0.01*nchar(tb$strain)/4, log10(tb$R0)-0.1*nchar(tb$strain)/4, tb$strain, pos=3)

layout(mat, heights= c( 1.15, rep(1, nrow(mat)-2), 1.2) );
par(mar=c(5.1,4.1,4.1,2.1)
http://www.r-bloggers.com/setting-graph-margins-in-r-using-the-par-function-and-lots-of-cow-milk/

text ( aa, bb, t, cex=0.8);

####aplha
names(fit)[ grep("alpha", names(fit))]
fit_alpha_tb = data.frame( t( fit[, grep("alpha", names(fit)) ]))
rownames(fit_alpha_tb) = names(fit)[grep("alpha", names(fit))]
fit_alpha_tb$names = gsub("_.*", "", rownames(fit_alpha_tb))

library(RColorBrewer);
#hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(8);
hmcol = colorRampPalette(brewer.pal(3,"Blues"))(8);

format(Sys.time(), "%a %b %d %H:%M:%S %Y")

format(Sys.time(), "%Y%b%d_%H%M%S")

#regular expression

require(org.Sc.sgd.db)
x <- org.Sc.sgdALIAS
ls(x)[grep("^Y..\\d{3}", ls(x))]

http://www.regular-expressions.info/rlanguage.html
http://www.r-bloggers.com/regular-expressions-in-r-vs-rstudio/

list.files for the contents of a directory.

normalizePath for a ‘canonical’ path name.

(WD <- getwd())

if (!is.null(WD)) setwd(WD)

require(xlsx) # read Excel in R.

Usage

! x
x & y
x && y
x | y
x || y
xor(x, y)

rm(list=ls() );



unlist(strsplit("a.b.c", "\\."))
-----
str(x)
attributes(x)
--------------
outer( month.abb, 1999:2003, FUn="paster");

Letters <- c( LETTERS, letters);
Letters[ ! sapply(Letters, function(xx) exists(xx) ) ]; # anonymous function as a wrapper for a primitive function

------------
 legend(100,60, seq(100,200,1), lty=1) # line legends

  Library(MASS); example(Skye); #tenary plot

 library(help = survivial)   

ColorBrewer.org

useful comnds:
x11; factor; relevel; class; loess; contour; is.element; math %in%; grep; sample; nrow; 
grepmisc: hist2d
url()
---
class and object
CA@a[1]

----

test1 <- list( time= c(4, 3,1,1,2,2,3),
  status=c(1,NA,1,0,1,1,0),
  x= c(0, 2,1,1,1,0,0),
  sex= c(0, 0,0,0,1,1,1))
coxph( Surv(time, status) ~ x + strata(sex), test1) #stratified model

----
delete NA form matrix

 > x<-matrix(1:16,4,4)
 > x[col(x)>=row(x)]<-NA
 > x[,! apply(x,2,function(x) all(is.na(x))) ]
     [,1] [,2] [,3]
[1,]   NA   NA   NA
[2,]    2   NA   NA
[3,]    3    7   NA
[4,]    4    8   12

----
? R/Splus Perl interface   RSperl
? R Pythong interface Rpy Rpython
not in CRAN
----
date.grouping <- function(d) {
  # for ea date in d calculate date beginning 6 month period which contains it
  mat <- matrix(as.numeric(unlist(strsplit(as.character(d),"-"))),nr=2)
  f <- function(x) do.call( "ISOdate", as.list(x) )
  POSIXct.dates <- apply(rbind(mat,1),2,f) + ISOdate(1970,1,1)
  breaks <- c(seq(from=min(POSIXct.dates), to=max(POSIXct.dates), by="6 mo"), Inf)
  format( as.POSIXct( cut( POSIXct.dates, breaks, include.lowest=T )), "%Y-%m" ) }

----
nonlinear regression
library(nls)
----
http://www.bioconductor.org/
----
library(lattice)
----
persp()
----
las=1 or 2
You can use the graphics parameter "srt" to rotate displayed text by a specified number of degrees,
 e.g. srt=45 to put it on an angle, srt=90 to put it vertical.  

----
cnams = dimnames(aa)[[2]]

cnams[which(cnams == 'blah3.Mg')] = 'Mg (%)'
...
dimnames(aa)[[2]] = cnams
----
eval(substitute(lf <- locfit(~s, data=age), list(s=s)))
------
  sub=sort(sample(x,200, replace=F))
  postscript("try.ps")
  matplot(x[sub],y[sub,],type="l",lwd=5)     
  dev.off()

-----

>Does anyone know if R has the functionality to calculate a simple 
>moving average. I cant seem to find it in the help menu.

filter in library ts. does filter() do what you need?
Or look at the 'running' function in the gregmisc package.


moving.average <- 
function(x, k) { 
 n <- length(x) 
 y <- rep(0, n) 
 for (i in (1+k):n) 
   y[i] <- mean(x[(i-k):i]) 
 return(y)
 } 

----
tree packages
----

# Create an Example Data Frame Containing Car x Color data, with long car names 
carnames <- c("BMW: High End, German",
              "Renault: Medium End, French",
              "Mercedes: High End, German", 
              "Seat: Imaginary, Unknown Producer")
carcolors <- c("red","white","silver","green")
datavals <- round(rnorm(16, mean=100, sd=60),1)
data <- data.frame(Car=rep(carnames,4),
                   Color=rep(carcolors, c(4,4,4,4) ),
                   Value=datavals )

# generate balloon plot with default scaling, the column labels will overlap 
# balloonplot( data$Color, data$Car, data$Value)


# try again, with column labels rodated 90 degrees, and given more space 
balloonplot( data$Car, data$Color, data$Value, colmar=3, colsrt=90)



----

Here is a very rough addlogo() using pixmap:

"addlogo" <- function(x, y, pixmap) {
    if (is.list(x)) {
        y <- x$y
        x <- x$x
    }
    else if (missing(y)) 
        stop("missing y")
    if (!is.numeric(x) || !is.numeric(y)) 
        stop("non-numeric coordinates")
    if ((nx <- length(x)) <= 1 || nx != length(y) || nx > 2) 
        stop("invalid coordinate lengths")
    pixmap@bbox[1] <- x[1]
    pixmap@bbox[2] <- y[1]
    pixmap@bbox[3] <- x[2]
    pixmap@bbox[4] <- y[2]
    pixmap@cellres[1] <- (pixmap@bbox[3] - pixmap@bbox[1]) / pixmap@size[2]
    pixmap@cellres[2] <- (pixmap@bbox[4] - pixmap@bbox[2]) / pixmap@size[1]
    plot(pixmap, add=TRUE)
    invisible(pixmap)
}

which will work with locator() too. To maintain aspect, it shouldn't alter 
the relative cell resolutions, and should just use the new x or y, bur 
this is the general case. The handling of the location of the logo is 
copied & pasted from legend().



----
x <- readLines(myfile)
strsplit(substring(x,8),split="")
----

Monday, December 16, 2019

Open Notebook

Friday, December 27, 2019

Machine learning in computational biology

*** useful R, Rstudio tips, update

Usage

Thursday, December 26, 2019

tensorflow 2 tutorial

Wednesday, December 25, 2019

Atlanta graph lecture series

Atlanta Lecture Series in Combinatorics and Graph Theory XXIV (ALS 24)

Wednesday, December 18, 2019

open source single-sign on service

Tuesday, December 17, 2019

HSYAA training data

Monday, December 16, 2019

CITI training

Friday, December 13, 2019

DoD scholarship

Wednesday, December 11, 2019

Ghafari HYSAA github infor

reference manage comparision

Saturday, December 7, 2019

macOS 10.15 Catalina: The Ars Technica review

Thursday, December 5, 2019

Ren'sPy digital story

Monday, December 2, 2019

predict housing price with tensor flow