Friday, December 27, 2019

*** useful R, Rstudio tips, update

Useful R tips, 20181227 update

timestamp = format(Sys.Date(), "%Y%m%d")
write.csv(dangtb, paste("DangProteo_MS02Zscore_summary_",timestamp, ".csv",sep='' ), quote =T, row.names = F  )


title: "yeast PIN, rls ratio - CR prot"
author: "H Qin"
date: '`r paste( "2018-12-21 ~ ",format(Sys.Date(), "%Y-%B-%d"))`'
output:
  pdf_document: default
  html_document: default

Read csv is much faster than xlsx.

tb = read.csv(fullFileName, colClasses=c("character",NA, NA, "character", rep("numeric",8 ), NA));
tb = read.table("gene_association.sgd", skip=25, sep="\t", stringsAsFactors=FALSE, quote = "", row.names = NULL)



for( mymethod in mymethods ) {
 hd =  hclust( dist(ctb2), mymethod); 
 # plot( hd, main="hamming distance, ward linkage" )
 coat.cat = cutree(hd, numclus )  ###<=== change is here
 col.palette = c("red","brown","blue","green");
 coat.color = col.palette[coat.cat]

 library(RColorBrewer);
 #hmcol = colorRampPalette(brewer.pal(10,"RdBu"))(256);
 hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(16);

 #heatmap( ctb2, col=hmcol, scale="none", margins = c(5,10) );
 heatmap( ctb2, col=hmcol, scale="none", margins = c(5,10), 
 RowSideColors=coat.color, ColSideColors = spec.colors,
 hclustfun = function(c) hclust( c, method=mymethod),
 #distfun = function(c) as.dist(hamming.distance(c)) #Hamming is less pleasant than Euclidean 
 main = mymethod
 );
}



options(echo=TRUE) # if you want see commands in output file
args <- commandArgs(trailingOnly = TRUE)print(args)
# trailingOnly=TRUE means that only your arguments are returned, check:
# print(commandsArgs(trailingOnly=FALSE))i = as.integer(args[1])
j = as.integer(args[2])
x = seq(i, j)
print(x)

R -f R-args.R --args 2 5
Rscript file 

#from lower case to upper case
chartr(old, new, x)
tolower(x)
toupper(x)

casefold(x, upper = FALSE)

    require(stringr)
    conditions$media[r] = str_replace( conditions$media[r], "\\/", "")


tb$AssignmentTotal= apply(tb[, assignments], 1, FUN=function(x){sum(x,na.rm=T)} )


http://cran.r-project.org/doc/FAQ/R-FAQ.html#How-can-I-create-rotated-axis-labels_003f

cumsum()
with()

R -f filename


axis( 2, at=pretty(tbf$s), tcl=0.2, las=2 )  #rotate axis labels



text( tb$G + 0.01*nchar(tb$strain)/4, log10(tb$R0)-0.1*nchar(tb$strain)/4, tb$strain, pos=3)


layout(mat, heights= c( 1.15, rep(1, nrow(mat)-2), 1.2) );
par(mar=c(5.1,4.1,4.1,2.1)
http://www.r-bloggers.com/setting-graph-margins-in-r-using-the-par-function-and-lots-of-cow-milk/

 text ( aa, bb, t, cex=0.8);

####aplha
names(fit)[ grep("alpha", names(fit))]
fit_alpha_tb = data.frame( t( fit[, grep("alpha", names(fit)) ]))
rownames(fit_alpha_tb) = names(fit)[grep("alpha", names(fit))]
fit_alpha_tb$names = gsub("_.*", "", rownames(fit_alpha_tb))


library(RColorBrewer);
#hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(8);
hmcol = colorRampPalette(brewer.pal(3,"Blues"))(8);



format(Sys.time(), "%a %b %d %H:%M:%S %Y")
format(Sys.time(), "%Y%b%d_%H%M%S")

 
 
#regular expression 
require(org.Sc.sgd.db)
x <- org.Sc.sgdALIAS
ls(x)[grep("^Y..\\d{3}", ls(x))]
 
http://www.regular-expressions.info/rlanguage.html
http://www.r-bloggers.com/regular-expressions-in-r-vs-rstudio/


list.files for the contents of a directory.
normalizePath for a ‘canonical’ path name.

(WD <- getwd())
if (!is.null(WD)) setwd(WD)


require(xlsx) # read Excel in R.

Usage

! x
x & y
x && y
x | y
x || y
xor(x, y)


rm(list=ls() );



unlist(strsplit("a.b.c", "\\."))
-----
str(x)
attributes(x)
--------------
outer( month.abb, 1999:2003, FUn="paster");

Letters <- c( LETTERS, letters);
Letters[ ! sapply(Letters, function(xx) exists(xx) ) ]; # anonymous function as a wrapper for a primitive function

------------
 legend(100,60, seq(100,200,1), lty=1) # line legends

  Library(MASS); example(Skye); #tenary plot

 library(help = survivial)   

ColorBrewer.org

useful comnds:
x11; factor; relevel; class; loess; contour; is.element; math %in%; grep; sample; nrow; 
grepmisc: hist2d
url()
---
class and object
CA@a[1]

----

test1 <- list( time= c(4, 3,1,1,2,2,3),
  status=c(1,NA,1,0,1,1,0),
  x= c(0, 2,1,1,1,0,0),
  sex= c(0, 0,0,0,1,1,1))
coxph( Surv(time, status) ~ x + strata(sex), test1) #stratified model

----
delete NA form matrix

 > x<-matrix(1:16,4,4)
 > x[col(x)>=row(x)]<-NA
 > x[,! apply(x,2,function(x) all(is.na(x))) ]
     [,1] [,2] [,3]
[1,]   NA   NA   NA
[2,]    2   NA   NA
[3,]    3    7   NA
[4,]    4    8   12

----
? R/Splus Perl interface   RSperl
? R Pythong interface Rpy Rpython
not in CRAN
----
date.grouping <- function(d) {
  # for ea date in d calculate date beginning 6 month period which contains it
  mat <- matrix(as.numeric(unlist(strsplit(as.character(d),"-"))),nr=2)
  f <- function(x) do.call( "ISOdate", as.list(x) )
  POSIXct.dates <- apply(rbind(mat,1),2,f) + ISOdate(1970,1,1)
  breaks <- c(seq(from=min(POSIXct.dates), to=max(POSIXct.dates), by="6 mo"), Inf)
  format( as.POSIXct( cut( POSIXct.dates, breaks, include.lowest=T )), "%Y-%m" ) }

----
nonlinear regression
library(nls)
----
http://www.bioconductor.org/
----
library(lattice)
----
persp()
----
las=1 or 2
You can use the graphics parameter "srt" to rotate displayed text by a specified number of degrees,
 e.g. srt=45 to put it on an angle, srt=90 to put it vertical.  

----
cnams = dimnames(aa)[[2]]

cnams[which(cnams == 'blah3.Mg')] = 'Mg (%)'
...
dimnames(aa)[[2]] = cnams
----
eval(substitute(lf <- locfit(~s, data=age), list(s=s)))
------
  sub=sort(sample(x,200, replace=F))
  postscript("try.ps")
  matplot(x[sub],y[sub,],type="l",lwd=5)     
  dev.off()

-----
>Does anyone know if R has the functionality to calculate a simple 
>moving average. I cant seem to find it in the help menu.

filter in library ts. does filter() do what you need?
Or look at the 'running' function in the gregmisc package.


moving.average <- 
function(x, k) { 
 n <- length(x) 
 y <- rep(0, n) 
 for (i in (1+k):n) 
   y[i] <- mean(x[(i-k):i]) 
 return(y)
 } 

----
tree packages
----

# Create an Example Data Frame Containing Car x Color data, with long car names 
carnames <- c("BMW: High End, German",
              "Renault: Medium End, French",
              "Mercedes: High End, German", 
              "Seat: Imaginary, Unknown Producer")
carcolors <- c("red","white","silver","green")
datavals <- round(rnorm(16, mean=100, sd=60),1)
data <- data.frame(Car=rep(carnames,4),
                   Color=rep(carcolors, c(4,4,4,4) ),
                   Value=datavals )

# generate balloon plot with default scaling, the column labels will overlap 
# balloonplot( data$Color, data$Car, data$Value)


# try again, with column labels rodated 90 degrees, and given more space 
balloonplot( data$Car, data$Color, data$Value, colmar=3, colsrt=90)



----

Here is a very rough addlogo() using pixmap:

"addlogo" <- function(x, y, pixmap) {
    if (is.list(x)) {
        y <- x$y
        x <- x$x
    }
    else if (missing(y)) 
        stop("missing y")
    if (!is.numeric(x) || !is.numeric(y)) 
        stop("non-numeric coordinates")
    if ((nx <- length(x)) <= 1 || nx != length(y) || nx > 2) 
        stop("invalid coordinate lengths")
    pixmap@bbox[1] <- x[1]
    pixmap@bbox[2] <- y[1]
    pixmap@bbox[3] <- x[2]
    pixmap@bbox[4] <- y[2]
    pixmap@cellres[1] <- (pixmap@bbox[3] - pixmap@bbox[1]) / pixmap@size[2]
    pixmap@cellres[2] <- (pixmap@bbox[4] - pixmap@bbox[2]) / pixmap@size[1]
    plot(pixmap, add=TRUE)
    invisible(pixmap)
}

which will work with locator() too. To maintain aspect, it shouldn't alter 
the relative cell resolutions, and should just use the new x or y, bur 
this is the general case. The handling of the location of the logo is 
copied & pasted from legend().



----
x <- readLines(myfile)
strsplit(substring(x,8),split="")
----

No comments:

Post a Comment