Friday, December 27, 2019

Useful R tips, 20181227 update

# Date stamp (YYYYMMDD) embedded in the output file name.
timestamp <- format(Sys.Date(), "%Y%m%d")
# Write the summary table with quoted fields and without row names.
write.csv(
  dangtb,
  paste0("DangProteo_MS02Zscore_summary_", timestamp, ".csv"),
  quote = TRUE,
  row.names = FALSE
)

title: "yeast PIN, rls ratio - CR prot"
author: "H Qin"
date: '`r paste( "2018-12-21 ~ ",format(Sys.Date(), "%Y-%B-%d"))`'
output:
pdf_document: default
html_document: default

Reading CSV files is much faster than reading xlsx files.

tb = read.csv(fullFileName, colClasses=c("character",NA, NA, "character", rep("numeric",8 ), NA));
tb = read.table("gene_association.sgd", skip=25, sep="\t", stringsAsFactors=FALSE, quote = "", row.names = NULL)

# Draw one clustered heatmap of `ctb2` per linkage method in `mymethods`.
# Relies on `ctb2`, `mymethods`, `numclus` and `spec.colors` being defined
# by the surrounding session.
library(RColorBrewer)

# Loop-invariant settings, computed once instead of on every iteration.
col.palette <- c("red", "brown", "blue", "green")
# hmcol <- colorRampPalette(brewer.pal(10, "RdBu"))(256)
hmcol <- colorRampPalette(brewer.pal(5, "RdBu"))(16)

for (mymethod in mymethods) {
  hd <- hclust(dist(ctb2), mymethod)
  # plot(hd, main = "hamming distance, ward linkage")
  coat.cat <- cutree(hd, numclus) ### <=== change is here
  # One side-bar colour per row cluster; cluster ids beyond
  # length(col.palette) index past the palette and yield NA.
  coat.color <- col.palette[coat.cat]

  # heatmap(ctb2, col = hmcol, scale = "none", margins = c(5, 10))
  heatmap(
    ctb2,
    col = hmcol, scale = "none", margins = c(5, 10),
    RowSideColors = coat.color, ColSideColors = spec.colors,
    hclustfun = function(c) hclust(c, method = mymethod),
    # distfun = function(c) as.dist(hamming.distance(c)) # Hamming is less pleasant than Euclidean
    main = mymethod
  )
}

# Command-line argument demo: print the integer sequence from the first
# argument to the second.
options(echo = TRUE) # if you want to see commands in the output file
# trailingOnly = TRUE means that only your arguments are returned, check:
# print(commandArgs(trailingOnly = FALSE))
args <- commandArgs(trailingOnly = TRUE)
print(args)
# Fail with a clear message instead of letting seq(NA, NA) error out later.
if (length(args) < 2) {
  stop("expected two integer arguments: <from> <to>", call. = FALSE)
}
i <- as.integer(args[1])
j <- as.integer(args[2])
x <- seq(i, j)
print(x)

R -f R-args.R --args 2 5
Rscript file

#from lower case to upper case

chartr(old, new, x)

tolower(x)

toupper(x)

casefold(x, upper = FALSE)

require(stringr)
conditions$media[r] = str_replace( conditions$media[r], "\\/", "")

tb$AssignmentTotal= apply(tb[, assignments], 1, FUN=function(x){sum(x,na.rm=T)} )

http://cran.r-project.org/doc/FAQ/R-FAQ.html#How-can-I-create-rotated-axis-labels_003f

cumsum()
with()

R -f filename

axis( 2, at=pretty(tbf$s), tcl=0.2, las=2 ) #rotate axis labels

text( tb$G + 0.01*nchar(tb$strain)/4, log10(tb$R0)-0.1*nchar(tb$strain)/4, tb$strain, pos=3)

# Arrange the panels described by `mat`; the first and last rows get extra
# height (1.15 and 1.2) relative to the middle rows (1 each).
layout(mat, heights = c(1.15, rep(1, nrow(mat) - 2), 1.2))
# Margins in lines of text, ordered bottom, left, top, right
# (these values are R's defaults).
par(mar = c(5.1, 4.1, 4.1, 2.1))
http://www.r-bloggers.com/setting-graph-margins-in-r-using-the-par-function-and-lots-of-cow-milk/

text ( aa, bb, t, cex=0.8);

#### alpha
names(fit)[ grep("alpha", names(fit))]
fit_alpha_tb = data.frame( t( fit[, grep("alpha", names(fit)) ]))
rownames(fit_alpha_tb) = names(fit)[grep("alpha", names(fit))]
fit_alpha_tb$names = gsub("_.*", "", rownames(fit_alpha_tb))

library(RColorBrewer);
#hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(8);
hmcol = colorRampPalette(brewer.pal(3,"Blues"))(8);

format(Sys.time(), "%a %b %d %H:%M:%S %Y")

format(Sys.time(), "%Y%b%d_%H%M%S")

#regular expression

require(org.Sc.sgd.db)
x <- org.Sc.sgdALIAS
ls(x)[grep("^Y..\\d{3}", ls(x))]

http://www.regular-expressions.info/rlanguage.html
http://www.r-bloggers.com/regular-expressions-in-r-vs-rstudio/

list.files for the contents of a directory.

normalizePath for a ‘canonical’ path name.

(WD <- getwd())

if (!is.null(WD)) setwd(WD)

require(xlsx) # read Excel in R.

Usage

! x
x & y
x && y
x | y
x || y
xor(x, y)

rm(list=ls() );



unlist(strsplit("a.b.c", "\\."))
-----
str(x)
attributes(x)
--------------
# Every month/year label combination, e.g. "Jan 1999" (12 x 5 character matrix).
outer(month.abb, 1999:2003, FUN = "paste")

Letters <- c( LETTERS, letters);
Letters[ ! sapply(Letters, function(xx) exists(xx) ) ]; # anonymous function as a wrapper for a primitive function

------------
 legend(100,60, seq(100,200,1), lty=1) # line legends

  # Attach MASS and run the example for its Skye data set.
  library(MASS); example(Skye); # ternary plot

 # Show the help index of the survival package.
 library(help = survival)

ColorBrewer.org

useful commands:
x11; factor; relevel; class; loess; contour; is.element; match; %in%; grep; sample; nrow; 
gregmisc: hist2d
url()
---
class and object
CA@a[1]

----

# Toy data set of seven records with components time, status (one value is
# NA), x and sex, used to fit a Cox model stratified by sex.
# NOTE(review): coxph(), Surv() and strata() are presumably from the
# `survival` package, which must be attached first — confirm.
test1 <- list( time= c(4, 3,1,1,2,2,3),
  status=c(1,NA,1,0,1,1,0),
  x= c(0, 2,1,1,1,0,0),
  sex= c(0, 0,0,0,1,1,1))
coxph( Surv(time, status) ~ x + strata(sex), test1) #stratified model

----
delete all-NA columns from a matrix

 > x<-matrix(1:16,4,4)
 > x[col(x)>=row(x)]<-NA
 > x[,! apply(x,2,function(x) all(is.na(x))) ]
     [,1] [,2] [,3]
[1,]   NA   NA   NA
[2,]    2   NA   NA
[3,]    3    7   NA
[4,]    4    8   12

----
? R/Splus Perl interface   RSperl
? R Python interface Rpy Rpython
not in CRAN
----
# date.grouping(d): label each date in `d` with the start ("%Y-%m") of the
# 6-month period that contains it.
# NOTE(review): `nr=2` (partial match for `nrow`) lays the split pieces out
# two per column, so `d` is presumably "YYYY-MM"-style input — confirm.
date.grouping <- function(d) {
  # for each date in d calculate date beginning 6 month period which contains it
  # Split every element on "-" and store the numeric pieces column-wise.
  mat <- matrix(as.numeric(unlist(strsplit(as.character(d),"-"))),nr=2)
  # Helper: call ISOdate() with the elements of x as its arguments.
  f <- function(x) do.call( "ISOdate", as.list(x) )
  # Append a row of 1s (the third ISOdate argument, i.e. the day) and build
  # one date per column; adding ISOdate(1970,1,1) presumably turns the plain
  # numbers returned by apply() back into date-times — TODO confirm.
  POSIXct.dates <- apply(rbind(mat,1),2,f) + ISOdate(1970,1,1)
  # Cut points every 6 months from the earliest to the latest date, with Inf
  # appended as an open upper bound.
  breaks <- c(seq(from=min(POSIXct.dates), to=max(POSIXct.dates), by="6 mo"), Inf)
  # Bin each date and format the bin label as year-month.
  format( as.POSIXct( cut( POSIXct.dates, breaks, include.lowest=T )), "%Y-%m" ) }

----
nonlinear regression
library(nls)
----
http://www.bioconductor.org/
----
library(lattice)
----
persp()
----
las=1 or 2
You can use the graphics parameter "srt" to rotate displayed text by a specified number of degrees,
 e.g. srt=45 to put it on an angle, srt=90 to put it vertical.  

----
cnams = dimnames(aa)[[2]]

cnams[which(cnams == 'blah3.Mg')] = 'Mg (%)'
...
dimnames(aa)[[2]] = cnams
----
eval(substitute(lf <- locfit(~s, data=age), list(s=s)))
------
  sub=sort(sample(x,200, replace=F))
  postscript("try.ps")
  matplot(x[sub],y[sub,],type="l",lwd=5)     
  dev.off()

-----

>Does anyone know if R has the functionality to calculate a simple 
>moving average. I cant seem to find it in the help menu.

filter in library ts. does filter() do what you need?
Or look at the 'running' function in the gregmisc package.


# Trailing moving average.
#
# x: numeric vector.
# k: non-negative lag; each window covers the current point and the k points
#    before it (k + 1 values in total).
#
# Returns a vector the same length as x. Positions 1..k, which do not have a
# full window, are left at 0. If k >= length(x) no position has a full
# window, so the result is all zeros (previously the descending sequence
# (1+k):n extended the result with NA or failed on mixed subscripts).
moving.average <- function(x, k) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 0)
  n <- length(x)
  y <- rep(0, n)
  # Only iterate when at least one full window exists; this also keeps
  # (1 + k):n from counting downwards.
  if (k < n) {
    for (i in (1 + k):n) {
      y[i] <- mean(x[(i - k):i])
    }
  }
  y
}

----
tree packages
----

# Build a demo data frame of Car x Color values; the car names are long.
carnames <- c(
  "BMW: High End, German",
  "Renault: Medium End, French",
  "Mercedes: High End, German",
  "Seat: Imaginary, Unknown Producer"
)
carcolors <- c("red", "white", "silver", "green")
# 16 random values, rounded to one decimal place.
datavals <- round(rnorm(n = 16, mean = 100, sd = 60), digits = 1)
# Cars cycle fastest; each colour is repeated for four consecutive rows.
data <- data.frame(
  Car = rep(carnames, times = 4),
  Color = rep(carcolors, each = 4),
  Value = datavals
)

# generate balloon plot with default scaling, the column labels will overlap 
# balloonplot( data$Color, data$Car, data$Value)


# try again, with column labels rotated 90 degrees, and given more space 
balloonplot( data$Car, data$Color, data$Value, colmar=3, colsrt=90)



----

Here is a very rough addlogo() using pixmap:

"addlogo" <- function(x, y, pixmap) {
    # Place `pixmap` inside the rectangle spanned by two corner points and
    # draw it on top of the current plot.
    #
    # x, y:   corner coordinates, either two numeric vectors of length 2 or
    #         a single list with components x and y passed as `x`.
    # pixmap: object with bbox, cellres and size slots.
    #
    # Returns the updated pixmap invisibly.
    if (is.list(x)) {
        y <- x$y
        x <- x$x
    } else if (missing(y)) {
        stop("missing y")
    }

    coords_numeric <- is.numeric(x) && is.numeric(y)
    if (!coords_numeric) {
        stop("non-numeric coordinates")
    }

    # Exactly two points are required, one per corner.
    n_pts <- length(x)
    if (!(n_pts == 2 && length(y) == 2)) {
        stop("invalid coordinate lengths")
    }

    # Bounding box order: x1, y1, x2, y2.
    pixmap@bbox[1:4] <- c(x[1], y[1], x[2], y[2])

    # Cell resolution = box extent divided by the pixel count on that axis
    # (size[2] is used for the x extent, size[1] for the y extent).
    box_width <- pixmap@bbox[3] - pixmap@bbox[1]
    box_height <- pixmap@bbox[4] - pixmap@bbox[2]
    pixmap@cellres[1] <- box_width / pixmap@size[2]
    pixmap@cellres[2] <- box_height / pixmap@size[1]

    plot(pixmap, add = TRUE)
    invisible(pixmap)
}

which will work with locator() too. To maintain aspect, it shouldn't alter 
the relative cell resolutions, and should just use the new x or y, but 
this is the general case. The handling of the location of the logo is 
copied & pasted from legend().



----
x <- readLines(myfile)
strsplit(substring(x,8),split="")
----

Thursday, December 26, 2019

tensorflow 2 tutorial

https://www.tensorflow.org/tutorials/quickstart/beginner

Wednesday, December 25, 2019

Atlanta graph lecture series

Atlanta Lecture Series in Combinatorics and Graph Theory XXIV (ALS 24)

http://www.mathcs.emory.edu/~hhuan30/ALS/ALS24/index.html

Wednesday, December 18, 2019

open source single-sign on service

Tuesday, December 17, 2019

HSYAA training data

Please check the link below :

https://github.com/QinLab/HSYAA_training_images/tree/master/hsyaa_5Class_Tr1000_Te180_60x60_good/training-images

You can find traps with more cells in the “exC” folder.

Monday, December 16, 2019

CITI training

Please create an account at CITI, associate your account with UTCOM Chattanooga, and then complete the basic human subject research modules.

https://about.citiprogram.org/en/homepage/

Friday, December 13, 2019

DoD scholarship

student requirements

Wednesday, December 11, 2019

Ghafari HSYAA GitHub info

Here is the link for the GT repository (please check the README file for more details).

https://github.com/QinLab/HSYAA_GT

Currently, we are working on :

1) 100 Images without increasing resolution (60 x 60): “100IM_max3Cells_60x60_tiff”

https://github.com/QinLab/HSYAA_GT/tree/master/100IM_max3Cells_60x60_tiff

2) 100 Images with increasing resolution (512 X512) “ 100IM_max3Cells_512x512_tiff_CUBIC “

https://github.com/QinLab/HSYAA_GT/tree/master/100IM_max3Cells_512x512_tiff_CUBIC

reference manager comparison

http://www.rdgao.com/reference-managers/

Saturday, December 7, 2019

macOS 10.15 Catalina: The Ars Technica review

https://arstechnica.com/gadgets/2019/10/macos-10-15-catalina-the-ars-technica-review/11/#h1

Thursday, December 5, 2019

Ren'Py digital story

Python digital story telling

https://www.renpy.org/why.html

Monday, December 2, 2019

predict housing price with tensor flow

https://hackernoon.com/build-your-first-neural-network-to-predict-house-prices-with-keras-3fb0839680f4

Friday, November 29, 2019

data science website, Randy Lao

free books, and good cartoon
www.claoudml.com

Tuesday, November 26, 2019

REU projects

REU
mixture Gompertz, network aging fitting

RLS deep learning, prediction

Friday, November 22, 2019

GPU request for data science

RE: GPU Workstation for enhancing experiential learning of artificial intelligence in MSDA

We would like to request a Linux GPU workstation to enrich student experiential learning of artificial intelligence (AI) in the Master of Science in Data Analytics (MSDA) program. GPU-based deep learning methods are the state-of-the-art AI methods in data science. Computational training of deep-learning models with real-world big data is time-consuming with CPUs or low-end GPUs. Lack of GPU computing power has prevented many UTC students from applying deep learning methods to big data that are typical in the business world. The proposed Linux workstation will improve GPU access for students in several courses in the MSDA program, including CPSC 5440 Introduction to Machine Learning, CPSC 5180 Programming Languages for Advanced Data Analytics, CPSC 5530 Data Visualization and Exploration, and CPSC 5240 Principles of Data Analytics.

Precision 7920 Tower Workstation
 Intel Xeon Gold 6130 2.1GHz, 3.7GHz Turbo, 16C, 10.4GT/s 3UPI, 22MB Cache, HT (125W) DDR4-2666  Windows 10 Pro for Workstations (4 Cores Plus) Multi - English, French, Spanish  NVIDIA® Quadro® P2000, 5GB, 4 DP (7X20T)  32GB 4x8GB DDR4 2666MHz RDIMM ECC  3.5" 2TB 7200rpm SATA Hard Drive 
$4,699.00

https://www.dell.com/en-us/work/shop/desktops-all-in-one-pcs/precision-7920-tower-workstation/spd/precision-7920-workstation/xctopt7920us_3

https://www.dell.com/al/business/p/precision-desktops?~ck=bt

Deep learning-based projects are popular choices for many undergraduate and graduate students. Almost all students in CPSC 4180/5180 chose deep learning-related course projects.

Some of my students are having trouble getting their deep learning models implemented and trained given the limited GPU computing resources and support we have. If the CSE department had its own Linux GPU workstations, our students could be more efficient and productive. Given that we expect more and more data science MS students, increasing GPU computing support seems to be strategically important for both education and research support.

The needs of machine learning and artificial intelligence are reflected by the recent Blue Sky initiative at our department and in our new joint Data Analytics program with the College of Business. GPU-based deep learning is an important skill and body of knowledge that our students should be trained in for their future employability. In order to provide experiential learning to the students in our department and the college, we need to provide state-of-the-art deep learning training to our students in the field of artificial intelligence. Even with current cloud and virtual machine technology, a GPU is still hard-linked to a virtual machine. So, in order to provide more GPU learning experiences to our students, we literally need to purchase more GPU hardware. It actually does not matter whether these GPUs are hosted in a cloud or in workstations, because GPUs cannot be virtualized to the best of our knowledge. Given that typical training on real-world data requires long computing times for deep-learning models, dedicated Linux nodes or workstations are the most practical way to provide experiential learning for students who use real-world data in deep-learning projects. Our computers in 312 can be used by students to analyze toy data, but they are not sufficient for any real-world data sets. In short, in order to provide real-world experiential learning of AI to our students, we need to provide the necessary GPU hardware to students.

CPSC 5180	Programming Languages for Advanced Data
CPSC 5200	Automata, Complexity, and Computability
CPSC 5210	Design and Analysis of Computer Algorithms	✔
CPSC 5230	Decision Support and Business Intelligence
CPSC 5240	Principles of Data Analytics		✔
CPSC 5250	Medical Informatics
CPSC 5260	Introduction to Parallel Algorithms		✔
CPSC 5270	Advanced Database and Database Security	✔
CPSC 5400	Topics in Simulation
CPSC 5410	Model Analysis and Simulation
CPSC 5420	Programming with SAS
CPSC 5440	Introduction to Machine Learning
CPSC 5450	Advanced Topics in Artificial Intelligence
CPSC 5460	Pattern Recognition
CPSC 5500	Computer Graphics Applications and Algorithms
CPSC 5510	Advanced Computer Graphics
CPSC 5530	Data Visualization and Exploration
CPSC 5560	Computer Data Communications
CPSC 5570	Internetworking
CPSC 5580	Software Defined Networks
CPSC 5590	Advanced Computer Networks	✔
CPSC 5600	Advanced Biometrics and Cryptography		✔
CPSC 5610	Advanced Information Security Management
CPSC 5620	Computer Network Security		✔
CPSC 5640	Internet Security Protocols
CPSC 5660	System Vulnerability Analysis and Auditing	✔
CPSC 5680	Computer Forensics	✔
CPSC 5700	Advanced Computer Architecture
CPSC 5710	Microcomputer Systems Architecture
CPSC 5720	Real-Time Embedded Systems
CPSC 5800	Advanced Topics in Systems Software
CPSC 5820	Legacy Computing Systems

advantage of temporal networks

Li, ..., Barabasi, Science, 2017,
temporal network advantages.

Energy needed to drive the network from the initial state vector $x_0$ to the final state $x_f$:
$E(x_0, x_f) = \tfrac{1}{2}\, d^{T} W_{\mathrm{eff}}^{-1}\, d$

where $W_{\mathrm{eff}}$ encodes the energy structure of the network.

I did not follow S1.1 method

logical puzzles

Logical puzzle YouTube, jellologic
https://www.youtube.com/watch?v=L_eTNclIKbQ

https://www.google.com/imgres?imgurl=https%3A%2F%2Fwww.woojr.com%2Fwp-content%2Fuploads%2F2018%2F08%2Fdifficult-logic-puzzle-kids-232x300.jpg&imgrefurl=https%3A%2F%2Fwww.woojr.com%2Fprintable-logic-puzzles-for-kids%2F&docid=s2zU7g6K-OmQ4M&tbnid=3r4TSXUQqlMTtM%3A&vet=10ahUKEwiLx4rxzf7lAhXOxFkKHbeTD64QMwhUKAcwBw..i&w=232&h=300&bih=852&biw=1870&q=logic%20puzzle%20examples%20with%20answers&ved=0ahUKEwiLx4rxzf7lAhXOxFkKHbeTD64QMwhUKAcwBw&iact=mrc&uact=8#h=300&imgdii=l2hOqZ9Mt4HwDM:&vet=10ahUKEwiLx4rxzf7lAhXOxFkKHbeTD64QMwhUKAcwBw..i&w=232

Thursday, November 21, 2019

hyper spectral images

plants
band interleaved by line (BIL) image encoding
https://www.loc.gov/preservation/digital/formats/fdd/fdd000304.shtml

Tuesday, November 19, 2019

REU, computational biology related

UCSF
https://globalprojects.ucsf.edu/project/nsf-reu-molecular-biosciences

Monday, November 18, 2019

clonal haematopoiesis f

2018 Jul;559(7714):350-355. doi: 10.1038/s41586-018-0321-x. Epub 2018 Jul 11.

Insights into clonal haematopoiesis from 8,342 mosaic chromosomal alterations

https://www.ncbi.nlm.nih.gov/pubmed/29995854

uncovered in blood-derived DNA from 151,202 UK Biobank participants using phase-based computational techniques (estimated false discovery rate, 6-9%).

Seems to be the first author Loh's postdoc work

Saturday, November 16, 2019

remove Google Drive File large cache

uninstall Google Drive File Stream

$hqin/Library/Application Support/Google/DriveFS
mv DriveFS DriveFS.old

reinstall Google Drive File Stream.

restart computer

Tuesday, November 12, 2019

monte carlo , markov chain

MCMC, markov chain monte carlo

http://www.phys.ubbcluj.ro/~zneda/edu/mc/mcshort.pdf

predatory journal list

Beall's List was a prominent list of predatory open-access publishers that was maintained by University of Colorado librarian Jeffrey Beall.

https://en.wikipedia.org/wiki/Beall%27s_List

Friday, November 8, 2019

blogs on science, scRNA

http://www.nxn.se/

Thursday, November 7, 2019

test runner in Python

Python test runner

https://realpython.com/python-testing/#choosing-a-test-runner

Tuesday, November 5, 2019

*** Qin lab funding acknowledgments

For HSYAA:
We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, and internal support of the University of Tennessee at Chattanooga.

For yeast aging:
We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, REU #1852042, and internal support of the University of Tennessee at Chattanooga.

For REU:

REU #1852042

For Machine Learning
We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, and internal support of the University of Tennessee at Chattanooga. TP and DM thank the support of a DoD capacity building grant.

Cody: We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, and internal support of the University of Tennessee at Chattanooga

Syed: BD Spoke #1761839

Allison: NSF Career award #1453078 and #1720215, BD Spoke #1761839,