This site is to serve as my note-book and to effectively communicate with my students and collaborators. Every now and then, a blog may be of interest to other researchers or teachers. Views in this blog are my own. All rights of research results and findings on this blog are reserved. See also http://youtube.com/c/hongqin @hongqin
Friday, December 27, 2019
*** useful R, Rstudio tips, update
Useful R tips, 20181227 update
# Stamp the output file name with today's date (YYYYMMDD) so that summaries
# written on different days do not overwrite each other.
timestamp <- format(Sys.Date(), "%Y%m%d")
write.csv(
  dangtb,
  paste0("DangProteo_MS02Zscore_summary_", timestamp, ".csv"),
  quote = TRUE,      # spelled out: T/F are ordinary variables and can be reassigned
  row.names = FALSE
)
title: "yeast PIN, rls ratio - CR prot"
author: "H Qin"
date: '`r paste( "2018-12-21 ~ ",format(Sys.Date(), "%Y-%B-%d"))`'
output:
pdf_document: default
html_document: default
Read csv is much faster than xlsx.
# Give read.csv the column types up front; an NA entry leaves that column to
# the reader's own type detection.
tb <- read.csv(
  fullFileName,
  colClasses = c("character", NA, NA, "character", rep("numeric", 8), NA)
)
# Tab-separated file: skip the first 25 lines, keep strings as character and
# switch off quote processing.
tb <- read.table(
  "gene_association.sgd",
  skip = 25, sep = "\t", stringsAsFactors = FALSE, quote = "", row.names = NULL
)
# Cluster the rows of ctb2 once per linkage method in `mymethods` and draw one
# heatmap per method, with a row side bar coloured by cluster membership.
library(RColorBrewer)

# Loop-invariant settings: built once instead of on every iteration.
col.palette <- c("red", "brown", "blue", "green")
# hmcol <- colorRampPalette(brewer.pal(10, "RdBu"))(256)
hmcol <- colorRampPalette(brewer.pal(5, "RdBu"))(16)

for (mymethod in mymethods) {
  hd <- hclust(dist(ctb2), mymethod)
  # plot(hd, main = "hamming distance, ward linkage")
  coat.cat <- cutree(hd, numclus) ###<=== change is here
  # cutree() returns cluster ids 1..numclus; col.palette has only four
  # entries, so numclus > 4 yields NA colours for the extra clusters.
  coat.color <- col.palette[coat.cat]
  # heatmap(ctb2, col = hmcol, scale = "none", margins = c(5, 10))
  heatmap(
    ctb2,
    col = hmcol, scale = "none", margins = c(5, 10),
    RowSideColors = coat.color, ColSideColors = spec.colors,
    hclustfun = function(c) hclust(c, method = mymethod),
    # distfun = function(c) as.dist(hamming.distance(c)), # Hamming is less pleasant than Euclidean
    main = mymethod
  )
}
# Print the integer sequence running from the first to the second
# command-line argument.
options(echo = TRUE) # if you want to see commands in the output file
args <- commandArgs(trailingOnly = TRUE)
print(args)
# trailingOnly = TRUE means that only your arguments are returned, check:
# print(commandArgs(trailingOnly = FALSE))
i <- as.integer(args[1])
j <- as.integer(args[2])
x <- seq(i, j)
print(x)
R -f R-args.R --args 2 5
Rscript file
#from lower case to upper case
require(stringr)
conditions$media[r] = str_replace( conditions$media[r], "\\/", "")
tb$AssignmentTotal= apply(tb[, assignments], 1, FUN=function(x){sum(x,na.rm=T)} )
http://cran.r-project.org/doc/FAQ/R-FAQ.html#How-can-I-create-rotated-axis-labels_003f
cumsum()
with()
R -f filename
axis( 2, at=pretty(tbf$s), tcl=0.2, las=2 ) #rotate axis labels
text( tb$G + 0.01*nchar(tb$strain)/4, log10(tb$R0)-0.1*nchar(tb$strain)/4, tb$strain, pos=3)
layout(mat, heights= c( 1.15, rep(1, nrow(mat)-2), 1.2) );
par(mar = c(5.1, 4.1, 4.1, 2.1)) # margins in lines: bottom, left, top, right
http://www.r-bloggers.com/setting-graph-margins-in-r-using-the-par-function-and-lots-of-cow-milk/
text ( aa, bb, t, cex=0.8);
#### alpha
# Show the names of the columns of `fit` that contain "alpha".
grep("alpha", names(fit), value = TRUE)
# Transpose those columns so that each alpha column becomes one row.
fit_alpha_tb <- data.frame(t(fit[, grepl("alpha", names(fit))]))
rownames(fit_alpha_tb) <- grep("alpha", names(fit), value = TRUE)
# Keep only the part of each row name before the first underscore.
fit_alpha_tb$names <- gsub("_.*", "", rownames(fit_alpha_tb))
library(RColorBrewer);
#hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(8);
hmcol = colorRampPalette(brewer.pal(3,"Blues"))(8);
http://www.r-bloggers.com/regular-expressions-in-r-vs-rstudio/
(WD <- getwd())
require(xlsx) # read Excel in R.
# Stamp the output file name with today's date (YYYYMMDD) so that summaries
# written on different days do not overwrite each other.
timestamp <- format(Sys.Date(), "%Y%m%d")
write.csv(
  dangtb,
  paste0("DangProteo_MS02Zscore_summary_", timestamp, ".csv"),
  quote = TRUE,      # spelled out: T/F are ordinary variables and can be reassigned
  row.names = FALSE
)
author: "H Qin"
date: '`r paste( "2018-12-21 ~ ",format(Sys.Date(), "%Y-%B-%d"))`'
output:
pdf_document: default
html_document: default
Read csv is much faster than xlsx.
# Give read.csv the column types up front; an NA entry leaves that column to
# the reader's own type detection.
tb <- read.csv(
  fullFileName,
  colClasses = c("character", NA, NA, "character", rep("numeric", 8), NA)
)
# Tab-separated file: skip the first 25 lines, keep strings as character and
# switch off quote processing.
tb <- read.table(
  "gene_association.sgd",
  skip = 25, sep = "\t", stringsAsFactors = FALSE, quote = "", row.names = NULL
)
# Cluster the rows of ctb2 once per linkage method in `mymethods` and draw one
# heatmap per method, with a row side bar coloured by cluster membership.
library(RColorBrewer)

# Loop-invariant settings: built once instead of on every iteration.
col.palette <- c("red", "brown", "blue", "green")
# hmcol <- colorRampPalette(brewer.pal(10, "RdBu"))(256)
hmcol <- colorRampPalette(brewer.pal(5, "RdBu"))(16)

for (mymethod in mymethods) {
  hd <- hclust(dist(ctb2), mymethod)
  # plot(hd, main = "hamming distance, ward linkage")
  coat.cat <- cutree(hd, numclus) ###<=== change is here
  # cutree() returns cluster ids 1..numclus; col.palette has only four
  # entries, so numclus > 4 yields NA colours for the extra clusters.
  coat.color <- col.palette[coat.cat]
  # heatmap(ctb2, col = hmcol, scale = "none", margins = c(5, 10))
  heatmap(
    ctb2,
    col = hmcol, scale = "none", margins = c(5, 10),
    RowSideColors = coat.color, ColSideColors = spec.colors,
    hclustfun = function(c) hclust(c, method = mymethod),
    # distfun = function(c) as.dist(hamming.distance(c)), # Hamming is less pleasant than Euclidean
    main = mymethod
  )
}
# Print the integer sequence running from the first to the second
# command-line argument.
options(echo = TRUE) # if you want to see commands in the output file
args <- commandArgs(trailingOnly = TRUE)
print(args)
# trailingOnly = TRUE means that only your arguments are returned, check:
# print(commandArgs(trailingOnly = FALSE))
i <- as.integer(args[1])
j <- as.integer(args[2])
x <- seq(i, j)
print(x)
R -f R-args.R --args 2 5
Rscript file
#from lower case to upper case
chartr(old, new, x)
tolower(x)
toupper(x)
casefold(x, upper = FALSE)
conditions$media[r] = str_replace( conditions$media[r], "\\/", "")
tb$AssignmentTotal= apply(tb[, assignments], 1, FUN=function(x){sum(x,na.rm=T)} )
cumsum()
with()
R -f filename
axis( 2, at=pretty(tbf$s), tcl=0.2, las=2 ) #rotate axis labels
text( tb$G + 0.01*nchar(tb$strain)/4, log10(tb$R0)-0.1*nchar(tb$strain)/4, tb$strain, pos=3)
layout(mat, heights= c( 1.15, rep(1, nrow(mat)-2), 1.2) );
par(mar = c(5.1, 4.1, 4.1, 2.1)) # margins in lines: bottom, left, top, right
http://www.r-bloggers.com/setting-graph-margins-in-r-using-the-par-function-and-lots-of-cow-milk/
#### alpha
# Show the names of the columns of `fit` that contain "alpha".
grep("alpha", names(fit), value = TRUE)
# Transpose those columns so that each alpha column becomes one row.
fit_alpha_tb <- data.frame(t(fit[, grepl("alpha", names(fit))]))
rownames(fit_alpha_tb) <- grep("alpha", names(fit), value = TRUE)
# Keep only the part of each row name before the first underscore.
fit_alpha_tb$names <- gsub("_.*", "", rownames(fit_alpha_tb))
library(RColorBrewer);
#hmcol = colorRampPalette(brewer.pal(5,"RdBu"))(8);
hmcol = colorRampPalette(brewer.pal(3,"Blues"))(8);
format(Sys.time(), "%a %b %d %H:%M:%S %Y")
format(Sys.time(), "%Y%b%d_%H%M%S")
#regular expression
# List the keys of the alias map that start with "Y", followed by any two
# characters and then three digits.
library(org.Sc.sgd.db)
x <- org.Sc.sgdALIAS
ls(x)[grep("^Y..\\d{3}", ls(x))]
http://www.regular-expressions.info/rlanguage.html
http://www.r-bloggers.com/regular-expressions-in-r-vs-rstudio/
list.files for the contents of a directory.
normalizePath for a ‘canonical’ path name.
(WD <- getwd())
if (!is.null(WD)) setwd(WD)
require(xlsx) # read Excel in R.
Usage
! x x & y x && y x | y x || y xor(x, y)
# Grab-bag of short R reminders. Each "# ----" rule separates unrelated
# snippets; most of them rely on objects (x, aa, age, CA, ...) that are not
# defined here.
rm(list = ls())
unlist(strsplit("a.b.c", "\\."))
# -----
str(x)
attributes(x)
# --------------
outer(month.abb, 1999:2003, FUN = "paste")
Letters <- c(LETTERS, letters)
# anonymous function as a wrapper for a primitive function
Letters[!sapply(Letters, function(xx) exists(xx))]
# ------------
legend(100, 60, seq(100, 200, 1), lty = 1) # line legends
library(MASS)
example(Skye) # ternary plot
library(help = survival)
# ColorBrewer.org
# useful commands: x11; factor; relevel; class; loess; contour; is.element;
#   match / %in%; grep; sample; nrow;
# gregmisc: hist2d
# url()
# ---
# class and object
CA@a[1]
# ----
test1 <- list(
  time   = c(4, 3, 1, 1, 2, 2, 3),
  status = c(1, NA, 1, 0, 1, 1, 0),
  x      = c(0, 2, 1, 1, 1, 0, 0),
  sex    = c(0, 0, 0, 0, 1, 1, 1)
)
coxph(Surv(time, status) ~ x + strata(sex), test1) # stratified model
# ----
# delete NA from matrix
x <- matrix(1:16, 4, 4)
x[col(x) >= row(x)] <- NA
x[, !apply(x, 2, function(x) all(is.na(x)))]
#      [,1] [,2] [,3]
# [1,]   NA   NA   NA
# [2,]    2   NA   NA
# [3,]    3    7   NA
# [4,]    4    8   12
# ----
# ? R/Splus Perl interface: RSperl
# ? R Python interface: Rpy; Rpython not in CRAN
# ----
date.grouping <- function(d) {
  # for each date in d calculate date beginning 6 month period which contains it
  # NOTE(review): nrow = 2 implies d holds "year-month" strings -- confirm.
  mat <- matrix(as.numeric(unlist(strsplit(as.character(d), "-"))), nrow = 2)
  f <- function(x) do.call("ISOdate", as.list(x))
  # rbind(mat, 1) appends day 1 to every (year, month) column
  POSIXct.dates <- apply(rbind(mat, 1), 2, f) + ISOdate(1970, 1, 1)
  breaks <- c(
    seq(from = min(POSIXct.dates), to = max(POSIXct.dates), by = "6 months"),
    Inf
  )
  format(as.POSIXct(cut(POSIXct.dates, breaks, include.lowest = TRUE)), "%Y-%m")
}
# ----
# nonlinear regression
# NOTE(review): nls() is now provided by the stats package; confirm whether
# this library() call is still needed.
library(nls)
# ----
# http://www.bioconductor.org/
# ----
library(lattice)
# ----
persp()
# ----
# las=1 or 2
# You can use the graphics parameter "srt" to rotate displayed text by a
# specified number of degrees, e.g. srt=45 to put it on an angle, srt=90 to
# put it vertical.
# ----
cnams <- dimnames(aa)[[2]]
cnams[which(cnams == 'blah3.Mg')] <- 'Mg (%)'
# ...
dimnames(aa)[[2]] <- cnams
# ----
eval(substitute(lf <- locfit(~s, data = age), list(s = s)))
# ------
sub <- sort(sample(x, 200, replace = FALSE))
postscript("try.ps")
matplot(x[sub], y[sub, ], type = "l", lwd = 5)
dev.off()
# -----
>Does anyone know if R has the functionality to calculate a simple
>moving average. I cant seem to find it in the help menu.
filter in library ts. does filter() do what you need?
Or look at the 'running' function in the gregmisc package.
# Trailing moving average.
#
# x: numeric vector.
# k: number of preceding values to include; each window holds k + 1 values.
#
# Returns a numeric vector the same length as x. Element i (for i > k) is
# mean(x[(i - k):i]); the first k elements, which have no full window, are 0.
# When k >= length(x) no full window exists and the result is all zeros.
moving.average <-
function(x, k) {
  n <- length(x)
  y <- rep(0, n)
  # Without this guard (1 + k):n would count backwards and index out of range.
  if (k >= n) {
    return(y)
  }
  for (i in seq(1 + k, n)) {
    y[i] <- mean(x[(i - k):i])
  }
  y
}
----
tree packages
----
# Build a demo data frame of Car x Color values, with deliberately long car names
carnames <- c(
  "BMW: High End, German",
  "Renault: Medium End, French",
  "Mercedes: High End, German",
  "Seat: Imaginary, Unknown Producer"
)
carcolors <- c("red", "white", "silver", "green")
datavals <- round(rnorm(16, mean = 100, sd = 60), digits = 1)
data <- data.frame(
  Car = rep(carnames, times = 4),
  Color = rep(carcolors, each = 4),
  Value = datavals
)
# With default scaling the column labels overlap:
# balloonplot( data$Color, data$Car, data$Value)
# so plot again with the column labels rotated 90 degrees and given more space
# NOTE(review): balloonplot() is not defined here; presumably it comes from an
# add-on package loaded elsewhere -- confirm.
balloonplot(data$Car, data$Color, data$Value, colmar = 3, colsrt = 90)
----
Here is a very rough addlogo() using pixmap:
# Draw `pixmap` inside the rectangle spanned by two corner points.
#
# x, y:   numeric vectors of length 2 with the corner coordinates. x may
#         instead be a list with components x and y, in which case both
#         coordinates are taken from it.
# pixmap: object with bbox, cellres and size slots; it is drawn with
#         plot(pixmap, add = TRUE).
#
# Returns the repositioned pixmap invisibly. Stops when y is missing, when a
# coordinate is not numeric, or when x and y are not both of length 2.
addlogo <- function(x, y, pixmap) {
  if (is.list(x)) {
    corners <- x
    x <- corners$x
    y <- corners$y
  } else if (missing(y)) {
    stop("missing y")
  }

  if (!(is.numeric(x) && is.numeric(y))) {
    stop("non-numeric coordinates")
  }

  n_pts <- length(x)
  if (n_pts != 2 || length(y) != n_pts) {
    stop("invalid coordinate lengths")
  }

  # The bounding box is stored as (x1, y1, x2, y2).
  pixmap@bbox[1:4] <- c(x[1], y[1], x[2], y[2])

  # Cell resolution: box extent divided by the matching entry of size
  # (x extent by size[2], y extent by size[1]).
  extent_x <- pixmap@bbox[3] - pixmap@bbox[1]
  extent_y <- pixmap@bbox[4] - pixmap@bbox[2]
  pixmap@cellres[1:2] <- c(extent_x / pixmap@size[2], extent_y / pixmap@size[1])

  plot(pixmap, add = TRUE)
  invisible(pixmap)
}
which will work with locator() too. To maintain aspect, it shouldn't alter
the relative cell resolutions, and should just use the new x or y, but
this is the general case. The handling of the location of the logo is
copied & pasted from legend().
----
x <- readLines(myfile)
strsplit(substring(x,8),split="")
----
Thursday, December 26, 2019
Wednesday, December 25, 2019
Atlanta graph lecture series
Atlanta Lecture Series in Combinatorics and Graph Theory XXIV (ALS 24)
Wednesday, December 18, 2019
Tuesday, December 17, 2019
HYSAA training data
Please check the link below :
You can find traps with more cells in the “exC” folder.
Monday, December 16, 2019
CITI training
Please create an account at CITI, associate your account with UTCOM Chattanooga, and then complete the basic human subject research modules.
Friday, December 13, 2019
Wednesday, December 11, 2019
Ghafari HYSAA GitHub info
Here is the link to the GT repository. (Please check the README file for more details.)
Currently, we are working on :
1) 100 Images without increasing resolution (60 x60 ) " 100IM_max3Cells_512x512_tiff “
2) 100 Images with increasing resolution (512 X512) “ 100IM_max3Cells_512x512_tiff_ CUBIC “
Saturday, December 7, 2019
Thursday, December 5, 2019
Monday, December 2, 2019
Friday, November 29, 2019
Tuesday, November 26, 2019
Friday, November 22, 2019
GPU request for data science
RE: GPU Workstation for enhancing experiential learning of artificial intelligence in MSDA
We would like to request a Linux GPU workstation to enrich student experiential learning of artificial intelligence (AI) in the Master of Science in Data Analytics program (MSDA). GPU-based deep learning methods are the state-of-the-art AI method in data science. Computational training of deep-learning models with real-world big data is time-consuming with CPU or low-end GPUs. Lack of GPU computing power has prohibited many UTC students from applying deep learning methods to big data that are typically in the business world. The proposed Linux workstation will improve GPU access to students in several courses in the MSDA program, including CPSC 5440 Introduction to Machine Learning, CPSC5180 Programming Languages for Advanced Data Analytics, CPSC 5530 Data visualization and Exploration, and CPSC 5240 Principle of Data Analytics.
Precision 7920 Tower Workstation
 Intel Xeon Gold 6130 2.1GHz, 3.7GHz Turbo, 16C, 10.4GT/s 3UPI, 22MB Cache, HT (125W) DDR4-2666  Windows 10 Pro for Workstations (4 Cores Plus) Multi - English, French, Spanish  NVIDIA® Quadro® P2000, 5GB, 4 DP (7X20T)  32GB 4x8GB DDR4 2666MHz RDIMM ECC  3.5" 2TB 7200rpm SATA Hard Drive
$4,699.00
https://www.dell.com/en-us/work/shop/desktops-all-in-one-pcs/precision-7920-tower-workstation/spd/precision-7920-workstation/xctopt7920us_3
https://www.dell.com/al/business/p/precision-desktops?~ck=bt
Deep learning-based projects are popular choices for many undergraduate and graduate students. Almost all students in CPSC4180/5180 chose deep learning-related course projects.
Some of my students are having trouble getting their deep learning models implemented and trained, given the limited GPU computing resources and support we have. If the CSE department had its own Linux GPU workstations, our students could be more efficient and productive. Given that we expect more and more data science MS students, increasing GPU computing support seems to be strategically important for both education and research support.
The need for machine learning and artificial intelligence is reflected by the recent Blue Sky initiative in our department and in our new joint Data Analytics program with the College of Business. GPU-based deep learning is an important skill that our students should be trained in for their future employability. In order to provide experiential learning to the students in our department and the college, we need to provide state-of-the-art deep learning training to our students in the field of artificial intelligence. With current cloud and virtual machine technology, a GPU is still hard-linked to a virtual machine. So, in order to provide more GPU learning experiences to our students, we literally need to purchase more GPU hardware. It does not actually matter whether these GPUs are hosted in a cloud or in workstations because, to the best of our knowledge, GPUs cannot be virtualized. Given that training deep-learning models on real-world data typically requires long computing times, dedicated Linux nodes or workstations are the most practical way to provide experiential learning for students who use real-world data in deep-learning projects. Our computers in 312 can be used by students to analyze toy data, but they are not sufficient for any real-world data sets. In short, in order to provide real-world experiential learning of AI to our students, we need to provide the necessary GPU hardware.
We would like to request a Linux GPU workstation to enrich student experiential learning of artificial intelligence (AI) in the Master of Science in Data Analytics program (MSDA). GPU-based deep learning methods are the state-of-the-art AI method in data science. Computational training of deep-learning models with real-world big data is time-consuming with CPU or low-end GPUs. Lack of GPU computing power has prohibited many UTC students from applying deep learning methods to big data that are typically in the business world. The proposed Linux workstation will improve GPU access to students in several courses in the MSDA program, including CPSC 5440 Introduction to Machine Learning, CPSC5180 Programming Languages for Advanced Data Analytics, CPSC 5530 Data visualization and Exploration, and CPSC 5240 Principle of Data Analytics.
Precision 7920 Tower Workstation
 Intel Xeon Gold 6130 2.1GHz, 3.7GHz Turbo, 16C, 10.4GT/s 3UPI, 22MB Cache, HT (125W) DDR4-2666  Windows 10 Pro for Workstations (4 Cores Plus) Multi - English, French, Spanish  NVIDIA® Quadro® P2000, 5GB, 4 DP (7X20T)  32GB 4x8GB DDR4 2666MHz RDIMM ECC  3.5" 2TB 7200rpm SATA Hard Drive
$4,699.00
https://www.dell.com/en-us/work/shop/desktops-all-in-one-pcs/precision-7920-tower-workstation/spd/precision-7920-workstation/xctopt7920us_3
https://www.dell.com/al/business/p/precision-desktops?~ck=bt
Deep learning-based projects are popular choices for many undergraduate and graduate students. Almost all students in CPSC4180/5180 chose deep learning-related course projects.
Some of my students are having trouble getting their deep learning models implemented and trained, given the limited GPU computing resources and support we have. If the CSE department had its own Linux GPU workstations, our students could be more efficient and productive. Given that we expect more and more data science MS students, increasing GPU computing support seems to be strategically important for both education and research support.
The need for machine learning and artificial intelligence is reflected by the recent Blue Sky initiative in our department and in our new joint Data Analytics program with the College of Business. GPU-based deep learning is an important skill that our students should be trained in for their future employability. In order to provide experiential learning to the students in our department and the college, we need to provide state-of-the-art deep learning training to our students in the field of artificial intelligence. With current cloud and virtual machine technology, a GPU is still hard-linked to a virtual machine. So, in order to provide more GPU learning experiences to our students, we literally need to purchase more GPU hardware. It does not actually matter whether these GPUs are hosted in a cloud or in workstations because, to the best of our knowledge, GPUs cannot be virtualized. Given that training deep-learning models on real-world data typically requires long computing times, dedicated Linux nodes or workstations are the most practical way to provide experiential learning for students who use real-world data in deep-learning projects. Our computers in 312 can be used by students to analyze toy data, but they are not sufficient for any real-world data sets. In short, in order to provide real-world experiential learning of AI to our students, we need to provide the necessary GPU hardware.
advantage of temporal networks
Li, ..., Barabasi, Science, 2017,
temporal network advantages.
Energy needed from state vector x0 to final state xf
$E(x_0, x_f) = \frac{1}{2}\, d^{T} W_{\mathrm{eff}}^{-1}\, d$
where $W_{\mathrm{eff}}$ encodes the energy structure of the network.
temporal network advantages.
Energy needed from state vector x0 to final state xf
$E(x_0, x_f) = \frac{1}{2}\, d^{T} W_{\mathrm{eff}}^{-1}\, d$
where $W_{\mathrm{eff}}$ encodes the energy structure of the network.
I did not follow S1.1 method
logical puzzles
Logical puzzle YouTube, jellologic
https://www.youtube.com/watch?v=L_eTNclIKbQ
https://www.google.com/imgres?imgurl=https%3A%2F%2Fwww.woojr.com%2Fwp-content%2Fuploads%2F2018%2F08%2Fdifficult-logic-puzzle-kids-232x300.jpg&imgrefurl=https%3A%2F%2Fwww.woojr.com%2Fprintable-logic-puzzles-for-kids%2F&docid=s2zU7g6K-OmQ4M&tbnid=3r4TSXUQqlMTtM%3A&vet=10ahUKEwiLx4rxzf7lAhXOxFkKHbeTD64QMwhUKAcwBw..i&w=232&h=300&bih=852&biw=1870&q=logic%20puzzle%20examples%20with%20answers&ved=0ahUKEwiLx4rxzf7lAhXOxFkKHbeTD64QMwhUKAcwBw&iact=mrc&uact=8#h=300&imgdii=l2hOqZ9Mt4HwDM:&vet=10ahUKEwiLx4rxzf7lAhXOxFkKHbeTD64QMwhUKAcwBw..i&w=232
Thursday, November 21, 2019
hyper spectral images
plants
band interleaved by line (BIL) image encoding
https://www.loc.gov/preservation/digital/formats/fdd/fdd000304.shtml
Tuesday, November 19, 2019
Monday, November 18, 2019
clonal haematopoiesis
Nature. 2018 Jul;559(7714):350-355. doi: 10.1038/s41586-018-0321-x. Epub 2018 Jul 11.
Insights into clonal haematopoiesis from 8,342 mosaic chromosomal alterations
uncovered in blood-derived DNA from 151,202 UK Biobank participants using phase-based computational techniques (estimated false discovery rate, 6-9%).
Seems to be the first author Loh's postdoc work
Saturday, November 16, 2019
remove Google Drive File large cache
uninstall Google Drive File Stream
$hqin/Library/Application Support/Google/DriveFS
mv DriveFS DriveFS.old
reinstall Google Drive File Stream.
restart computer
$hqin/Library/Application Support/Google/DriveFS
mv DriveFS DriveFS.old
reinstall Google Drive File Stream.
restart computer
Tuesday, November 12, 2019
predatory journal list
Beall's List was a prominent list of predatory open-access publishers that was maintained by University of Colorado librarian Jeffrey Beall.
https://en.wikipedia.org/wiki/Beall%27s_List
Friday, November 8, 2019
Thursday, November 7, 2019
Tuesday, November 5, 2019
*** Qin lab funding acknowledgments
For HYSAA:
We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, and internal support of the University of Tennessee at Chattanooga.
For yeast aging:
We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, REU #1852042, and internal support of the University of Tennessee at Chattanooga.
For REU:
REU #1852042
For Machine Learning
We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, and internal support of the University of Tennessee at Chattanooga. TP and DM thank the support of a DoD capacity building grant.
Cody: We thank the support of NSF Career award #1453078 and #1720215, BD Spoke #1761839, and internal support of the University of Tennessee at Chattanooga
Syed: BD Spoke #1761839
Allison: NSF Career award #1453078 and #1720215, BD Spoke #1761839,
Subscribe to:
Posts (Atom)