Open Notebook: SQL

Saturday, December 27, 2014

SQLite3 code on rls.db

file 'test_rls.sql'

.open rls.db
.databases
.tables
.separator ::
.headers on
.mode column
select distinct experiment from result_experiment limit 20;
.indices
.indices set
.width 5
select * from result limit 1;

/* The following select can take rls and its reference rls */
select experiments,set_name,set_strain,set_background,set_genotype,
set_lifespan_mean,ref_genotype,ref_lifespan_mean
from result limit 2;

/* The fields of set_name and set_genotype sometimes provide the ORF-name pair, but there are many exceptions. */

mysql tips

MYSQL tips "mysql.txt" file

show tables like "h%";

select * form someTable into outfile "/tmp/tmpfile";

create temporary table tmptab select distinct id1 from sampleTab1 UNION ALL
select distinct id2 from sampleTab2;

grant ALL on homo_sapiens_core_17_33.* to hqin@localhost;

SUBSTRING(str,pos,len)
SUBSTRING(str FROM pos FOR len)

MID(str,pos,len)
#Returns a substring len characters long from string str, starting at position pos.
#The variant form that uses FROM is SQL-92 syntax:

mysql> SELECT SUBSTRING('Quadratically',5,6);
-> 'ratica'

mysqldump test name --no-data --no-create-db > tmp.dump
mysqlimport -u root -h shanghai hong_database *.txt.table

/* try left, inner, outer join to see what's missing */
mysql> create temporary table bader2gu
-> select orf, Name1
-> from curagenOrf2name left join Ks_Ka_Yeast_Ca
-> on curagenOrf2name.orf = Ks_Ka_Yeast_Ca.Name1;
Query OK, 6268 rows affected (54.27 sec)
Records: 6268 Duplicates: 0 Warnings: 0

mysql> select * from bader2gu where Name1 is NULL;
/* return 4313 rows */

mysql> select * from bader2gu where Name1 is not NULL;
/* return 1955 rows. Note, one record is missing probably
due to different annotations bw curagen and the public release from SGD
*/

SQLite 3, osX, byte, rls.db

Reference: http://www.sqlite.org/cli.html

#I want to install SQLite to load 'rls.db'.

$ sudo port install sqlite3

#OK

#how to load 'rls.db' ?

$ sqlite3

SQLite version 3.8.7.4 2014-12-09 01:34:36

Enter ".help" for usage hints.

Connected to a transient in-memory database.

Use ".open FILENAME" to reopen on a persistent database.

sqlite> .open rls.db

sqlite> .databases

seq name file

--- --------------- ----------------------------------------------------------

0 main /Users/hqin/projects/0.network.aging.prj/4.svm/rls.db

sqlite> .tables

build_log genotype_pubmed_id result_experiment set

cross_mating_type meta result_ref yeast_strain

cross_media result result_set

sqlite> .indices

build_log_filename

cross_mating_type_background

cross_mating_type_genotype

cross_mating_type_locus_tag

cross_mating_type_media

cross_mating_type_temperature

cross_media_background

cross_media_genotype

cross_media_locus_tag

cross_media_mating_type

cross_media_temperature

genotype_pubmed_id_genotype

genotype_pubmed_id_pubmed_id

meta_name

result_experiment_experiment

result_experiment_result_id

result_percent_change

result_pooled_by

result_ranksum_p

result_ref_background

result_ref_genotype

result_ref_locus_tag

result_ref_mating_type

result_ref_media

result_ref_name

result_ref_result_id

result_ref_set_id

result_ref_strain

result_ref_temperature

result_set_background

result_set_genotype

result_set_lifespan_mean

result_set_locus_tag

result_set_mating_type

result_set_media

result_set_name

result_set_result_id

result_set_set_id

result_set_strain

result_set_temperature

set_experiment

set_media

set_name

set_strain

set_temperature

yeast_strain_background

yeast_strain_genotype_short

yeast_strain_genotype_unique

yeast_strain_mating_type

yeast_strain_name

yeast_strain_owner

sqlite> select distinct experiment from result_experiment limit 20;

experiment

100

101

102_plate115

103

104

105

106_plate116

107

108_plate117

...

sqlite> .separator :::

sqlite> select * from result limit 2;

id:::experiments:::set_name:::set_strain:::set_background:::set_mating_type:::set_locus_tag:::set_genotype:::set_media:::set_temperature:::set_lifespan_start_count:::set_lifespan_count:::set_lifespan_mean:::set_lifespan_stdev:::set_lifespans:::ref_name:::ref_strain:::ref_background:::ref_mating_type:::ref_locus_tag:::ref_genotype:::ref_media:::ref_temperature:::ref_lifespan_start_count:::ref_lifespan_count:::ref_lifespan_mean:::ref_lifespan_stdev:::ref_lifespans:::percent_change:::ranksum_u:::ranksum_p:::pooled_by

1:::127:::BY4741:::KK19:::BY4741:::MATa::::::BY4741:::YPD:::30.0:::20:::20:::30.3:::7.526095:::23,26,34,31,22,37,26,39,22,36,38,24,36,40,26,38,38,17,34,19:::BY4742:::DH502:::BY4742:::MATalpha::::::BY4742:::YPD:::30.0:::40:::40:::29.625:::8.279377:::36,26,15,28,16,44,40,28,25,32,24,29,39,37,30,31,14,17,29,28,44,27,38,29,26,39,38,32,34,33,32,38,16,28,31,11,20,39,30,32:::2.278481:::409.0:::0.8916505557143:::file

2:::127:::ymr226c:::DC:4G4:::BY4741:::MATa::::::tma29:::YPD:::30.0:::20:::20:::27.1:::11.702:::24,11,37,32,41,38,12,11,31,23,39,36,22,19,28,36,24,49,24,5:::BY4741:::KK19:::BY4741:::MATa::::::BY4741:::YPD:::30.0:::20:::20:::30.3:::7.526095:::23,26,34,31,22,37,26,39,22,36,38,24,36,40,26,38,38,17,34,19:::-10.56106:::169.5:::0.4163969339623:::file
#Notes, field 'experiments' in 'result' maybe used to find the in-experiment wildtype controls.
# Ken once suggested that "pooled by" column?? file, genotype, mixed
# set lifespan
# ref lifespan

select experiments,set_name,set_strain,set_background,set_genotype,
set_lifespan_mean,ref_genotype,ref_lifespan_mean

from result limit 2;