README

# 2016-11-22
# downloaded GTEx data to compare aging genes in my mouse data vs. human

GTEx_Analysis_v6p_RNA-seq_RNA-SeQCv1.1.8_gene_reads.gct
GTEx_Data_V6_Annotations_SubjectPhenotypes_DD.xlsx
GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt
GTEx_Data_V6_Annotations_SampleAttributesDS.txt


# 2016-12-1
Covariates used in eQTL analysis. Includes genotyping principal components and PEER factors.	GTEx_Analysis_v6p_eQTL_covariates.tar.gz	1.7M


=> may be needed to get aging data (see Yang et al, 2015 paper methods). 
Top 3 PCA axes for Genotyping are important as covariates!

http://www.gtexportal.org/home/datasets

#-------------------------------------------------------------------------------------
#GTEx_Analysis_v6_eQTLInputFiles_covariates.tar.gz
#-------------------------------------------------------------------------------------
#Tables of the covariate files used for matrix eqtl. All covariates in these files are used. They include 3 genotyping pcs, gender, genotyping platform (5M vs 2.5M), and a variable number of PEER factors.
#
#PEER factors are generated using the top 1000 expressed genes per tissue. The top 1000 genes are normalized with the same procedure as for the expression matrices. Number of PEERs is determined by N (number of samples per tissue)
#	N < 150, use 15  PEERs, 150<=N<250, use 30 PEERs, N >=250 use 35 PEERs




# 2016-12-14
# need to get only 1 entry per gene
# will get each with max count

# Run the local parse_genes.pl script and capture its stdout in the dated output file.
# NOTE(review): no arguments are passed, so the input table is presumably hard-coded
# inside parse_genes.pl -- confirm before re-running.
./parse_genes.pl > 2016-11-30_GTex_Heart_Cerebellum_Liver.GNAME.txt
====> NO, the problem is all types of genes are included. Will need to parse out protein coding.


### 2016-12-16
# get gene names
# Take column 1 of the GTEx table and emit two tab-separated columns per line:
# the ID exactly as found, then the text captured before the last ".<digit>"
# (i.e. the ID with its version suffix removed).
cut -f 1 /Volumes/MyBook_3/BD_aging_project/Public_datasets/GTex/2016-12-01_GTex_Heart_Cerebellum_Liver.txt \
	| perl -lne 'm/(.+)\.\d/; print "$_\t$1"' \
	> 2016-12-16_GTEx_EnsGeneID_list.txt

#Talked with Joe from Montgomery lab
#V6P is on Gencode 19 (corresponds to Ens74/75; GRCh37.p13)

#use biomart Ens75 mirror
#http://feb2014.archive.ensembl.org/biomart/martview/0c085dcaa2ccf4996dda040f5116e823
2016-12-16_en75_human_mouse_orthologs.txt

#then get gene names for mouse orthologs
Mus musculus genes (GRCm38.p2)
2016-12-16_en75_mouse_geneNames.txt


# 2017-01-19
run GSEA on mouse RNAseq using significant human genes
will use FDR5% (male only)
http://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#CLS:_Continuous_.28e.g_time-series_or_gene_profile.29_file_format_.28.2A.cls.29


#download names of mice orthologs
2016-12-16_en75_human_mouse_orthologs.txt




# 2017-03-15
# get lists of genes DE in mouse for GSEA
cat /Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/2015-11-19_Cerebellum_DESeq2_LINEAR_model_with_age\ _FDR5_genes_statistics.txt | \
	perl -lane 'next if ($. == 1); print "$F[0]" if ($F[2] > 0)' > Cerebellum_Mouse_Up_with_age.grp
cat /Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/2015-11-19_Cerebellum_DESeq2_LINEAR_model_with_age\ _FDR5_genes_statistics.txt | \
	perl -lane 'next if ($. == 1); print "$F[0]" if ($F[2] < 0)' > Cerebellum_Mouse_Down_with_age.grp

cat /Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/2015-11-19_Heart_DESeq2_LINEAR_model_with_age\ _FDR5_genes_statistics.txt | \
	perl -lane 'next if ($. == 1); print "$F[0]" if ($F[2] > 0)' > Heart_Mouse_Up_with_age.grp
cat /Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/2015-11-19_Heart_DESeq2_LINEAR_model_with_age\ _FDR5_genes_statistics.txt | \
	perl -lane 'next if ($. == 1); print "$F[0]" if ($F[2] < 0)' > Heart_Mouse_Down_with_age.grp

cat /Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/2015-11-19_Liver_DESeq2_LINEAR_model_with_age\ _FDR5_genes_statistics.txt  | \
	perl -lane 'next if ($. == 1); print "$F[0]" if ($F[2] > 0)' > Liver_Mouse_Up_with_age.grp
cat /Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/2015-11-19_Liver_DESeq2_LINEAR_model_with_age\ _FDR5_genes_statistics.txt  | \
	perl -lane 'next if ($. == 1); print "$F[0]" if ($F[2] < 0)' > Liver_Mouse_Down_with_age.grp
