# Prepare reference genomes (other species)
Download reference genomes and annotations for other species from Ensembl (the human T2T assembly is downloaded from UCSC).

## Create directory
```sh
# Create one directory per species group inside genome/.
# `&&` keeps mkdir from creating the directories in the wrong place
# when genome/ does not exist in the current directory.
cd genome/ && mkdir -p \
  other_species/chicken/ \
  other_species/great_ape/ \
  other_species/african_cichlid/
```

Files needed for each species:
* fasta
* fasta bgzipped + index
* gff annotation bgzipped + index

Copy the commands below into a text editor, then use Find + Replace to change `filename` to the file prefix of the species of interest (e.g. `astCal`).
```sh
# Decompress the downloaded FASTA (replaces filename.fa.gz with filename.fa)
# and build its faidx index
gzip -d filename.fa.gz
samtools faidx filename.fa

# Re-compress with bgzip, keeping filename.fa, and index the compressed copy for IGV
bgzip -c filename.fa > filename.fa.gz
samtools faidx filename.fa.gz

# GFF: drop '#' comment lines, sort by sequence name (column 1) then by
# numeric start position (column 4), bgzip the result and index it with tabix
gzip -dc filename.gff.gz \
  | grep -v '^#' \
  | sort -k1,1 -k4,4n \
  | bgzip > filename.sorted.gff.gz
tabix -p gff filename.sorted.gff.gz
```

## Wider African cichlid radiation
```sh
# curl flags: -f fails on an HTTP error instead of saving the server's error
# page under the .gz name; -L follows redirects.

# Astatotilapia calliptera
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/astatotilapia_calliptera/dna/Astatotilapia_calliptera.fAstCal1.2.dna.toplevel.fa.gz' -o astCal.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/astatotilapia_calliptera/Astatotilapia_calliptera.fAstCal1.2.108.gff3.gz' -o astCal.gff.gz

# Astatotilapia burtoni (Burton's mouthbrooder; listed by Ensembl as Haplochromis burtoni)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/haplochromis_burtoni/dna/Haplochromis_burtoni.AstBur1.0.dna.toplevel.fa.gz' -o hapBur.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/haplochromis_burtoni/Haplochromis_burtoni.AstBur1.0.108.gff3.gz' -o hapBur.gff.gz

# Maylandia zebra (Lake Malawi)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/maylandia_zebra/dna/Maylandia_zebra.M_zebra_UMD2a.dna.toplevel.fa.gz' -o mayZeb.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/maylandia_zebra/Maylandia_zebra.M_zebra_UMD2a.108.gff3.gz' -o mayZeb.gff.gz

# Neolamprologus brichardi (Lyretail cichlid, Lake Tanganyika)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/neolamprologus_brichardi/dna/Neolamprologus_brichardi.NeoBri1.0.dna.toplevel.fa.gz' -o neoBri.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/neolamprologus_brichardi/Neolamprologus_brichardi.NeoBri1.0.108.gff3.gz' -o neoBri.gff.gz

# Pundamilia nyererei (Makobe island cichlid, Lake Victoria)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/pundamilia_nyererei/dna/Pundamilia_nyererei.PunNye1.0.dna.toplevel.fa.gz' -o punNye.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/pundamilia_nyererei/Pundamilia_nyererei.PunNye1.0.108.gff3.gz' -o punNye.gff.gz

# Amphilophus citrinellus (Midas cichlid)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/amphilophus_citrinellus/dna/Amphilophus_citrinellus.Midas_v5.dna.toplevel.fa.gz' -o ampCit.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/amphilophus_citrinellus/Amphilophus_citrinellus.Midas_v5.108.gff3.gz' -o ampCit.gff.gz
```

## Chicken (*Gallus gallus*)
```sh
# curl flags: -f fails on an HTTP error instead of saving the server's error
# page under the .gz name; -L follows redirects.

# maternal broiler (main chicken genome from VGP, meat breed)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/gallus_gallus/dna/Gallus_gallus.bGalGal1.mat.broiler.GRCg7b.dna.toplevel.fa.gz' -o broiler.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/gallus_gallus/Gallus_gallus.bGalGal1.mat.broiler.GRCg7b.108.gff3.gz' -o broiler.gff.gz

# paternal white leghorn (egg laying breed)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/gallus_gallus_gca016700215v2/dna/Gallus_gallus_gca016700215v2.bGalGal1.pat.whiteleghornlayer.GRCg7w.dna.toplevel.fa.gz' -o whiteleghorn.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/gallus_gallus_gca016700215v2/Gallus_gallus_gca016700215v2.bGalGal1.pat.whiteleghornlayer.GRCg7w.108.gff3.gz' -o whiteleghorn.gff.gz

# red junglefowl (wild species believed to have contributed to the domestic gene pool)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/gallus_gallus_gca000002315v5/dna/Gallus_gallus_gca000002315v5.GRCg6a.dna.toplevel.fa.gz' -o redjunglefowl.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/gallus_gallus_gca000002315v5/Gallus_gallus_gca000002315v5.GRCg6a.108.gff3.gz' -o redjunglefowl.gff.gz
```

## Great apes
```sh
# curl flags: -f fails on an HTTP error instead of saving the server's error
# page under the .gz name; -L follows redirects.

# Homo sapiens (GRCh38, toplevel)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.toplevel.fa.gz' -o homSapGRCh38.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/homo_sapiens/Homo_sapiens.GRCh38.108.gff3.gz' -o homSapGRCh38.gff.gz

# Homo sapiens (GRCh38, primary assembly: without alternate contigs)
# NOTE(review): `dna_rm` is Ensembl's repeat-masked build (repeats replaced by N),
# unlike the unmasked `dna` files used everywhere else here. Switch to
# Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz if unmasked sequence is intended.
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_rm.primary_assembly.fa.gz' -o homSapGRCh38_primary.fa.gz

# Homo sapiens (T2T) - UCSC
curl -fL 'https://hgdownload.soe.ucsc.edu/goldenPath/hs1/bigZips/hs1.fa.gz' -o homSapT2T.fa.gz
curl -fL 'https://hgdownload.soe.ucsc.edu/goldenPath/hs1/bigZips/genes/catLiftOffGenesV1.gff3.gz' -o homSapT2T.gff.gz

# Pan troglodytes (Chimpanzee)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/pan_troglodytes/dna/Pan_troglodytes.Pan_tro_3.0.dna.toplevel.fa.gz' -o panTro.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/pan_troglodytes/Pan_troglodytes.Pan_tro_3.0.108.gff3.gz' -o panTro.gff.gz

# Pan paniscus (Bonobo)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/pan_paniscus/dna/Pan_paniscus.panpan1.1.dna.toplevel.fa.gz' -o panPan.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/pan_paniscus/Pan_paniscus.panpan1.1.108.gff3.gz' -o panPan.gff.gz

# Gorilla gorilla
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/gorilla_gorilla/dna/Gorilla_gorilla.gorGor4.dna.toplevel.fa.gz' -o gorGor.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/gorilla_gorilla/Gorilla_gorilla.gorGor4.108.gff3.gz' -o gorGor.gff.gz

# Pongo abelii (Sumatran orangutan)
curl -fL 'https://ftp.ensembl.org/pub/release-108/fasta/pongo_abelii/dna/Pongo_abelii.Susie_PABv2.dna.toplevel.fa.gz' -o ponAbe.fa.gz
curl -fL 'https://ftp.ensembl.org/pub/release-108/gff3/pongo_abelii/Pongo_abelii.Susie_PABv2.108.gff3.gz' -o ponAbe.gff.gz
```


## Rename chromosomes
For `minigraph` genome graphs, the contigs of each genome need to be renamed (prefixed with the genome name) so that contig names are not repeated across genomes.

```sh
# Prefix every FASTA header line in <name>.fa with "<name>_" and write the
# result to <name>.renamed.fa.
#   $1 - genome file prefix, i.e. the name given to `curl -o` at download time
#        (the uncompressed <name>.fa must exist in the current directory)
# Returns 1 with a message on stderr, and writes no output file, when
# <name>.fa is missing. Deriving both the contig prefix and the file names
# from one argument keeps them from drifting apart.
prefix_contigs() {
  if [ ! -f "${1}.fa" ]; then
    printf 'prefix_contigs: %s.fa not found\n' "$1" >&2
    return 1
  fi
  # Only lines starting with '>' are headers; the first '>' on such a line is
  # the header marker itself.
  sed "/^>/ s/>/>${1}_/" "${1}.fa" > "${1}.renamed.fa"
}

# African cichlids
for genome in astCal hapBur mayZeb neoBri punNye ampCit; do
  prefix_contigs "$genome"
done

# Chicken
for genome in broiler whiteleghorn redjunglefowl; do
  prefix_contigs "$genome"
done

# Great apes
for genome in homSapT2T panTro panPan gorGor ponAbe; do
  prefix_contigs "$genome"
done
```