# Launch Pelican on rodents data

Use a VM from IFB (ifb_pelican).

## Install Pelican

```{bash}
# System packages installed before building pelican through opam.
sudo apt update
sudo apt install libgsl-dev libgdbm-dev libblas-dev liblapack-dev zlib1g-dev m4 pkg-config

# OCaml package manager.
# NOTE(review): on a fresh machine opam normally has to be initialised first
# (`opam init`, then `eval $(opam env)`) before `opam update` works — confirm on the VM.
sudo apt install opam
opam update && opam upgrade

# Pin pelican to the main branch of its repository without installing yet (-n),
# pin phylogenetics to its development repository, then build and install pelican.
opam pin add pelican "https://gitlab.in2p3.fr/phoogle/pelican.git#main" -n
opam pin add -y phylogenetics --dev-repo
opam install pelican

# Check the installation.
pelican --version # 56c0107 (presumably the version reported when these notes were written)
```

## Import data


Data organization

```{bash}
# On ifb_pelican: go to the data directory and display its layout.
# The path is quoted so that it survives a $HOME containing spaces.
cd "$HOME/data/mydatalocal/pelican_on_rodents/data"
tree
```

```text
└── data
    ├── input_ali
    │   ├── aa_alignment_hmmcleaner_3min_sp_renamed_onlygap_removed_seuil_0.1
    │   │   ├── 0610010F05Rik_4Q37P.renamed.cleaned.aa.fasta
    │   │   ├── 0610010K14Rik_4Q11F.renamed.cleaned.aa.fasta
    │   │   ├── 0610030E20Rik_4Q7R6.renamed.cleaned.aa.fasta
 [...]
    └── trees_and_metadata
        ├── attributes_Seq_dataonline_20220630.tsv
        ├── local_online_species_tree_rep_8_20200605.raxml.bestTree
        ├── tree_dataset_ancient_trans_sequences_raw_sp.nw
        ├── tree_dataset_murinae_sequences_raw_sp.nw
        ├── tree_dataset_pairset4_sequences_raw_sp.nw
        ├── tree_dataset_pairset_sequences_raw_sp.nw
        ├── tree_dataset_recent_trans_sequences_raw_sp.nw
        └── tree_dataset_total_sequences_raw_sp.nw
    
```

# Build annotation files

```
# Two-category aridity table: keep column 4 (header leaf_name) and column 8 (header Aride).
awk 'BEGIN { FS = OFS = "\t" } { print $4, $8 }' \
  data/trees_and_metadata/attributes_Seq_dataonline_20220630.tsv \
  > data/trees_and_metadata/aridity_2cat.tsv
# Rename the header fields to "species" and "trait".
sed -i 's/leaf_name/species/; s/Aride/trait/' data/trees_and_metadata/aridity_2cat.tsv

head data/trees_and_metadata/aridity_2cat.tsv
#species	trait
#Abrothrix_longipilis	no
#Abrothrix_olivaceus	no
#Acomys_dimidiatus	yes
#Apodemus_mystacinus	yes
#Apodemus_sylvaticus	no
#Arvicanthis_niloticus_sen	yes
#Castor_canadensis	no
#Cavia_aperea	no
#Cavia_porcellus	no


# Quantitative aridity table: keep column 4 (header leaf_name) and column 7
# (header Precipitation_of_Driest_Quarter).
awk 'BEGIN { FS = OFS = "\t" } { print $4, $7 }' \
  data/trees_and_metadata/attributes_Seq_dataonline_20220630.tsv \
  > data/trees_and_metadata/aridity_bio17.tsv
# Rename the header fields to "species" and "trait".
sed -i 's/leaf_name/species/; s/Precipitation_of_Driest_Quarter/trait/' data/trees_and_metadata/aridity_bio17.tsv


# Rename one leaf of the species tree so that its name matches the one used in the alignments.
sed -i 's/Mastomys_natalensis/Arvicanthis_niloticus_lmo/' data/trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.bestTree
```


# Annotating the tree

trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat.nhx

(((((Cavia_porcellus:0.001021[&&NHX:Trait=no],Cavia_aperea:0.012147[&&NHX:Trait=no]):0.05172[&&NHX:Trait=no],(Octodon_degus:0.047878[&&NHX:Trait=yes],Chinchilla_lanigera:0.033098[&&NHX:Trait=yes]):0.006292[&&NHX:Trait=no]):0.010376[&&NHX:Trait=no],(((Fukomys_damarensis:0.001248[&&NHX:Trait=yes],Fukomys_micklemi:0.001559[&&NHX:Trait=yes]):0.0027[&&NHX:Trait=yes],Fukomys_mechowii:0.010047[&&NHX:Trait=yes]):0.030157[&&NHX:Trait=yes],Heterocephalus_glaber_male:0.028677[&&NHX:Trait=yes]):0.013505[&&NHX:Trait=no]):0.045014[&&NHX:Trait=no],((((Dipodomys_ordii:0.008023[&&NHX:Trait=yes],Dipodomys_spectabilis2:0.005105[&&NHX:Trait=yes]):0.025844[&&NHX:Trait=yes],(Heteromys_desmarestianus:0.04354[&&NHX:Trait=no],Chaetodipus_baileyi:0.034007[&&NHX:Trait=yes]):0.006977[&&NHX:Trait=no]):0.051905[&&NHX:Trait=no],Castor_canadensis:0.054651[&&NHX:Trait=no]):0.007763[&&NHX:Trait=no],(Jaculus_jaculus:0.080373[&&NHX:Trait=yes],((Nannospalax_galili:0.034088[&&NHX:Trait=yes],Eospalax_fontanierii:0.036899[&&NHX:Trait=yes]):0.02318[&&NHX:Trait=yes],((Dendromus_melanotis:0.037402[&&NHX:Trait=no],Saccostomus_campestris:0.02951[&&NHX:Trait=yes]):0.018766[&&NHX:Trait=no],((((((((Nannomys_indutus:0.008346[&&NHX:Trait=yes],Nannomys_cot:0.007386[&&NHX:Trait=rm]):0.008547[&&NHX:Trait=yes],(Mus_pahari:0.017377[&&NHX:Trait=no],(((Mus_spretus:0.003542[&&NHX:Trait=no],(Mus_spicilegus:0.003333[&&NHX:Trait=no],Mus_macedonicus:0.002161[&&NHX:Trait=yes]):0.001665[&&NHX:Trait=no]):0.000649[&&NHX:Trait=no],((Mus_musculus_ddo:0.000456[&&NHX:Trait=no],Mus_musculus_wla:0.000746[&&NHX:Trait=no]):0.001091[&&NHX:Trait=no],Mus_musculus:0.000478[&&NHX:Trait=no]):0.003003[&&NHX:Trait=no]):0.004701[&&NHX:Trait=no],Mus_caroli:0.008432[&&NHX:Trait=no]):0.00778[&&NHX:Trait=no]):0.001564[&&NHX:Trait=no]):0.013104[&&NHX:Trait=no],(((Arvicanthis_niloticus_lmo:0.002409[&&NHX:Trait=rm],Mastomys_erythroleucus:0.002564[&&NHX:Trait=yes]):0.000431[&&NHX:Trait=yes],Mastomys_coucha:0.00294[&&NHX:Trait=yes]):0.002352[&&NHX:Trait=ye
s],Mastomys_kollmanspergeri:0.005086[&&NHX:Trait=yes]):0.019999[&&NHX:Trait=yes]):0.002141[&&NHX:Trait=no],(Apodemus_sylvaticus:0.014473[&&NHX:Trait=no],Apodemus_mystacinus:0.011722[&&NHX:Trait=yes]):0.023283[&&NHX:Trait=no]):0.002795[&&NHX:Trait=no],(Micaelamys_namaquensis:0.013336[&&NHX:Trait=yes],Arvicanthis_niloticus_sen:0.016797[&&NHX:Trait=yes]):0.010146[&&NHX:Trait=no]):0.003027[&&NHX:Trait=no],Rattus_norvegicus:0.03572[&&NHX:Trait=no]):0.021109[&&NHX:Trait=no],(Meriones_unguiculatus:0.042296[&&NHX:Trait=yes],Acomys_dimidiatus:0.04584[&&NHX:Trait=yes]):0.007558[&&NHX:Trait=no]):0.006197[&&NHX:Trait=no],((((Peromyscus_eremicus:0.000675[&&NHX:Trait=yes],Peromyscus_leucopus:0.000586[&&NHX:Trait=rm]):0.009414[&&NHX:Trait=yes],Peromyscus_maniculatus_bairdii:0.016008[&&NHX:Trait=no]):0.022247[&&NHX:Trait=no],(Abrothrix_longipilis:0.002876[&&NHX:Trait=no],Abrothrix_olivaceus:0.002826[&&NHX:Trait=no]):0.037995[&&NHX:Trait=no]):0.002312[&&NHX:Trait=no],((Myodes_glareolus:0.014968[&&NHX:Trait=no],Microtus_ochrogaster:0.017028[&&NHX:Trait=no]):0.027231[&&NHX:Trait=no],(Mesocricetus_auratus:0.028857[&&NHX:Trait=yes],Cricetulus_griseus_crigri:0.02148[&&NHX:Trait=yes]):0.014058[&&NHX:Trait=yes]):0.001659[&&NHX:Trait=no]):0.010576[&&NHX:Trait=no]):0.003795[&&NHX:Trait=no]):0.032649[&&NHX:Trait=no]):0.015177[&&NHX:Trait=no]):0.015713[&&NHX:Trait=no]):0.008613[&&NHX:Trait=no]):0.019985[&&NHX:Trait=no],((Marmota_marmota_marmota:0.006209[&&NHX:Trait=no],((Ictidomys_tridecemlineatus:0.005022[&&NHX:Trait=no],Urocitellus_parryii:0.003641[&&NHX:Trait=yes]):0.000728[&&NHX:Trait=no],Spermophilus_dauricus:0.008035[&&NHX:Trait=yes]):0.00229[&&NHX:Trait=no]):0.016386[&&NHX:Trait=no],Tamias_sibiricus:0.019321[&&NHX:Trait=no]):0.019985[&&NHX:Trait=no]):0.0[&&NHX:Trait=no];
(Cavia_porcellus:0.0005105[&&NHX:Trait=no],(Cavia_aperea:0.012147[&&NHX:Trait=no],((Octodon_degus:0.047878[&&NHX:Trait=yes],Chinchilla_lanigera:0.033098[&&NHX:Trait=yes]):0.006292[&&NHX:Trait=no],((((Fukomys_damarensis:0.001248[&&NHX:Trait=yes],Fukomys_micklemi:0.001559[&&NHX:Trait=yes]):0.0027[&&NHX:Trait=yes],Fukomys_mechowii:0.010047[&&NHX:Trait=yes]):0.030157[&&NHX:Trait=yes],Heterocephalus_glaber_male:0.028677[&&NHX:Trait=yes]):0.013505[&&NHX:Trait=no],(((((Dipodomys_ordii:0.008023[&&NHX:Trait=yes],Dipodomys_spectabilis2:0.005105[&&NHX:Trait=yes]):0.025844[&&NHX:Trait=yes],(Heteromys_desmarestianus:0.04354[&&NHX:Trait=no],Chaetodipus_baileyi:0.034007[&&NHX:Trait=yes]):0.006977[&&NHX:Trait=no]):0.051905[&&NHX:Trait=no],Castor_canadensis:0.054651[&&NHX:Trait=no]):0.007763[&&NHX:Trait=no],(Jaculus_jaculus:0.080373[&&NHX:Trait=yes],((Nannospalax_galili:0.034088[&&NHX:Trait=yes],Eospalax_fontanierii:0.036899[&&NHX:Trait=yes]):0.02318[&&NHX:Trait=yes],((Dendromus_melanotis:0.037402[&&NHX:Trait=no],Saccostomus_campestris:0.02951[&&NHX:Trait=yes]):0.018766[&&NHX:Trait=no],((((((((Mus_pahari:0.017377[&&NHX:Trait=no],(((Mus_spretus:0.003542[&&NHX:Trait=no],(Mus_spicilegus:0.003333[&&NHX:Trait=no],Mus_macedonicus:0.002161[&&NHX:Trait=yes]):0.001665[&&NHX:Trait=no]):0.000649[&&NHX:Trait=no],((Mus_musculus_ddo:0.000456[&&NHX:Trait=no],Mus_musculus_wla:0.000746[&&NHX:Trait=no]):0.001091[&&NHX:Trait=no],Mus_musculus:0.000478[&&NHX:Trait=no]):0.003003[&&NHX:Trait=no]):0.004701[&&NHX:Trait=no],Mus_caroli:0.008432[&&NHX:Trait=no]):0.00778[&&NHX:Trait=no]):0.001564[&&NHX:Trait=no],Nannomys_indutus:0.016893[&&NHX:Trait=yes]):0.013104[&&NHX:Trait=no],((Mastomys_coucha:0.00294[&&NHX:Trait=yes],Mastomys_erythroleucus:0.002995[&&NHX:Trait=yes]):0.002352[&&NHX:Trait=yes],Mastomys_kollmanspergeri:0.005086[&&NHX:Trait=yes]):0.019999[&&NHX:Trait=yes]):0.002141[&&NHX:Trait=no],(Apodemus_sylvaticus:0.014473[&&NHX:Trait=no],Apodemus_mystacinus:0.011722[&&NHX:Trait=yes]):0.023283[&&NHX:Trait=
no]):0.002795[&&NHX:Trait=no],(Micaelamys_namaquensis:0.013336[&&NHX:Trait=yes],Arvicanthis_niloticus_sen:0.016797[&&NHX:Trait=yes]):0.010146[&&NHX:Trait=no]):0.003027[&&NHX:Trait=no],Rattus_norvegicus:0.03572[&&NHX:Trait=no]):0.021109[&&NHX:Trait=no],(Meriones_unguiculatus:0.042296[&&NHX:Trait=yes],Acomys_dimidiatus:0.04584[&&NHX:Trait=yes]):0.007558[&&NHX:Trait=no]):0.006197[&&NHX:Trait=no],(((Peromyscus_maniculatus_bairdii:0.016008[&&NHX:Trait=no],Peromyscus_eremicus:0.010089[&&NHX:Trait=yes]):0.022247[&&NHX:Trait=no],(Abrothrix_longipilis:0.002876[&&NHX:Trait=no],Abrothrix_olivaceus:0.002826[&&NHX:Trait=no]):0.037995[&&NHX:Trait=no]):0.002312[&&NHX:Trait=no],((Myodes_glareolus:0.014968[&&NHX:Trait=no],Microtus_ochrogaster:0.017028[&&NHX:Trait=no]):0.027231[&&NHX:Trait=no],(Mesocricetus_auratus:0.028857[&&NHX:Trait=yes],Cricetulus_griseus_crigri:0.02148[&&NHX:Trait=yes]):0.014058[&&NHX:Trait=yes]):0.001659[&&NHX:Trait=no]):0.010576[&&NHX:Trait=no]):0.003795[&&NHX:Trait=no]):0.032649[&&NHX:Trait=no]):0.015177[&&NHX:Trait=no]):0.015713[&&NHX:Trait=no]):0.008613[&&NHX:Trait=no],((Marmota_marmota_marmota:0.006209[&&NHX:Trait=no],((Ictidomys_tridecemlineatus:0.005022[&&NHX:Trait=no],Urocitellus_parryii:0.003641[&&NHX:Trait=yes]):0.000728[&&NHX:Trait=no],Spermophilus_dauricus:0.008035[&&NHX:Trait=yes]):0.00229[&&NHX:Trait=no]):0.016386[&&NHX:Trait=no],Tamias_sibiricus:0.019321[&&NHX:Trait=no]):0.03997[&&NHX:Trait=no]):0.045014[&&NHX:Trait=no]):0.010376[&&NHX:Trait=no]):0.05172[&&NHX:Trait=no]):0.0005105[&&NHX:Trait=no]);

```
# NOTE(review): presumably prunes the leaves tagged Trait=rm from the annotated tree
# (the script itself is not shown in these notes) — confirm.
python src/remove_leaves.py 
```
trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat_filtered.nhx 
(((((Cavia_porcellus:0.001021[&&NHX:Trait=no],Cavia_aperea:0.012147[&&NHX:Trait=no]):0.05172[&&NHX:Trait=no],(Octodon_degus:0.047878[&&NHX:Trait=yes],Chinchilla_lanigera:0.033098[&&NHX:Trait=yes]):0.006292[&&NHX:Trait=no]):0.010376[&&NHX:Trait=no],(((Fukomys_damarensis:0.001248[&&NHX:Trait=yes],Fukomys_micklemi:0.001559[&&NHX:Trait=yes]):0.0027[&&NHX:Trait=yes],Fukomys_mechowii:0.010047[&&NHX:Trait=yes]):0.030157[&&NHX:Trait=yes],Heterocephalus_glaber_male:0.028677[&&NHX:Trait=yes]):0.013505[&&NHX:Trait=no]):0.045014[&&NHX:Trait=no],((((Dipodomys_ordii:0.008023[&&NHX:Trait=yes],Dipodomys_spectabilis2:0.005105[&&NHX:Trait=yes]):0.025844[&&NHX:Trait=yes],(Heteromys_desmarestianus:0.04354[&&NHX:Trait=no],Chaetodipus_baileyi:0.034007[&&NHX:Trait=yes]):0.006977[&&NHX:Trait=no]):0.051905[&&NHX:Trait=no],Castor_canadensis:0.054651[&&NHX:Trait=no]):0.007763[&&NHX:Trait=no],(Jaculus_jaculus:0.080373[&&NHX:Trait=yes],((Nannospalax_galili:0.034088[&&NHX:Trait=yes],Eospalax_fontanierii:0.036899[&&NHX:Trait=yes]):0.02318[&&NHX:Trait=yes],((Dendromus_melanotis:0.037402[&&NHX:Trait=no],Saccostomus_campestris:0.02951[&&NHX:Trait=yes]):0.018766[&&NHX:Trait=no],((((((((Mus_pahari:0.017377[&&NHX:Trait=no],(((Mus_spretus:0.003542[&&NHX:Trait=no],(Mus_spicilegus:0.003333[&&NHX:Trait=no],Mus_macedonicus:0.002161[&&NHX:Trait=yes]):0.001665[&&NHX:Trait=no]):0.000649[&&NHX:Trait=no],((Mus_musculus_ddo:0.000456[&&NHX:Trait=no],Mus_musculus_wla:0.000746[&&NHX:Trait=no]):0.001091[&&NHX:Trait=no],Mus_musculus:0.000478[&&NHX:Trait=no]):0.003003[&&NHX:Trait=no]):0.004701[&&NHX:Trait=no],Mus_caroli:0.008432[&&NHX:Trait=no]):0.00778[&&NHX:Trait=no]):0.001564[&&NHX:Trait=no],Nannomys_indutus:0.016893[&&NHX:Trait=yes]):0.013104[&&NHX:Trait=no],((Mastomys_coucha:0.00294[&&NHX:Trait=yes],Mastomys_erythroleucus:0.002995[&&NHX:Trait=yes]):0.002352[&&NHX:Trait=yes],Mastomys_kollmanspergeri:0.005086[&&NHX:Trait=yes]):0.019999[&&NHX:Trait=yes]):0.002141[&&NHX:Trait=no],(Apodemus_sylvaticus:0.014473[&&NHX:Tr
ait=no],Apodemus_mystacinus:0.011722[&&NHX:Trait=yes]):0.023283[&&NHX:Trait=no]):0.002795[&&NHX:Trait=no],(Micaelamys_namaquensis:0.013336[&&NHX:Trait=yes],Arvicanthis_niloticus_sen:0.016797[&&NHX:Trait=yes]):0.010146[&&NHX:Trait=no]):0.003027[&&NHX:Trait=no],Rattus_norvegicus:0.03572[&&NHX:Trait=no]):0.021109[&&NHX:Trait=no],(Meriones_unguiculatus:0.042296[&&NHX:Trait=yes],Acomys_dimidiatus:0.04584[&&NHX:Trait=yes]):0.007558[&&NHX:Trait=no]):0.006197[&&NHX:Trait=no],(((Peromyscus_maniculatus_bairdii:0.016008[&&NHX:Trait=no],Peromyscus_eremicus:0.010089[&&NHX:Trait=yes]):0.022247[&&NHX:Trait=no],(Abrothrix_longipilis:0.002876[&&NHX:Trait=no],Abrothrix_olivaceus:0.002826[&&NHX:Trait=no]):0.037995[&&NHX:Trait=no]):0.002312[&&NHX:Trait=no],((Myodes_glareolus:0.014968[&&NHX:Trait=no],Microtus_ochrogaster:0.017028[&&NHX:Trait=no]):0.027231[&&NHX:Trait=no],(Mesocricetus_auratus:0.028857[&&NHX:Trait=yes],Cricetulus_griseus_crigri:0.02148[&&NHX:Trait=yes]):0.014058[&&NHX:Trait=yes]):0.001659[&&NHX:Trait=no]):0.010576[&&NHX:Trait=no]):0.003795[&&NHX:Trait=no]):0.032649[&&NHX:Trait=no]):0.015177[&&NHX:Trait=no]):0.015713[&&NHX:Trait=no]):0.008613[&&NHX:Trait=no]):0.019985[&&NHX:Trait=no],((Marmota_marmota_marmota:0.006209[&&NHX:Trait=no],((Ictidomys_tridecemlineatus:0.005022[&&NHX:Trait=no],Urocitellus_parryii:0.003641[&&NHX:Trait=yes]):0.000728[&&NHX:Trait=no],Spermophilus_dauricus:0.008035[&&NHX:Trait=yes]):0.00229[&&NHX:Trait=no]):0.016386[&&NHX:Trait=no],Tamias_sibiricus:0.019321[&&NHX:Trait=no]):0.019985[&&NHX:Trait=no]):0[&&NHX:Trait=no];

## Murinae

```
# NOTE(review): presumably restricts the filtered tree to the Murinae species
# (the script itself is not shown in these notes) — confirm.
python src/keep_murinae_leaves.py
```
((((((Mus_pahari:0.017377[&&NHX:Trait=no],(((Mus_spretus:0.003542[&&NHX:Trait=no],(Mus_spicilegus:0.003333[&&NHX:Trait=no],Mus_macedonicus:0.002161[&&NHX:Trait=yes]):0.001665[&&NHX:Trait=no]):0.000649[&&NHX:Trait=no],((Mus_musculus_ddo:0.000456[&&NHX:Trait=no],Mus_musculus_wla:0.000746[&&NHX:Trait=no]):0.001091[&&NHX:Trait=no],Mus_musculus:0.000478[&&NHX:Trait=no]):0.003003[&&NHX:Trait=no]):0.004701[&&NHX:Trait=no],Mus_caroli:0.008432[&&NHX:Trait=no]):0.00778[&&NHX:Trait=no]):0.001564[&&NHX:Trait=no],Nannomys_indutus:0.016893[&&NHX:Trait=yes]):0.013104[&&NHX:Trait=no],((Mastomys_coucha:0.00294[&&NHX:Trait=yes],Mastomys_erythroleucus:0.002995[&&NHX:Trait=yes]):0.002352[&&NHX:Trait=yes],Mastomys_kollmanspergeri:0.005086[&&NHX:Trait=yes]):0.019999[&&NHX:Trait=yes]):0.002141[&&NHX:Trait=no],(Apodemus_sylvaticus:0.014473[&&NHX:Trait=no],Apodemus_mystacinus:0.011722[&&NHX:Trait=yes]):0.023283[&&NHX:Trait=no]):0.002795[&&NHX:Trait=no],Micaelamys_namaquensis:0.023482[&&NHX:Trait=yes]):0.003027[&&NHX:Trait=no],Rattus_norvegicus:0.03572[&&NHX:Trait=no]):0.123238[&&NHX:Trait=no];

Leaf arrangement modified with iTOL:
(Rattus_norvegicus:0.03572,(Micaelamys_namaquensis:0.023482,((Apodemus_sylvaticus:0.014473,Apodemus_mystacinus:0.011722):0.023283[&&NHX:Trait=no],((Nannomys_indutus:0.016893,((Mus_caroli:0.008432,((Mus_spretus:0.003542,(Mus_macedonicus:0.002161,Mus_spicilegus:0.003333):0.001665[&&NHX:Trait=no]):0.000649[&&NHX:Trait=no],(Mus_musculus:0.000478,(Mus_musculus_wla:0.000746,Mus_musculus_ddo:0.000456):0.001091[&&NHX:Trait=no]):0.003003[&&NHX:Trait=no]):0.004701[&&NHX:Trait=no]):0.00778[&&NHX:Trait=no],Mus_pahari:0.017377):0.001564[&&NHX:Trait=no]):0.013104[&&NHX:Trait=no],(Mastomys_kollmanspergeri:0.005086,(Mastomys_erythroleucus:0.002995,Mastomys_coucha:0.00294):0.002352[&&NHX:Trait=yes]):0.019999[&&NHX:Trait=yes]):0.002141[&&NHX:Trait=no]):0.002795[&&NHX:Trait=no]):0.003027[&&NHX:Trait=no]);

# Remove species from the alignments
Nannomys_cot
Arvicanthis_niloticus_lmo
Peromyscus_leucopus

```
# NOTE(review): presumably drops the species listed above from every alignment
# (the script itself is not shown in these notes) — confirm.
python src/remove_sp_from_ali.py 
```

# Global analysis


## 2 aridity categories

```

# Shared pieces of the paths used by the four scans below.
tree_base=data/trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat_filtered
ali_base=data/aa_alignment_hmmcleaner_3min_sp_renamed_onlygap_removed_seuil_0.1
out_root=output_pelican_2025_Carine

# Run `pelican scan discrete` on amino-acid data with the settings common to
# every scan of this section.
#   $1 - annotated tree (NHX)
#   $2 - alignment path
#   $3 - output path
scan_aridity_2cats() {
  pelican scan discrete \
    --alphabet=AA \
    --tree="$1" \
    --alignment="$2" \
    --progress-bar \
    --multinomial-filter=0.99 \
    --output="$3" \
    --threads=32
}

# MURINAE
scan_aridity_2cats \
  "${tree_base}_murinae.nhx" \
  "${ali_base}_only_in_dataset_murinae" \
  "${out_root}/murinae/aridity_2cats_aa_alignment_hmmcleaner_3min_sp_renamed_onlygap_removed_seuil_0.1_filtered_sequences_M1"

# TOTAL
scan_aridity_2cats \
  "${tree_base}.nhx" \
  "${ali_base}_filtered_sequences_total_and_simu" \
  "${out_root}/total/aridity_2cats_aa_alignment_hmmcleaner_3min_sp_renamed_onlygap_removed_seuil_0.1_filtered_sequences_M1_with_simu"

# MURINAE SPIKE
scan_aridity_2cats \
  "${tree_base}_murinae.nhx" \
  src/spike_aa_data/murinae/ \
  "${out_root}/murinae/aridity_2cats_aa_alignment_murinae_spike_simu"

# TOTAL SPIKE
scan_aridity_2cats \
  "${tree_base}.nhx" \
  src/spike_aa_data/total/ \
  "${out_root}/total/aridity_2cats_aa_alignment_total_spike_simu"
                      

```


# Subset the alignments according to each sub-dataset

```

# Subset every alignment to the species of each sub-dataset and keep only the
# alignments that retain at least 3 arid and 5 mesic sequences.

# Alignment type to process: "nt" (nucleotide) or "aa" (amino-acid).
# The input and output directory names are both derived from it, so switching
# type can no longer write amino-acid subsets into an nt_* directory.
ali_type="nt"
ali_dir="${ali_type}_alignment_hmmcleaner_3min_sp_renamed_onlygap_removed_seuil_0.1_filtered_sequences"

for dataset in ancient recent murinae; do
  species_list="trees_and_metadata/species_list_${dataset}.txt"
  arid_list="trees_and_metadata/species_list_${dataset}_aride.txt"
  output_ali_dir="${ali_type}_alignment_hmmcleaner_3min_sp_renamed_onlygap_removed_seuil_0.1_only_in_dataset_${dataset}"
  mkdir -p "$output_ali_dir"

  # NOTE(review): only files whose name starts with "B" are processed —
  # confirm this is intended and not a leftover from a test run.
  for fasta in "$ali_dir"/B*fasta; do
    # Skip the literal pattern when the glob matches nothing.
    [ -e "$fasta" ] || continue

    prefix=$(basename "$fasta")
    printf '%s\n' "$prefix"
    output_fa="$output_ali_dir/$prefix"

    # Keep only the sequences of the species belonging to this sub-dataset.
    seqtk subseq "$fasta" "$species_list" > "$output_fa"

    # Count the headers that match / do not match the arid species list.
    nb_seq_aride=$(grep ">" "$output_fa" | grep -c -f "$arid_list")
    nb_seq_mesique=$(grep ">" "$output_fa" | grep -c -v -f "$arid_list")

    # A single test, so that the file is removed once even when both
    # thresholds fail (two separate `rm` calls made the second one error out).
    if [ "$nb_seq_aride" -lt 3 ] || [ "$nb_seq_mesique" -lt 5 ]; then
      rm -- "$output_fa"
    fi
  done
done
```


# Negative control

## Install pastek


```
# Pin pastek to the main branch of its repository without installing yet (-n),
# then build and install it.
opam pin add pastek "https://gitlab.in2p3.fr/pveber/pastek.git#main" -n
opam install pastek

# Display the options of the simulator used for the negative control.
pastek multiselreg --help


       

# Reading --selreg-weights, illustrated with --selreg-weights=10,20,30,40:
# the example simulates 50 sites into file data/aln.fasta and saves the
# site-wise types of evolution in data/regimes.txt, with the following
# proportions of selective regimes: 10% of the sites evolve neutrally,
# 20% according to purifying selection, 30% according to persistent
# positive selection, and 40% have profile changes.

```


```
# --- Murinae -----------------------------------------------------------------
mkdir -p output_pelican_2025_Carine/murinae/pastek/aln output_pelican_2025_Carine/murinae/pastek/regimes

# Simulate 100000 sites along the Murinae tree; --selreg-weights=100,0,0,0
# puts all the weight on the first (neutral) regime.
pastek multiselreg --alignment-output=output_pelican_2025_Carine/murinae/pastek/aln/aln_neutrally_Ne_1000.fasta \
                   --Ne 1000 --selreg-weights=100,0,0,0 --nsites=100000 --seed=123 \
                   --selreg-output=output_pelican_2025_Carine/murinae/pastek/regimes/regimes_neutrally_Ne_1000.txt \
                   --tree=data/trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat_filtered_murinae.nhx

# Scan the simulated alignment with the tree it was simulated on (the Murinae
# tree, as in the other Murinae scans) rather than the full species tree.
pelican scan discrete --alphabet=AA \
                      --tree=data/trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat_filtered_murinae.nhx \
                      --alignment=output_pelican_2025_Carine/murinae/pastek/aln/ \
                      --progress-bar \
                      --multinomial-filter=0.99 \
                      --output=output_pelican_2025_Carine/murinae/murinae_neutrally \
                      --threads=32

# --- Total -------------------------------------------------------------------
mkdir -p output_pelican_2025_Carine/total/pastek/aln output_pelican_2025_Carine/total/pastek/regimes

# Same simulation settings, along the full filtered species tree.
pastek multiselreg --alignment-output=output_pelican_2025_Carine/total/pastek/aln/aln_neutrally_Ne_1000.fasta \
                   --Ne 1000 --selreg-weights=100,0,0,0 --nsites=100000 --seed=123 \
                   --selreg-output=output_pelican_2025_Carine/total/pastek/regimes/regimes_neutrally_Ne_1000.txt \
                   --tree=data/trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat_filtered.nhx

pelican scan discrete --alphabet=AA \
                      --tree=data/trees_and_metadata/local_online_species_tree_rep_8_20200605.raxml.annotated_habitat_filtered.nhx \
                      --alignment=output_pelican_2025_Carine/total/pastek/aln/ \
                      --progress-bar \
                      --multinomial-filter=0.99 \
                      --output=output_pelican_2025_Carine/total/total_neutrally \
                      --threads=32
                      
           
```       
       
       
       
       
       
       
       
       
       
       



