# Cross-checks the R files mentioned in the .tex sources against the R files on the computer, in both directions.
# The first task is also done by script in folder "check_R_files_in_tex"

# Make a list of the R files in RH_pools_1: base names only, one per line.
# find is used instead of filtering `ls -R` output, so directory headers and
# listing format never enter the picture. Hidden files and directories are
# skipped (as `ls -R` skipped them), and directories whose name happens to end
# in .R are not reported as files.
# NOTE(review): the list is written to the Desktop, while the comm steps further
# down read R_files.txt from the current directory -- presumably the script is
# run from the Desktop; confirm.
find /Users/dsmith/Work/RH_pools_1 -name '.*' -prune -o \
  ! -type d -name '*.R' -print |
  sed 's|.*/||' > /Users/dsmith/Desktop/R_files.txt


# make list of R files and xlsx files in supp.tex (test_supp.tex, a copy made for safety)
# grep -E '^\%.*\.R|.xlsx$' test_supp.tex > R_files_paper.txt 

# append list of R files and xlsx files in main.tex (test_main.tex, a copy made for safety)
# grep -E '^\%.*\.R|.xlsx$' test_main.tex >> R_files_paper.txt

# Make a list of the commented-out lines (starting with %) that mention an R
# file, first from supp.tex and then from main.tex (test_supp.tex and
# test_main.tex are copies made for safety).
# -h suppresses the "file:" prefix grep would otherwise add when given two
# files, so the output is the same as running grep on each file in turn.
# The % is matched literally without a backslash: "\%" is an undefined escape
# in extended regular expressions and newer GNU grep warns about it.
grep -h -E '^%.*\.R' test_supp.tex test_main.tex > R_files_paper.txt


# Remove the TeX comment markers from R_files_paper.txt: every "% " pair first,
# then any "%" that is left.
# The result goes through a temporary file rather than `sed -ie`: both GNU and
# BSD sed read `-ie` as `-i` with backup suffix "e", which left a stray
# R_files_paper.txte behind on every call.
if sed -e 's/% //g' -e 's/%//g' R_files_paper.txt > R_files_paper.txt.tmp; then
  mv R_files_paper.txt.tmp R_files_paper.txt
else
  # sed could not read the input; do not leave the empty temporary file around.
  rm -f R_files_paper.txt.tmp
fi

# Remove space from beginning and end of lines. Reassigning $1 makes awk rebuild
# the line from its fields, which also squeezes inner runs of blanks to one space.
awk '{ $1 = $1 } 1' R_files_paper.txt > R_files_paper_2.txt

# Drop empty lines and remove duplicate lines, keeping the first occurrence.
# The whole line ($0) is the key: keying on the first word only would silently
# discard different file names that merely start with the same word.
awk 'NF && !seen[$0]++' R_files_paper_2.txt > R_files_paper_3.txt

# a small number of lines will require hand editing

# Compare R_files.txt (files in the RH_pools_1 folder) with R_files_paper_3.txt
# (R files mentioned in the tex files).
# Writes the lines found only in R_files_paper_3.txt, i.e. R files that are
# mentioned in the tex files but are not in the RH_pools_1 list.
# Both lists are sorted and de-duplicated on the fly because comm expects
# sorted input; the paper list reaches comm on standard input ("-").
# Observed result: file empty, no missing files.
sort -u R_files_paper_3.txt |
  comm -13 <(sort -u R_files.txt) - > files_missing_tex


# Writes the lines found only in R_files.txt, i.e. R files that are in the
# RH_pools_1 list but are not mentioned in the tex files.
# Observed result: nearly all of these are to do with Hoffman, which the tex
# files mention implicitly as a folder rather than explicitly file by file.
# Plus one file, "double_click_for_R_path.R", left out of the .tex files for
# good reason.
sort -u R_files.txt |
  comm -13 <(sort -u R_files_paper_3.txt) - > files_missing_computer





