library(tidyverse)

# Collect the permutation tables found in the working directory.
#
# The observed-data table and the CSVs this script writes live in the same
# directory, so they are excluded explicitly; otherwise the observed values
# would be read as if they were one more permutation, and a second run would
# try to parse the script's own outputs.
# NOTE(review): any other non-table file in this directory (for example the
# script itself) is still picked up -- consider a `pattern =` filter once the
# naming scheme of the permutation files is confirmed.
non_permutation_files <- c(
  "Assignment_Solution_Activities.txt",
  "comparisons_to_null.csv",
  "group_comparisons.csv"
)
files <- setdiff(list.files(), non_permutation_files)
files <- files[!dir.exists(files)]  # list.files() also returns directories

# Read each table, keep its first column as the row identifier, and reshape
# every remaining column into long `name` / `value` pairs. The source file
# name is recorded so each permutation can be told apart later.
theData_list <- lapply(files, function(file) {
  df <- read.table(file, header = TRUE) %>%
    pivot_longer(2:last_col())
  df$file_name <- file
  df
})

# Stack all permutation tables into one long data frame.
dat <- bind_rows(theData_list)

# The first column is the sample-type identifier; it is renamed by position.
names(dat)[1] <- "Type"

# Flag these rows as permutation (non-observed) data, then rescale `value`
# so that it sums to 1 within every (Type, real, file_name) combination.
# The grouping is dropped afterwards so it does not leak into later steps.
dat <- dat %>%
  mutate(real = "False") %>%
  group_by(Type, real, file_name) %>%
  mutate(value = value / sum(value)) %>%
  ungroup()

# Load the observed table, reshape it to long `name` / `value` pairs, and keep
# only the "Natural" and "ART" samples. Rows are flagged as observed data
# (`real == "True"`), `Samples` is copied into `Type`, and `value` is rescaled
# to sum to 1 within each sample. The grouping is dropped at the end so that
# `real` is a plain tibble for every later step.
real <- read.table("Assignment_Solution_Activities.txt", header = TRUE) %>%
  pivot_longer(2:last_col()) %>%
  filter(Samples %in% c("Natural", "ART")) %>%
  mutate(real = "True", Type = Samples) %>%
  group_by(Samples, real) %>%
  mutate(value = value / sum(value)) %>%
  ungroup()

# Permutation rows followed by the observed rows, in one table.
full <- bind_rows(dat, real)

# Names whose values add up to exactly zero across all rows of `full`.
# NOTE(review): `remove` is not referenced again in the visible script.
remove <- summarise(group_by(full, name), s = sum(value)) %>%
  filter(s == 0)

# Names with a strictly positive observed value in at least one sample.
keep <- unique(real$name[real$value > 0])

# Compare each observed value with its reference distribution.
#
# For every (Type, name) pair the values found in `full` are gathered into a
# list-column and joined to the observed value for that pair. `full` contains
# the observed rows as well, so the observed value is itself a member of the
# distribution it is compared against.
#
# p_upper / p_lower are the fractions of the distribution that are >= / <= the
# observed value. With ties both fractions can exceed 0.5, so the doubled
# minimum is capped at 1 to keep the two-sided p-value a valid probability.
# BH-adjusted values are stored in `fdr`.
pvals <- full %>%
  filter(name %in% keep) %>%
  group_by(Type, name) %>%
  summarize(null_dist = list(value), .groups = "drop") %>%
  inner_join(
    real %>% ungroup() %>% select(Type, name, observed = value),
    by = c("Type", "name")
  ) %>%
  rowwise() %>%
  mutate(
    p_upper = mean(null_dist >= observed),
    p_lower = mean(null_dist <= observed),
    p_two_sided = min(1, 2 * min(p_upper, p_lower))
  ) %>%
  ungroup() %>%
  mutate(fdr = p.adjust(p_two_sided, method = "BH"))
pvals

# The list-column cannot be written to CSV, so it is dropped first.
write.csv(
  pvals %>% select(-null_dist),
  "comparisons_to_null.csv",
  row.names = FALSE
)

# Per-permutation difference ART - Natural for every retained name.
#
# Only the permutation rows (`dat`) are used. Building this from `full` would
# pull the observed rows into the null distribution, and because those rows
# carry a distinct `Samples` value, pivot_wider() would place observed ART and
# Natural on separate rows and fill the missing side with 0.
# The columns are reduced to the identifiers that define one permutation
# (`file_name`, `name`) before widening, so each file/name pair yields exactly
# one row. A sample type missing from a file is treated as 0.
perms_diff <- dat %>%
  ungroup() %>%
  filter(name %in% keep) %>%
  select(file_name, name, Type, value) %>%
  pivot_wider(names_from = Type, values_from = value, values_fill = 0) %>%
  mutate(
    diff = ART - Natural  # group names must match the values in `Type`
  ) %>%
  select(file_name, name, diff)


# Observed ART - Natural difference for every name: the observed table is
# widened to one column per sample type (missing combinations filled with 0)
# and the two columns are subtracted.
real_diff <- ungroup(real) %>%
  select(name, Type, value) %>%
  pivot_wider(
    names_from = Type,
    values_from = value,
    values_fill = 0
  ) %>%
  mutate(obs_diff = ART - Natural) %>%
  select(name, obs_diff)

real_diff

# Test the observed ART - Natural difference against the permutation
# differences, one row per name.
#   p_one_sided : fraction of permutation differences >= the observed one
#   mean_null   : mean of the permutation differences
#   delta       : observed difference minus mean_null
#   BH          : Benjamini-Hochberg adjustment of p_one_sided
null_diffs_by_name <- perms_diff %>%
  group_by(name) %>%
  summarize(null_diff_dist = list(diff), .groups = "drop")

group_comparison <- inner_join(null_diffs_by_name, real_diff, by = "name") %>%
  mutate(
    p_one_sided = map2_dbl(
      null_diff_dist, obs_diff,
      function(null_diffs, observed) mean(null_diffs >= observed)
    ),
    mean_null = map_dbl(null_diff_dist, mean),
    delta = obs_diff - mean_null
  ) %>%
  mutate(BH = p.adjust(p_one_sided, method = "BH"))
group_comparison

# The list-column is dropped because it cannot be written to CSV.
write.csv(
  select(group_comparison, -null_diff_dist),
  "group_comparisons.csv",
  row.names = FALSE
)
