# Read the UniAligner statistics table for the CHM13 reference.
unialigner_stats <- read.csv("./csv/CHM13_ref_UniAligner_statistics.csv")

# Read the large indel statistics table (read once; a second identical
# read.csv() call into the same variable was redundant).
large_indel_stats <- read.csv("./csv/UniAligner_large_indel_statistics.csv")

# Extract the large indel ratio column. The trailing dots are part of the
# column name as it appears in the data frame returned by read.csv().
large_indel_ratio <- large_indel_stats$large.indel.ratio....

# `$` returns NULL without any error when the column is absent, so stop here
# with an explicit message instead of failing later in an unrelated place.
if (is.null(large_indel_ratio)) {
  stop(
    "Column 'large.indel.ratio....' not found in ",
    "./csv/UniAligner_large_indel_statistics.csv",
    call. = FALSE
  )
}


# Pull the statistics columns used by the figures out of unialigner_stats.
# The trailing dots are part of the column names in the data frame.
mean_identity_reliable <- unialigner_stats$mean.identity.in.reliable.region..
mean_identity <- unialigner_stats$mean.identity..
ref_snv_reliable <- unialigner_stats$ref.SNV.in.reliable.region..
ref_snv <- unialigner_stats$ref.SNV..
ref_aligned <- unialigner_stats$ref.aligned..

# Assemble the plotting data frame: one row per chromosome label (1-22, X).
# The labels are hard-coded, so this assumes the CSV rows follow that same
# order -- TODO confirm against the input files.
plot_data <- local({
  # Explicit factor levels keep the x axis in karyotype order rather than
  # alphabetical order.
  chrom_labels <- c(as.character(1:22), "X")
  data.frame(
    Chromosome = factor(chrom_labels, levels = chrom_labels),
    ref_aligned = ref_aligned,
    ref_snv = ref_snv,
    ref_snv_reliable = ref_snv_reliable,
    large_indel_ratio = large_indel_ratio,
    mean_identity = mean_identity,
    mean_identity_reliable = mean_identity_reliable
  )
})

# 加载绘图库
library(ggplot2)

# Colour palette: a named character vector of hex colours. The names are the
# series labels that the plots pass as constant strings to aes(color = ...),
# and the plots index this vector by those names.
my_colors <- c(
  "#f7776e",
  "#6c9fc6",
  "#4e5d71",
  "#bb2a2d",
  "#021e30",
  "#45c3af",
  "#0393af",
  "#1f4c98"
)
names(my_colors) <- c(
  "ref aligned (%)",
  "reliable ratio (%)",
  "ref SNV (%)",
  "ref SNV in reliable region (%)",
  "large indel ratio (%)",
  "reliable large indel ratio (%)",
  "mean identity (%)",
  "mean identity in reliable region (%)"
)

# Point shapes handed to scale_shape_manual(): 16 is a filled circle and
# 17 is a filled triangle.
shapes <- c(16, 17)


# Figure 1: the ref_aligned column of plot_data, one labelled point per
# chromosome.
p1 <- ggplot(plot_data, aes(x = Chromosome)) +
  # Constant strings inside aes() create a single-entry colour/shape legend.
  geom_point(
    aes(y = ref_aligned, color = "ref aligned (%)", shape = "ref aligned (%)"),
    size = 5
  ) +
  # Print each value, rounded to two decimals, above its point in the same
  # colour as the series.
  geom_text(
    aes(y = ref_aligned, label = round(ref_aligned, 2)),
    color = my_colors["ref aligned (%)"],
    vjust = -1.5, hjust = 0.5, size = 4
  ) +
  # NOTE(review): the axis title mentions RaMA although the inputs are
  # UniAligner statistics -- confirm the wording is intended.
  labs(y = "Reliable region ratio for RaMA alignment (%)", x = "") +
  # Colour and shape scales share the same (empty) name and label so that
  # they are drawn as one merged legend entry.
  scale_color_manual(
    name = NULL,
    values = my_colors["ref aligned (%)"],
    labels = "Reliability based on identity"
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes[1],
    labels = "Reliability based on identity"
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated since ggplot2 3.4.0.
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
    legend.position = "top"
  )

# Save the figure as a PDF.
ggsave("./plot/UniAligner_reliability_chm13.pdf", p1, width = 10, height = 4)

# Figure 2: ref_snv and ref_snv_reliable from plot_data, drawn as two
# labelled point series per chromosome.
p2 <- ggplot(plot_data, aes(x = Chromosome)) +
  geom_point(
    aes(y = ref_snv, color = "ref SNV (%)", shape = "ref SNV (%)"),
    size = 5
  ) +
  geom_point(
    aes(
      y = ref_snv_reliable,
      color = "ref SNV in reliable region (%)",
      shape = "ref SNV in reliable region (%)"
    ),
    size = 5
  ) +
  # Value labels (two decimals) above each point, coloured like their series.
  geom_text(
    aes(y = ref_snv, label = round(ref_snv, 2)),
    color = my_colors["ref SNV (%)"],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  geom_text(
    aes(y = ref_snv_reliable, label = round(ref_snv_reliable, 2)),
    color = my_colors["ref SNV in reliable region (%)"],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  labs(y = "SNV (%)", x = "") +
  # Identical names/labels on both scales merge colour and shape into one
  # legend.
  scale_color_manual(
    name = NULL,
    values = my_colors[c("ref SNV (%)", "ref SNV in reliable region (%)")],
    labels = c("SNV in Full Reference", "SNV in Reliable Region")
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes,
    labels = c("SNV in Full Reference", "SNV in Reliable Region")
  ) +
  # Fixed lower bound; the upper bound is the largest plotted value.
  # NOTE(review): labels are drawn above the points, so the label of the
  # topmost point may extend past the panel -- confirm in the output.
  ylim(
    -0.15,
    max(plot_data$ref_snv, plot_data$ref_snv_reliable, na.rm = TRUE)
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated since ggplot2 3.4.0.
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
    legend.position = "top"
  )

# Save the figure as a PDF.
ggsave("./plot/UniAligner_SNV_chm13.pdf", p2, width = 10, height = 3)

# Figure 3: the large_indel_ratio column of plot_data, one labelled point per
# chromosome.
p3 <- ggplot(plot_data, aes(x = Chromosome)) +
  # Constant strings inside aes() create a single-entry colour/shape legend.
  geom_point(
    aes(
      y = large_indel_ratio,
      color = "large indel ratio (%)",
      shape = "large indel ratio (%)"
    ),
    size = 5
  ) +
  # Print each value, rounded to two decimals, above its point.
  geom_text(
    aes(y = large_indel_ratio, label = round(large_indel_ratio, 2)),
    color = my_colors["large indel ratio (%)"],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  labs(y = "Large Indel Ratio in Complete Alignment (%)", x = "Chromosome") +
  # Colour and shape scales share name and label so they merge into one
  # legend entry.
  scale_color_manual(
    name = NULL,
    values = my_colors["large indel ratio (%)"],
    labels = "Large Indel Ratio"
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes[1],
    labels = "Large Indel Ratio"
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated since ggplot2 3.4.0.
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
    legend.position = "top"
  )

# Save the figure as a PDF.
ggsave("./plot/UniAligner_indel_chm13.pdf", p3, width = 10, height = 4)


# Figure 4: mean_identity and mean_identity_reliable from plot_data, drawn as
# two labelled point series per chromosome.
p4 <- ggplot(plot_data, aes(x = Chromosome)) +
  geom_point(
    aes(
      y = mean_identity,
      color = "mean identity (%)",
      shape = "mean identity (%)"
    ),
    size = 5
  ) +
  geom_point(
    aes(
      y = mean_identity_reliable,
      color = "mean identity in reliable region (%)",
      shape = "mean identity in reliable region (%)"
    ),
    size = 5
  ) +
  # Value labels (two decimals) above each point, coloured like their series.
  geom_text(
    aes(y = mean_identity, label = round(mean_identity, 2)),
    color = my_colors["mean identity (%)"],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  geom_text(
    aes(y = mean_identity_reliable, label = round(mean_identity_reliable, 2)),
    color = my_colors["mean identity in reliable region (%)"],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  labs(y = "Mean Identity (%)", x = "Chromosome") +
  # Lower bound is the smallest plotted value; the fixed upper bound of 102
  # presumably leaves headroom for the labels above the highest points.
  ylim(
    min(plot_data$mean_identity, plot_data$mean_identity_reliable,
        na.rm = TRUE),
    102
  ) +
  # Identical names/labels on both scales merge colour and shape into one
  # legend.
  scale_color_manual(
    name = NULL,
    values = my_colors[c("mean identity (%)",
                         "mean identity in reliable region (%)")],
    labels = c("Mean Identity", "Mean Identity in Reliable Region")
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes,
    labels = c("Mean Identity", "Mean Identity in Reliable Region")
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated since ggplot2 3.4.0.
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
    legend.position = "top"
  )

# Save the figure as a PDF, then draw it on the active graphics device.
ggsave("./plot/UniAligner_identity_chm13.pdf", p4, width = 10, height = 4)
print(p4)
