# Input tables ----
# Both files are read relative to the current working directory.
# check.names = TRUE is read.csv()'s default and is spelled out here because
# the column extraction further down relies on the sanitised (make.names)
# form of the headers.
# NOTE(review): the second file name is spelled "reliablity"; it has to match
# the file on disk, so it is left untouched.
rama_stats <- read.csv(
  file = "./csv/CHM1_ref_RaMA_statistics.csv",
  check.names = TRUE
)
rare_match_reliability <- read.csv(
  file = "./csv/RaMA_rare_match_reliablity.csv",
  check.names = TRUE
)

# Column extraction ----
# read.csv() sanitises headers with make.names(), so the names used below are
# prefixes of the stored column names rather than necessarily the full names
# (the original comments give "mean identity (%)" as a header, which
# make.names() would store as "mean.identity...." -- TODO confirm against the
# CSV files). The previous `$` access resolved these prefixes through partial
# matching and silently returned NULL when a prefix matched no column or
# several columns. pick_column() applies the same rule (exact name first,
# otherwise a unique prefix match) but stops with an explicit error instead.

#' Fetch one column of a data frame by exact name or unique name prefix.
#'
#' @param df A data frame.
#' @param prefix Exact column name, or the leading characters of one.
#' @param source_name Label for `df` used in the error message.
#' @return The matching column as a vector.
pick_column <- function(df, prefix, source_name) {
  col_names <- names(df)
  if (prefix %in% col_names) {
    return(df[[prefix]])
  }
  hits <- col_names[startsWith(col_names, prefix)]
  if (length(hits) != 1L) {
    stop(
      "Expected exactly one column starting with '", prefix, "' in ",
      source_name, ", found ", length(hits), ".",
      call. = FALSE
    )
  }
  df[[hits]]
}

# Columns taken from the RaMA statistics table.
ref_aligned <- pick_column(rama_stats, "ref.aligned..", "rama_stats")
ref_snv <- pick_column(rama_stats, "ref.SNV..", "rama_stats")
ref_snv_reliable <- pick_column(
  rama_stats, "ref.SNV.in.reliable.region..", "rama_stats"
)
mean_identity <- pick_column(rama_stats, "mean.identity..", "rama_stats")
mean_identity_reliable <- pick_column(
  rama_stats, "mean.identity.in.reliable.region..", "rama_stats"
)

# Columns taken from the rare-match reliability table.
reliable_ratio <- pick_column(
  rare_match_reliability, "reliable.ratio..", "rare_match_reliability"
)
large_indel_ratio <- pick_column(
  rare_match_reliability, "large.indel.ratio..", "rare_match_reliability"
)
reliable_large_indel_ratio <- pick_column(
  rare_match_reliability, "reliable.large.indel.ratio..",
  "rare_match_reliability"
)

# Sanity check on the extracted columns ----
# All eight vectors become columns of one data frame, so every one of them
# must be non-empty and they must all share a single length. A column that
# could not be extracted is NULL (length 0); comparing the vectors only in
# pairs would let two NULLs in the same pair pass unnoticed.
column_lengths <- vapply(
  list(
    ref_aligned = ref_aligned,
    reliable_ratio = reliable_ratio,
    ref_snv = ref_snv,
    ref_snv_reliable = ref_snv_reliable,
    large_indel_ratio = large_indel_ratio,
    reliable_large_indel_ratio = reliable_large_indel_ratio,
    mean_identity = mean_identity,
    mean_identity_reliable = mean_identity_reliable
  ),
  length,
  integer(1)
)

empty_columns <- names(column_lengths)[column_lengths == 0L]
if (length(empty_columns) > 0L) {
  stop(
    "Missing or empty column(s): ",
    paste(empty_columns, collapse = ", ")
  )
}

if (length(unique(column_lengths)) != 1L) {
  stop("列的长度不一致，请检查数据文件！")
}

# Plotting table ----
# One row per chromosome, one column per plotted series.
# The chromosome labels 1-22 and X are attached purely by position.
# NOTE(review): this assumes both CSV files list the chromosomes in exactly
# that order (1..22, then X) -- confirm against the data files.
plot_data <- data.frame(
  # Factor levels follow the label order so the x axis is not re-sorted.
  Chromosome = local({
    chromosome_labels <- c(1:22, "X")
    factor(chromosome_labels, levels = chromosome_labels)
  }),
  # Series shown in the reliability figure (p1).
  ref_aligned = ref_aligned,
  reliable_ratio = reliable_ratio,
  # Series shown in the SNV figure (p2).
  ref_snv = ref_snv,
  ref_snv_reliable = ref_snv_reliable,
  # Series shown in the large-indel figure (p3).
  large_indel_ratio = large_indel_ratio,
  reliable_large_indel_ratio = reliable_large_indel_ratio,
  # Series shown in the mean-identity figure (p4).
  mean_identity = mean_identity,
  mean_identity_reliable = mean_identity_reliable
)

# ---- Plotting: four per-chromosome scatter plots (p1 to p4) ----

# 加载绘图库
library(ggplot2)

# Colour and shape lookup tables ----
# `my_colors` maps each series key to a hex colour. The keys are the strings
# used for the colour/shape mappings in the plots, two keys per figure.
my_colors <- c(
  # Reliability figure
  "ref aligned (%)" = "#f7776e",                      # coral red
  "reliable ratio (%)" = "#6c9fc6",                   # medium blue
  # SNV figure
  "ref SNV (%)" = "#4e5d71",                          # slate blue-grey
  "ref SNV in reliable region (%)" = "#bb2a2d",       # dark red
  # Large-indel figure
  "large indel ratio (%)" = "#021e30",                # very dark navy
  "reliable large indel ratio (%)" = "#45c3af",       # turquoise
  # Mean-identity figure
  "mean identity (%)" = "#0393af",                    # cyan-blue
  "mean identity in reliable region (%)" = "#1f4c98"  # dark blue
)

# Point shapes shared by every figure: 16 is a filled circle, 17 a filled
# triangle. The vector is deliberately unnamed so that it is assigned to the
# two series of a figure by position.
shapes <- c(16, 17)

# Figure 1: two reliability measures per chromosome ----
# Draws `ref_aligned` and `reliable_ratio` from `plot_data` as points, labels
# each point with its value rounded to two decimals, and saves the figure to
# ./plot/RaMA_reliablity_chm1.pdf.

# Series keys and the legend text shown for them, in matching order. The keys
# are passed as explicit `breaks` so the legend labels are tied to the right
# series instead of depending on ggplot2 sorting the keys alphabetically.
p1_series <- c("ref aligned (%)", "reliable ratio (%)")
p1_legend <- c(
  "Reliability based on identity",
  "Reliability based on rare match"
)

p1 <- ggplot(plot_data, aes(x = Chromosome)) +
  geom_point(
    aes(
      y = ref_aligned,
      color = "ref aligned (%)",
      shape = "ref aligned (%)"
    ),
    size = 5
  ) +
  geom_point(
    aes(
      y = reliable_ratio,
      color = "reliable ratio (%)",
      shape = "reliable ratio (%)"
    ),
    size = 5
  ) +
  # Numeric value labels above the points, in the colour of their series.
  # check_overlap = TRUE skips a label that would overlap an earlier label
  # of the same layer.
  geom_text(
    aes(y = ref_aligned, label = round(ref_aligned, 2)),
    color = my_colors[["ref aligned (%)"]],
    vjust = -1.5, hjust = 0.5, size = 4,
    nudge_y = 0.5, check_overlap = TRUE
  ) +
  geom_text(
    aes(y = reliable_ratio, label = round(reliable_ratio, 2)),
    color = my_colors[["reliable ratio (%)"]],
    vjust = -1.5, hjust = 0.5, size = 4,
    nudge_y = 0.5, check_overlap = TRUE
  ) +
  labs(
    y = "Two types of reliable region ratio for RaMA alignment (%)",
    x = ""
  ) +
  # name = NULL removes the legend title. Colour and shape use the same
  # breaks and labels so they are drawn as one combined legend.
  scale_color_manual(
    name = NULL,
    values = my_colors[p1_series],
    breaks = p1_series,
    labels = p1_legend
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes,
    breaks = p1_series,
    labels = p1_legend
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),   # legend entries
    axis.text.x = element_text(size = 14),   # x-axis tick labels
    axis.text.y = element_text(size = 14),   # y-axis tick labels
    axis.title.x = element_text(size = 16),  # x-axis title
    # Margins in cm, in the order top, right, bottom, left.
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # Black frame around the panel. `size` is kept for older ggplot2
    # releases; from ggplot2 3.4.0 on it is deprecated in favour of
    # `linewidth` and triggers a deprecation warning.
    panel.border = element_rect(color = "black", fill = NA, size = 1.5),
    legend.position = "top"                  # legend above the panel
  )

# Make sure the output directory exists; ggsave() fails otherwise.
dir.create("./plot", showWarnings = FALSE, recursive = TRUE)
ggsave("./plot/RaMA_reliablity_chm1.pdf", p1, width = 10, height = 4)
# print(p1)


# Figure 2: SNV percentage per chromosome ----
# Draws `ref_snv` and `ref_snv_reliable` from `plot_data` as points, labels
# each point with its value rounded to two decimals, and saves the figure to
# ./plot/RaMA_SNV_chm1.pdf.

# Series keys and the legend text shown for them, in matching order. The keys
# are passed as explicit `breaks` so the legend labels are tied to the right
# series instead of depending on ggplot2 sorting the keys alphabetically.
p2_series <- c("ref SNV (%)", "ref SNV in reliable region (%)")
p2_legend <- c(
  "SNV in full reference",
  "SNV in reliable region based on identity"
)

p2 <- ggplot(plot_data, aes(x = Chromosome)) +
  geom_point(
    aes(y = ref_snv, color = "ref SNV (%)", shape = "ref SNV (%)"),
    size = 5
  ) +
  geom_point(
    aes(
      y = ref_snv_reliable,
      color = "ref SNV in reliable region (%)",
      shape = "ref SNV in reliable region (%)"
    ),
    size = 5
  ) +
  # Numeric value labels above the points, in the colour of their series.
  geom_text(
    aes(y = ref_snv, label = round(ref_snv, 2)),
    color = my_colors[["ref SNV (%)"]],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  geom_text(
    aes(y = ref_snv_reliable, label = round(ref_snv_reliable, 2)),
    color = my_colors[["ref SNV in reliable region (%)"]],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  labs(y = "SNV (%)", x = "") +
  # name = NULL removes the legend title. Colour and shape use the same
  # breaks and labels so they are drawn as one combined legend.
  scale_color_manual(
    name = NULL,
    values = my_colors[p2_series],
    breaks = p2_series,
    labels = p2_legend
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes,
    breaks = p2_series,
    labels = p2_legend
  ) +
  theme_minimal() +
  # y axis runs from -0.15 up to the largest plotted value.
  # NOTE(review): the labels sit above the points (vjust = -1.8), so labels
  # of the highest points may be clipped at the upper limit -- confirm in the
  # rendered PDF.
  ylim(
    -0.15,
    max(plot_data$ref_snv, plot_data$ref_snv_reliable, na.rm = TRUE)
  ) +
  theme(
    legend.text = element_text(size = 14),   # legend entries
    axis.text.x = element_text(size = 14),   # x-axis tick labels
    axis.text.y = element_text(size = 14),   # y-axis tick labels
    axis.title.x = element_text(size = 16),  # x-axis title
    # Margins in cm, in the order top, right, bottom, left.
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # Black frame around the panel. `size` is kept for older ggplot2
    # releases; from ggplot2 3.4.0 on it is deprecated in favour of
    # `linewidth` and triggers a deprecation warning.
    panel.border = element_rect(color = "black", fill = NA, size = 1.5),
    legend.position = "top"                  # legend above the panel
  )

# Make sure the output directory exists; ggsave() fails otherwise.
dir.create("./plot", showWarnings = FALSE, recursive = TRUE)
ggsave("./plot/RaMA_SNV_chm1.pdf", p2, width = 10, height = 3)
# print(p2)


# Figure 3: large-indel ratio per chromosome ----
# Draws `large_indel_ratio` and `reliable_large_indel_ratio` from `plot_data`
# as points, labels each point with its value rounded to two decimals, and
# saves the figure to ./plot/RaMA_indel_chm1.pdf.

# Series keys and the legend text shown for them, in matching order. The keys
# are passed as explicit `breaks` so the legend labels are tied to the right
# series instead of depending on ggplot2 sorting the keys alphabetically.
p3_series <- c("large indel ratio (%)", "reliable large indel ratio (%)")
p3_legend <- c(
  "Large indel ratio",
  "Reliable large indel ratio based on rare match"
)

p3 <- ggplot(plot_data, aes(x = Chromosome)) +
  geom_point(
    aes(
      y = large_indel_ratio,
      color = "large indel ratio (%)",
      shape = "large indel ratio (%)"
    ),
    size = 5
  ) +
  geom_point(
    aes(
      y = reliable_large_indel_ratio,
      color = "reliable large indel ratio (%)",
      shape = "reliable large indel ratio (%)"
    ),
    size = 5
  ) +
  # Numeric value labels above the points, in the colour of their series.
  geom_text(
    aes(y = large_indel_ratio, label = round(large_indel_ratio, 2)),
    color = my_colors[["large indel ratio (%)"]],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  geom_text(
    aes(
      y = reliable_large_indel_ratio,
      label = round(reliable_large_indel_ratio, 2)
    ),
    color = my_colors[["reliable large indel ratio (%)"]],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  labs(y = "Large indel ratio in complete alignment (%)", x = "Chromosome") +
  # name = NULL removes the legend title. Colour and shape use the same
  # breaks and labels so they are drawn as one combined legend.
  scale_color_manual(
    name = NULL,
    values = my_colors[p3_series],
    breaks = p3_series,
    labels = p3_legend
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes,
    breaks = p3_series,
    labels = p3_legend
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),              # legend entries
    axis.text.x = element_text(angle = 0, size = 14),   # x-axis tick labels
    axis.text.y = element_text(size = 14),              # y-axis tick labels
    axis.title.x = element_text(size = 16),             # x-axis title
    # Margins in cm, in the order top, right, bottom, left.
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # Black frame around the panel. `size` is kept for older ggplot2
    # releases; from ggplot2 3.4.0 on it is deprecated in favour of
    # `linewidth` and triggers a deprecation warning.
    panel.border = element_rect(color = "black", fill = NA, size = 1.5),
    legend.position = "top"                             # legend above panel
  )

# Make sure the output directory exists; ggsave() fails otherwise.
dir.create("./plot", showWarnings = FALSE, recursive = TRUE)
ggsave("./plot/RaMA_indel_chm1.pdf", p3, width = 10, height = 4)


# print(p3)

# Figure 4: mean identity per chromosome ----
# Draws `mean_identity` and `mean_identity_reliable` from `plot_data` as
# points, labels each point with its value rounded to two decimals, saves the
# figure to ./plot/RaMA_identity_chm1.pdf and prints it to the active device.

# Series keys and the legend text shown for them, in matching order. The keys
# are passed as explicit `breaks` so the legend labels are tied to the right
# series instead of depending on ggplot2 sorting the keys alphabetically.
p4_series <- c("mean identity (%)", "mean identity in reliable region (%)")
p4_legend <- c("Mean Identity", "Mean Identity in Reliable Region")

p4 <- ggplot(plot_data, aes(x = Chromosome)) +
  geom_point(
    aes(
      y = mean_identity,
      color = "mean identity (%)",
      shape = "mean identity (%)"
    ),
    size = 5
  ) +
  geom_point(
    aes(
      y = mean_identity_reliable,
      color = "mean identity in reliable region (%)",
      shape = "mean identity in reliable region (%)"
    ),
    size = 5
  ) +
  # Numeric value labels above the points, in the colour of their series.
  geom_text(
    aes(y = mean_identity, label = round(mean_identity, 2)),
    color = my_colors[["mean identity (%)"]],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  geom_text(
    aes(y = mean_identity_reliable, label = round(mean_identity_reliable, 2)),
    color = my_colors[["mean identity in reliable region (%)"]],
    vjust = -1.8, hjust = 0.5, size = 4
  ) +
  labs(y = "Mean Identity (%)", x = "Chromosome") +
  # y axis runs from the smallest plotted value up to a fixed 102
  # (presumably to leave room for the labels above points close to 100 --
  # confirm in the rendered PDF).
  ylim(
    min(plot_data$mean_identity, plot_data$mean_identity_reliable,
        na.rm = TRUE),
    102
  ) +
  # name = NULL removes the legend title. Colour and shape use the same
  # breaks and labels so they are drawn as one combined legend.
  scale_color_manual(
    name = NULL,
    values = my_colors[p4_series],
    breaks = p4_series,
    labels = p4_legend
  ) +
  scale_shape_manual(
    name = NULL,
    values = shapes,
    breaks = p4_series,
    labels = p4_legend
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 14),              # legend entries
    axis.text.x = element_text(angle = 0, size = 14),   # x-axis tick labels
    axis.text.y = element_text(size = 14),              # y-axis tick labels
    axis.title.x = element_text(size = 16),             # x-axis title
    axis.title.y = element_text(size = 16),             # y-axis title
    # Margins in cm, in the order top, right, bottom, left.
    plot.margin = unit(c(0.1, 0.5, 0.1, 0.5), "cm"),
    # Black frame around the panel. `size` is kept for older ggplot2
    # releases; from ggplot2 3.4.0 on it is deprecated in favour of
    # `linewidth` and triggers a deprecation warning.
    panel.border = element_rect(color = "black", fill = NA, size = 1.5),
    legend.position = "top"                             # legend above panel
  )

# Make sure the output directory exists; ggsave() fails otherwise.
dir.create("./plot", showWarnings = FALSE, recursive = TRUE)
ggsave("./plot/RaMA_identity_chm1.pdf", p4, width = 10, height = 4)

print(p4)



