# 加载必要的库
library(ggplot2)

# Read the anchor-count table. read.csv() sanitises column names
# (check.names = TRUE), which is why the count column is referenced below as
# `Anchor.Count`.
input_path <- "./csv/mt_match_length_anchor_counts.csv"
data <- read.csv(input_path)

if (!"Anchor.Count" %in% names(data)) {
  stop("Expected an 'Anchor.Count' column in ", input_path, call. = FALSE)
}

# Make sure the Similarity column holds the correct values, filling in the
# missing 70 level. The values are assigned positionally.
# NOTE(review): this assumes the CSV rows are ordered by ascending similarity
# (70, 75, 80, 81, ..., 99) -- confirm against the file.
similarity_levels <- c(70, 75, 80:99)

# Guard the row count explicitly: `$<-` on a data frame silently recycles a
# shorter vector when the row count is a multiple of its length, which would
# mislabel the axis instead of failing.
if (nrow(data) != length(similarity_levels)) {
  stop(
    "Expected ", length(similarity_levels), " rows in ", input_path,
    " but found ", nrow(data),
    call. = FALSE
  )
}
data$Similarity <- similarity_levels

# Pseudo-similarity: an evenly spaced index (1, 2, 3, ...) so that the
# unevenly spaced levels 70, 75, 80, 81, ... are drawn equidistantly.
data$Pseudo_Similarity <- seq_along(similarity_levels)

# Line chart with the pseudo-similarity on the x axis and the anchor count on
# the y axis. Line widths use `linewidth`, which replaced `size` for lines and
# borders in ggplot2 3.4.0 (`size` remains correct for points).
anchor_plot <- ggplot(data, aes(x = Pseudo_Similarity, y = Anchor.Count)) +
  geom_line(color = "#add8e6", linewidth = 1.5, linetype = "solid") +  # light blue line
  geom_point(color = "#f08080", size = 3, shape = 19) +  # light red points
  labs(
    title = "Total anchor count identified by RaMA \nacross sequences with different similarities",
    x = "Sequence Similarity",
    y = "Anchor Count"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),  # centred, bold title
    axis.text.x = element_text(angle = 45, hjust = 1),  # slanted x tick labels
    panel.grid.minor = element_blank(),  # no minor grid lines
    panel.grid.major = element_blank(),  # no major grid lines
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1)  # plain black frame
  ) +
  # Plot against the pseudo-similarity but label ticks with the real values.
  scale_x_continuous(
    breaks = data$Pseudo_Similarity,
    labels = data$Similarity
  ) +
  # Format y axis values with thousands separators.
  scale_y_continuous(labels = scales::comma)

# Show the figure when run interactively; in batch mode (Rscript) skip the
# implicit print so no stray Rplots.pdf is written.
if (interactive()) {
  print(anchor_plot)
}

# Save the figure. The plot is passed explicitly rather than relying on
# last_plot(), and the output directory is created if it does not exist yet.
output_path <- "./plot/mt_anchor_count.svg"
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
ggsave(output_path, plot = anchor_plot, width = 8, height = 6)

