# 加载必要的库
library(ggplot2)

# Load the input table and give its columns the fixed names used by the
# plotting code below.
data <- read.csv("./csv/sub_sa_length.csv")
names(data) <- c("chr", "length")

# Turn `chr` into a factor whose levels follow first appearance in the file,
# so the x axis keeps the file's row order instead of being sorted
# alphabetically.
data[["chr"]] <- factor(data[["chr"]], levels = unique(data[["chr"]]))

# Mean of the `length` column; drawn later as a horizontal reference line.
mean_length <- mean(data[["length"]])

# Build a bar chart of `length` per `chr`, with a dashed red line at the mean
# and a text annotation showing the mean value. The plot is bound to a name so
# it can be passed to ggsave() explicitly instead of relying on ggplot2's
# implicit "last plot" state.

# Largest bar height; reused for the y-axis limit and the annotation position.
max_length <- max(data$length)

sa_length_plot <- ggplot(data, aes(x = chr, y = length)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(fill = "#6CAEDB", width = 0.7) +
  # Print each bar's value just above the bar.
  geom_text(
    aes(label = length),
    vjust = -0.2, size = 4, color = "black", fontface = "plain"
  ) +
  labs(
    title = "Ratio of sub suffix array length to original\n sequence length in different chromosome",
    x = "Chromosome",
    y = "Ratio"
  ) +
  # No axis expansion so bars sit on the x axis; the upper limit leaves 30%
  # headroom above the tallest bar for the value labels and the annotation.
  scale_y_continuous(expand = c(0, 0), limits = c(0, max_length * 1.3)) +
  # Dashed red reference line at the mean.
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept here (and in element_line() below) so older versions of
  # ggplot2 still work.
  geom_hline(
    yintercept = mean_length,
    linetype = "dashed", color = "red", size = 1
  ) +
  # Manually positioned label for the mean, left-aligned at the first bar.
  # NOTE(review): the fixed `- 1.5` offset is in data units. If the plotted
  # values are small, this falls below 0 (outside the y limits) and the label
  # is dropped - confirm against the actual data.
  annotate(
    "text",
    x = 1,
    y = max_length * 1.15 - 1.5,
    label = paste("Mean =", round(mean_length, 2)),
    color = "red", size = 5, hjust = 0
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Centered, bold, dark blue title.
    plot.title = element_text(
      hjust = 0.5, size = 18, face = "bold", color = "darkblue"
    ),
    # Bold axis titles.
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    # Rotate x tick labels by 45 degrees.
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    # Light major grid lines, no minor grid lines, no panel background.
    panel.grid.major = element_line(color = "grey90", size = 0.3),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )

# Auto-print at top level so the plot is still displayed as it was before it
# was bound to a name.
sa_length_plot

# Save the chart to an SVG file. The output directory is created first
# because ggsave() fails when the target directory does not exist.
output_file <- "./plot/sub_sa_length_with_mean.svg"
dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
ggsave(output_file, plot = sa_length_plot, width = 10, height = 6)
