### Change these values for each new run
input_file = "NC_022275.1[137357684..141243757].fa" # Choose the input file
chromosome = "mafa5" # The chromosome contained in the input file

### Do not change these variables
output_file = chromosome + "_windows.bed" # Output table is written to "<chromosome>_windows.bed"
seq_position = 0 # 1-based position of the character currently being examined
seq_list = [] # Every character of the input sequence, one per element
seq_dict = {} # Integer key -> one output row: [chromosome, start, stop, type, length]
ATGC_count = 0 # Length of the sequence window currently being read
N_count = 0 # Length of the gap window currently being read

from Bio import SeqIO

fasta_sequences = SeqIO.parse(input_file, "fasta") # Iterator object

# Reads the sequence into a list of individual characters.
# If the file holds more than one record, the records are concatenated in
# file order and later reported under the single `chromosome` label.
for seq_record in fasta_sequences:
	seq_list.extend(seq_record.seq) # Same result as appending each character in turn

# Walk the sequence once, counting the length of the window currently being read.
# Whenever the window type flips (sequence <-> gap), the window that just ended
# is stored with 1-based, inclusive start/stop coordinates. At that moment
# seq_position is the position of the FIRST character of the new window, so the
# finished window spans [seq_position - count, seq_position - 1].
for i, base in enumerate(seq_list):
	seq_position += 1
	if base in ("N", "n"): # Case if the character is an N (lowercase "n" is the same unknown base)
		if ATGC_count != 0:
			# Sequence window is over - add this section to the dictionary
			seq_dict[i] = [chromosome, seq_position - ATGC_count, seq_position - 1, "sequence", ATGC_count]
			ATGC_count = 0 # Reset the window count
		N_count += 1 # Increment the gap window
	else: # Case if the character is anything else (A, T, G, C, ...)
		if N_count != 0:
			# Gap window is over - add this section to the dictionary
			seq_dict[i] = [chromosome, seq_position - N_count, seq_position - 1, "gap", N_count]
			N_count = 0 # Reset the window count
		ATGC_count += 1 # Increment the sequence window

# Closing cases: the window still open when the input ends has not been stored yet.
# Here seq_position is the position of the LAST character of that window (not the
# first character of the next one, as it is inside the loop), so the window spans
# [seq_position - count + 1, seq_position] in the same 1-based inclusive
# coordinates used for every other row.
if ATGC_count != 0:
	# Closes the last sequence window and adds it to the dictionary
	seq_dict[seq_position + 1] = [chromosome, seq_position - ATGC_count + 1, seq_position, "sequence", ATGC_count]

if N_count != 0:
	# Closes the last gap window and adds it to the dictionary
	seq_dict[seq_position + 1] = [chromosome, seq_position - N_count + 1, seq_position, "gap", N_count]


# Writes the information to a bed file.
# The `with` block guarantees the file is flushed and closed even if a write fails.
with open(output_file, "w") as fasta_file:
	fasta_file.write("Chromosome" + "\t" + "Start" + "\t" +"Stop" + "\t" + "Type" + "\t" + "Length" + "\n") # Header for the columns in the file
	# One tab-separated line per stored window. Keys grow with sequence position,
	# so sorting them yields the windows in order along the sequence.
	for key in sorted(seq_dict):
		fasta_file.write('\t'.join(str(v) for v in seq_dict[key]) + "\n")