I need someone to fix my Python code given the prompy. I can't get it to save the codon counts as a file. Write Code that completes the following 2 objectives 1. Build a function that takes a record from your FASTA file as an argument, and returns a count of each amino acid coded for by the codons of the sequence. Keep in mind that because these records are not necessarily in the proper reading frame, so the user should be prompted to select a reading frame (0, +1, +2). 2. In this section, you will be reading in this file of apple genes and, based on these coding sequences, generate a codon usage bias table for this species. aa_dict = {'Met':['ATG'], 'Phe':['TTT', 'TTC'], 'Leu':['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'], 'Cys':['TGT', 'TGC'], 'Tyr':['TAC', 'TAT'], 'Trp':['TGG'], 'Pro':['CCT', 'CCC', 'CCA', 'CCG'], 'His':['CAT', 'CAC'], 'Gln':['CAA', 'CAG'], 'Arg':['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], 'Ile':['ATT', 'ATC', 'ATA'], 'Thr':['ACT', 'ACC', 'ACA', 'ACG'], 'Asn':['AAT', 'AAC'], 'Lys':['AAA', 'AAG'], 'Ser':['AGT', 'AGC', 'TCT', 'TCC', 'TCA', 'TCG'], 'Val':['GTT', 'GTC', 'GTA', 'GTG'], 'Ala':['GCT', 'GCC', 'GCA', 'GCG'], 'Asp':['GAT', 'GAC'], 'Glu':['GAA', 'GAG'], 'Gly':['GGT', 'GGC', 'GGA', 'GGG'], '*':['TAA','TAG','TGA']} def countCodons(fastqRecord,frame): #param is sequence value counts = {} for key in aa_dict: counts[key] = 0 #print(counts) for i in range(0+frame,len(fastqRecord),3): #first iteration: i(0) and i+3(3) #2nd: i(3) and i+3(6) #3rd: i(6) and i+3(9) start = i end = i+3 codon = fastqRecord[start:end] for aa,codons in aa_dict.items(): if codon in codons: if aa in counts.keys(): counts[aa] += 1 # count of each AA in fragment using the codon # Figure out what the AA is # Add 1 to the count of that AA print(counts) def generate_codon_usage_bias_table(data): # First it parses the input data to extract the coding sequences sequences = parse_data(data) # the it counts the frequency of each codon in the coding sequences codon_counts = 
defaultdict(int) for sequence in sequences: for i in range(0, len(sequence), 3): codon = sequence[i:i+3] codon_counts[codon] += 1 # The it calculate the codon usage bias for each codon codon_bias = defaultdict(list) for codon, count in codon_counts.items(): aa = codon_to_aa_mapping[codon] total_count = sum(codon_counts[c] for c in codon_to_aa_mapping if codon_to_aa_mapping[c] == aa) bias = count / total_count codon_bias[aa].append((codon, count, bias)) # Then it generates a human-readable file that shows the codon usage bias with open("AppleGenes.edited", 'w') as f: for aa, bias_list in codon_bias.items(): f.write(f"Amino acid: {aa}\n") for codon, count, bias in bias_list: f.write(f"Codon: {codon} Count: {count} Bias: {bias}\n") # Finally it calculates the overall codon usage bias for each amino acid aa_bias = {} for aa, bias_list in codon_bias.items(): total_bias = sum(bias for codon, count, bias in bias_list) total_count = sum(count for codon, count, bias in bias_list) aa_bias[aa] = total_bias / total_count return aa_bias def main(): d = {} count = 1 with open("Mdomestica_491_v1.1.cds_primaryTranscriptOnly.fa",'r') as fh: while 1: header = fh.readline().rstrip() if header == "": break sequence = fh.readline().rstrip() fh.readline() score = fh.readline().rstrip() d[header] = (sequence,score) if __name__ == "__main__": main()
I need someone to fix my Python code given the prompt. I can't get it to save the codon counts as a file.
Write code that completes the following two objectives:
1. Build a function that takes a record from your FASTA file as an argument and returns a count of each amino acid coded for by the codons of the sequence. Keep in mind that these records are not necessarily in the proper reading frame, so the user should be prompted to select a reading frame (0, +1, +2).
2. In this section, you will be reading in this file of apple genes and, based on these coding sequences, generate a codon usage bias table for this species.
# Standard genetic code, keyed by three-letter amino-acid abbreviation.
# Each value lists the DNA codons for that amino acid; '*' collects the
# three stop codons. Key order and codon order are significant to callers
# that iterate the table, so they are kept stable.
aa_dict = {
    "Met": ["ATG"],
    "Phe": ["TTT", "TTC"],
    "Leu": ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"],
    "Cys": ["TGT", "TGC"],
    "Tyr": ["TAC", "TAT"],
    "Trp": ["TGG"],
    "Pro": ["CCT", "CCC", "CCA", "CCG"],
    "His": ["CAT", "CAC"],
    "Gln": ["CAA", "CAG"],
    "Arg": ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"],
    "Ile": ["ATT", "ATC", "ATA"],
    "Thr": ["ACT", "ACC", "ACA", "ACG"],
    "Asn": ["AAT", "AAC"],
    "Lys": ["AAA", "AAG"],
    "Ser": ["AGT", "AGC", "TCT", "TCC", "TCA", "TCG"],
    "Val": ["GTT", "GTC", "GTA", "GTG"],
    "Ala": ["GCT", "GCC", "GCA", "GCG"],
    "Asp": ["GAT", "GAC"],
    "Glu": ["GAA", "GAG"],
    "Gly": ["GGT", "GGC", "GGA", "GGG"],
    "*": ["TAA", "TAG", "TGA"],
}
def countCodons(fastqRecord, frame, codon_table=None):
    """Count the amino acids encoded by a nucleotide sequence.

    Args:
        fastqRecord: The nucleotide sequence of one record, as a string.
            (The parameter name is historical; the sequence text is all
            that is used.) Lower-case bases are accepted.
        frame: Reading-frame offset: 0, 1 or 2. Anything ``int()`` accepts
            is allowed, so the strings "+1" / "+2" typed at a prompt work.
        codon_table: Optional mapping of amino-acid label -> list of codons.
            Defaults to the module-level ``aa_dict``.

    Returns:
        A dict with one entry per amino-acid label in the codon table
        (including labels that never occur, with a count of 0).

    Raises:
        ValueError: If ``frame`` is not 0, 1 or 2 (or is not an integer).
    """
    if codon_table is None:
        codon_table = aa_dict

    frame = int(frame)
    if frame not in (0, 1, 2):
        raise ValueError(f"reading frame must be 0, 1 or 2, got {frame}")

    # Invert the table once so each codon is a single dict lookup instead
    # of a scan over every amino acid's codon list.
    codon_to_aa = {
        codon: aa for aa, codons in codon_table.items() for codon in codons
    }

    counts = dict.fromkeys(codon_table, 0)
    sequence = fastqRecord.upper()

    # Stop at len - 2 so a trailing fragment of one or two bases is never
    # treated as a codon.
    for start in range(frame, len(sequence) - 2, 3):
        aa = codon_to_aa.get(sequence[start:start + 3])
        # Codons absent from the table (e.g. containing N) are skipped.
        if aa is not None:
            counts[aa] += 1

    return counts
def _parse_fasta(lines):
    """Yield (header, sequence) pairs from an iterable of FASTA text lines.

    Header lines start with '>' (the '>' is dropped); every following line
    up to the next header is joined into one sequence, so wrapped
    (multi-line) sequences are handled. Blank lines are ignored.
    """
    header = None
    chunks = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith(">"):
            if header is not None or chunks:
                yield header or "", "".join(chunks)
            header = line[1:]
            chunks = []
        else:
            chunks.append(line)
    if header is not None or chunks:
        yield header or "", "".join(chunks)


def _extract_sequences(data):
    """Return the list of sequence strings contained in ``data``.

    Accepts FASTA-formatted text, a dict whose values are sequences, or any
    iterable of sequences. A non-string item is taken to be a tuple/list
    whose first element is the sequence.
    """
    if isinstance(data, str):
        return [sequence for _, sequence in _parse_fasta(data.splitlines())]
    if isinstance(data, dict):
        data = data.values()
    return [item if isinstance(item, str) else item[0] for item in data]


def _prompt_reading_frame():
    """Ask the user for a reading frame until they enter 0, +1 or +2."""
    while True:
        answer = input("Select a reading frame (0, +1, +2): ").strip()
        try:
            frame = int(answer)  # int() accepts a leading '+'
        except ValueError:
            frame = None
        if frame in (0, 1, 2):
            return frame
        print("Please enter 0, +1 or +2.")


def generate_codon_usage_bias_table(data, output_path="AppleGenes.edited",
                                    codon_table=None):
    """Build a codon usage bias table and save it as a text file.

    Every sequence is read in frame 0. For each codon found in the codon
    table, its bias is its count divided by the combined count of all
    codons for the same amino acid.

    Args:
        data: FASTA text, a dict of sequences, or an iterable of sequences
            (see ``_extract_sequences``).
        output_path: File to write the table to. Pass ``None`` to skip
            writing.
        codon_table: Optional mapping of amino-acid label -> list of codons.
            Defaults to the module-level ``aa_dict``.

    Returns:
        A dict mapping each observed amino-acid label to
        ``sum(biases) / sum(counts)`` over its codons.
    """
    # Imported here because the file has no top-level import block.
    from collections import Counter

    if codon_table is None:
        codon_table = aa_dict
    codon_to_aa = {
        codon: aa for aa, codons in codon_table.items() for codon in codons
    }

    # Count codons; trailing partial codons and codons absent from the
    # table (e.g. containing N) are skipped rather than raising KeyError.
    codon_counts = Counter()
    for sequence in _extract_sequences(data):
        sequence = sequence.upper()
        for start in range(0, len(sequence) - 2, 3):
            codon = sequence[start:start + 3]
            if codon in codon_to_aa:
                codon_counts[codon] += 1

    # Total per amino acid, computed up front so nothing is inserted into
    # codon_counts while it is being iterated.
    aa_totals = Counter()
    for codon, count in codon_counts.items():
        aa_totals[codon_to_aa[codon]] += count

    codon_bias = {}
    for codon, count in codon_counts.items():
        aa = codon_to_aa[codon]
        codon_bias.setdefault(aa, []).append(
            (codon, count, count / aa_totals[aa])
        )

    if output_path is not None:
        with open(output_path, "w", encoding="utf-8") as f:
            for aa, bias_list in codon_bias.items():
                f.write(f"Amino acid: {aa}\n")
                for codon, count, bias in bias_list:
                    f.write(f"Codon: {codon} Count: {count} Bias: {bias}\n")

    # NOTE(review): the per-codon biases of one amino acid sum to ~1, so
    # this value is effectively 1 / (codon count for that amino acid).
    # Kept to preserve the return value; confirm the intended metric.
    aa_bias = {}
    for aa, bias_list in codon_bias.items():
        total_bias = sum(bias for _, _, bias in bias_list)
        total_count = sum(count for _, count, _ in bias_list)
        aa_bias[aa] = total_bias / total_count
    return aa_bias


def main(fasta_path="Mdomestica_491_v1.1.cds_primaryTranscriptOnly.fa",
         output_path="AppleGenes.edited"):
    """Read the FASTA file, count one record's amino acids, save the table.

    Prompts for a reading frame, runs ``countCodons`` on the first record,
    then writes the codon usage bias table for all records to
    ``output_path``.
    """
    with open(fasta_path, "r") as fh:
        records = list(_parse_fasta(fh))
    if not records:
        print(f"No FASTA records found in {fasta_path}")
        return

    # Objective 1: amino-acid counts for one record in a user-chosen frame.
    frame = _prompt_reading_frame()
    header, sequence = records[0]
    counts = countCodons(sequence, frame)
    # countCodons may report the counts itself and hand nothing back;
    # print here only when a result is returned.
    if counts is not None:
        print(f"Amino acid counts for {header} (frame +{frame}):")
        print(counts)

    # Objective 2: codon usage bias table across every coding sequence.
    generate_codon_usage_bias_table(
        [sequence for _, sequence in records], output_path
    )
    print(f"Codon usage bias table written to {output_path}")


if __name__ == "__main__":
    main()
Trending now
This is a popular solution!
Step by step
Solved in 3 steps