Here's what I'm doing:
def dna_complement(dna):
coup = ""
for letter in dna:
if letter == "C":
coup = "G"
if letter == "G":
coup = "C"
if letter == "A":
coup = "T"
if letter == "T":
coup = "A"
return coup
def convert_to_rna(dna):
coup2 = ""
for letter in dna:
if letter == "C":
coup2 = "G"
if letter == "G":
coup2 = "C"
if letter == "A":
coup2 = "U"
if letter == "T":
coup2 = "A"
return coup2
def translate(rna):
amino_acid = ""
for len in rna:
if range(0, len(rna), 3) == "UUU" :
amino_acid = CODON_TABLE.get("UUU")
if range(0, len(rna), 3) == "UUC" :
amino_acid = CODON_TABLE.get("UUC")
if range(0, len(rna), 3) == "UUA" :
amino_acid = CODON_TABLE.get("UUA")
CODON_TABLE = {'UUU':'Phe','UUC':'Phe','UUA':'Leu','UUG':'Leu','CUU':'Leu','CUC':'Leu','CUA':'Leu','CUG':'Leu','AUU':'Ile','AUC':'Ile','AUA':'Ile','AUG':'Met','GUU':'Val','GUC':'Val','GUA':'Val','GUG':'Val','UCU':'Ser','UCC':'Ser','UCA':'Ser','UCG':'Ser','CCU':'Pro','CCC':'Pro','CCA':'Pro','CCG':'Pro','ACU':'Thr','ACC':'Thr','ACA':'Thr','ACG':'Thr','GCU':'Ala','GCC':'Ala','GCA':'Ala','GCG':'Ala','UAU':'Tyr','UAC':'Tyr','UAA':'STOP','UAG':'STOP','CAU':'His','CAC':'His','CAA':'Gln','CAG':'Gln','AAU':'Asn','AAC':'Asn','AAA':'Lys','AAG':'Lys','GAU':'Asp','GAC':'Asp','GAA':'Glu','GAG':'Glu','UGU':'Cys','UGC':'Cys','UGA':'STOP','UGG':'Trp','CGU':'Arg','CGC':'Arg','CGA':'Arg','CGG':'Arg','AGU':'Ser','AGC':'Ser','AGA':'Arg','AGG':'Arg','GGU':'Gly','GGC':'Gly','GGA':'Gly','GGG':'Gly'}
""" the rna is "UUCUCUACGGUAACAGGGGGCCGGAGGACGACGACGAGAAUCGCCCCGGUGUAGCCGGUGGCGACGGGACGGGGACCUCCCACCGGGGUGGCCGGCAAUGUCGCUCGUAUG" # This is already converted from DNA to RNA so there's no worry about the original RNA
"""
dna="AAGAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTCTTAGCGGGGCCACATCGGCCACCGCTGCCCTGCCCCTGGAGGGTGGCCCCACCGGCCGTTACAGCGAGCATAC"
def main():
print("\nWelcome to the DNA program: The Code of Life.")
print("\nSample DNA strand:\n")
print("Regular DNA:")
print_bases(dna)
print("DNA after complement: ")
dna2 = dna_complement(dna)
print_bases(dna2)
print("DNA after RNA convertion: ")
rna = convert_to_rna(dna)
print_bases(rna)
print("The result of translation: ")
amino_acid = translate(rna)
print(amino_acid)
As in the image above. (Thanks to a user called Random Davis telling me to correct my mistake on the description, now I have everything I'm having right now in display which might be a little better)
When I tried to run this the Type Error telling me that str object is not callable, can someone help me with this?
More information: Sorry for not saying this earlier, I was trying to get the conversion like UUU <-> Phe, UUA <-> Leu, and AUU <-> Ile. And stop the protein from generating once it detected UAA, UAG, UGA.
CodePudding user response:
So if you turn the whole RNA string into a codon string:
CODON_TABLE = {'UUU':'Phe','UUC':'Phe','UUA':'Leu','UUG':'Leu','CUU':'Leu','CUC':'Leu'} # etc
def translate(rna):
amino_acid = ""
for i in range(len(rna)-2):
three_letter = rna[i:i 3]
if three_letter in CODON_TABLE:
amino_acid = CODON_TABLE[three_letter]
# if a match is found, don't look for a new codon starting with
# the second or third letter of this codon
i = 2
return amino_acid
Explanation:
for i in range(len(rna)-2):
will give us an index number to the parts of the RNA string we want to read:
UUCUCUAC len = 8
012345 i = 0-5 = range(6)
rna[i:i 3]
gives us the three letters starting from the i
th one
UUCUCUAC = rna
UUC i = 0, three_letter = rna[0:3]
UCU i = 1, tl = rna[1:4]
CUC i = 2, tl = rna[2:5]
UCU i = 3, tl = rna[3:6]
CUA i = 4, tl = rna[4:7]
UAC i = 5, tl = rna[5:8]
For each three-letter sequence, if it's a codon, it adds the name of the codon to the amino acid
amino_acid = CODON_TABLE[three_letter]
Then it skips i
forward by 2 to avoid looking for another codon starting with letters we've already used:
UUCUCUAC
UUC Phe
UCU (skip)
CUC (skip)
UCU Ser
CUA (skip)
UAC (skip)
If it finds something that's not a codon, it ignores it and moves onto the next three letters. (I'm not a biologist, I don't know if every three-letter sequence is a codon?)
UUCZUCUAC
UUC Phe
UCU (skip)
CUC (skip)
ZUC (not found)
UCU Sre
CodePudding user response:
I would first rewrite your
range(0, len(rna), 3)
Do you want to get 3 by 3 from your input? For example, if your input is "ABCDE", first you want "ABC", then you want "BCD", etc. Is that correct?
If so, I would do it like this:
p = rna[i:i 3]
where rna is your input string.
Also, what do you want to do if you found UUU, UUC or UUA? What if you find all of them?
I hope you find this helpful.
def translate(rna):
amino_acid = []
for i in range(len(rna) - 2):
p = rna[i:i 3]
if p == "UUU" :
amino_acid.append(CODON_TABLE["UUU"])
if p == "UUC" :
amino_acid.append(CODON_TABLE["UUC"])
if p == "UUA" :
amino_acid.append(CODON_TABLE["UUA"])
return amino_acid
CODON_TABLE = {'UUU':'Phe','UUC':'Phe','UUA':'Leu','UUG':'Leu','CUU':'Leu','CUC':'Leu','CUA':'Leu','CUG':'Leu','AUU':'Ile','AUC':'Ile','AUA':'Ile','AUG':'Met','GUU':'Val','GUC':'Val','GUA':'Val','GUG':'Val','UCU':'Ser','UCC':'Ser','UCA':'Ser','UCG':'Ser','CCU':'Pro','CCC':'Pro','CCA':'Pro','CCG':'Pro','ACU':'Thr','ACC':'Thr','ACA':'Thr','ACG':'Thr','GCU':'Ala','GCC':'Ala','GCA':'Ala','GCG':'Ala','UAU':'Tyr','UAC':'Tyr','UAA':'STOP','UAG':'STOP','CAU':'His','CAC':'His','CAA':'Gln','CAG':'Gln','AAU':'Asn','AAC':'Asn','AAA':'Lys','AAG':'Lys','GAU':'Asp','GAC':'Asp','GAA':'Glu','GAG':'Glu','UGU':'Cys','UGC':'Cys','UGA':'STOP','UGG':'Trp','CGU':'Arg','CGC':'Arg','CGA':'Arg','CGG':'Arg','AGU':'Ser','AGC':'Ser','AGA':'Arg','AGG':'Arg','GGU':'Gly','GGC':'Gly','GGA':'Gly','GGG':'Gly'}
rna = "UUCUCUACGGUAACAGGGGGCCGGAGGACGACGACGAGAAUCGCCCCGGUGUAGCCGGUGGCGACGGGACGGGGACCUCCCACCGGGGUGGCCGGCAAUGUCGCUCGUAUG" # This is already converted from DNA to RNA so there's no worry about the original RNA
translate(rna)
Notice that I am iterating over the input and ignoring the last two characters, that's the reason why we iterate till range(len(rna)-2).