Home > Software design >  Variables not being reassigned in Loop
Variables not being reassigned in Loop

Time:02-19

Both degeneracy1 and protein_ls are not being reassigned in the nested while loops I am using, I can't figure out why this. This program is designed to find the best protein motif to create an oligo for genetic engineering. Both degeneracy1 and protein_ls are listed near the bottom of the python code.

import itertools
from collections import Counter
degen = {
    "A": 4,"R": 6,"N": 2,"D": 2,"C": 2,
    "E": 2,"Q": 2,"G": 2,"H": 2,"I": 3,
    "L": 6,"K": 2,"M": 1,"F": 2,"P": 4,
    "S": 6,"T": 4,"W": 1, "Y": 2,
    "V": 4}
d= {
    'A': ['GCA', 'GCC', 'GCG', 'GCT'],
    'C': ['TGC', 'TGT'],
    'D': ['GAC', 'GAT'],
    'E': ['GAA', 'GAG'],
    'F': ['TTC', 'TTT'],
    'G': ['GGA', 'GGC', 'GGG', 'GGT'],
    'H': ['CAC', 'CAT'],
    'I': ['ATA', 'ATC', 'ATT'],
    'K': ['AAA', 'AAG'],
    'L': ['CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG'],
    'M': ['ATG'],
    'N': ['AAC', 'AAT'],
    'P': ['CCA', 'CCC', 'CCG', 'CCT'],
    'Q': ['CAA', 'CAG'],
    'R': ['AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGT'],
    'S': ['AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'],
    'T': ['ACA', 'ACC', 'ACG', 'ACT'],
    'V': ['GTA', 'GTC', 'GTG', 'GTT'],
    'W': ['TGG'],
    'Y': ['TAC', 'TAT'],
    '_': ['TAA', 'TAG', 'TGA'],
}
def generator(protein):
    l = [d[aa] for aa in protein]
    for comb in itertools.product(*l):
        yield "".join(comb)



if __name__ == '__main__':
    import sys
    sequence= 'MQLRNPELHLGCALALRFLALVSWDIPGARALDNGLARTPTMGWLHWERFMCNLDCQEEPDSCISEKLFMEMAELMVSEGWKDAGYEYLCIDDCWMAPQRDSEGRLQADPQRFPHGIRQLANYVHSKGLKLGIYADVGNKTCAGFPGSFGYYDIDAQTFADWGVDLLKFDGCYCDSLENLADGYKHMSLALNRTGRSIVYSCEWPLYMWPFQKPNYTEIRQYCNHWRNFADIDDSWKSIKSILDWTSFNQERIVDVAGPGGWNDPDMLVIGNFGLSWNQQVTQMALWAIMAAPLFMSNDLRHISPQAKALLQDKDVIAINQDPLGKQGYQLRQGDNFEVWERPLSGLAWAVAMINRQEIGGPRSYTIAVASLGKGVACNPACFITQLLPVKRKLGFYEWTSRLRSHINPTGTVLLQLENTMQMSLKDLL'

    GCminbound= 35
    GCmaxbound= 65
    print('best sequences are as follows')
    ls_seq=list(sequence)
    degeneracy1 = 10000000000
    degeneracy = 0
    i=0
    while i<len(ls_seq):
        degeneracy=0
        j=0
        ls1_seq=ls_seq[i:i 7]
        if len(ls1_seq)==7:
            g = generator(ls1_seq)
            GCmax=0
            GCmin=100
            for rna_seq in g:

                seq_list = list(rna_seq)
                GCcount=100*(seq_list.count('G') seq_list.count('C'))/len(seq_list)
                if GCcount>GCmax:
                    GCmax=GCcount
                elif GCmin>GCcount:
                    GCmin=GCcount
        
        if GCmin>GCminbound and GCmax<GCmaxbound:

            while j<7:
                result=degen[ls1_seq[j]]
                degeneracy=result degeneracy
                j=j 1
            if degeneracy<degeneracy1:
                degeneracy1 = degeneracy
                protein_ls=ls1_seq
        i = i 1
        print('best degeneracy is: ', degeneracy1)
        print('the sequence is', ''.join(protein_ls))

CodePudding user response:

I did some refactoring. Can you try the following code?

import itertools
import re

degen = {"A": 4,"R": 6,"N": 2,"D": 2,"C": 2, "E": 2,"Q": 2,"G": 2,"H": 2,"I": 3, "L": 6,"K": 2,"M": 1,"F": 2,"P": 4, "S": 6,"T": 4,"W": 1, "Y": 2, "V": 4}
d= {'A': ['GCA', 'GCC', 'GCG', 'GCT'], 'C': ['TGC', 'TGT'], 'D': ['GAC', 'GAT'], 'E': ['GAA', 'GAG'], 'F': ['TTC', 'TTT'], 'G': ['GGA', 'GGC', 'GGG', 'GGT'], 'H': ['CAC', 'CAT'], 'I': ['ATA', 'ATC', 'ATT'], 'K': ['AAA', 'AAG'], 'L': ['CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG'], 'M': ['ATG'], 'N': ['AAC', 'AAT'], 'P': ['CCA', 'CCC', 'CCG', 'CCT'], 'Q': ['CAA', 'CAG'], 'R': ['AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGT'], 'S': ['AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'], 'T': ['ACA', 'ACC', 'ACG', 'ACT'], 'V': ['GTA', 'GTC', 'GTG', 'GTT'], 'W': ['TGG'], 'Y': ['TAC', 'TAT'], '_': ['TAA', 'TAG', 'TGA'],}

def rna_seq_generator(protein):
    for comb in itertools.product(*[d[s] for s in protein]):
        yield "".join(comb)

if __name__ == '__main__':
    print('best sequences are as follows')
    sequence= 'MQLRNPELHLGCALALRFLALVSWDIPGARALDNGLARTPTMGWLHWERFMCNLDCQEEPDSCISEKLFMEMAELMVSEGWKDAGYEYLCIDDCWMAPQRDSEGRLQADPQRFPHGIRQLANYVHSKGLKLGIYADVGNKTCAGFPGSFGYYDIDAQTFADWGVDLLKFDGCYCDSLENLADGYKHMSLALNRTGRSIVYSCEWPLYMWPFQKPNYTEIRQYCNHWRNFADIDDSWKSIKSILDWTSFNQERIVDVAGPGGWNDPDMLVIGNFGLSWNQQVTQMALWAIMAAPLFMSNDLRHISPQAKALLQDKDVIAINQDPLGKQGYQLRQGDNFEVWERPLSGLAWAVAMINRQEIGGPRSYTIAVASLGKGVACNPACFITQLLPVKRKLGFYEWTSRLRSHINPTGTVLLQLENTMQMSLKDLL'
    n_char_read = 7
    GC_min_baound, GC_max_bound = 35, 65
    degeneracy_best = 10000000000
    seq_protein = ''
    for i in range(len(sequence)):
        seq_target=sequence[i:i n_char_read]
        if len(seq_target)==n_char_read:
            GC_min, GC_max = 100, 0
            for rna_seq in rna_seq_generator(seq_target):
                GC_occupancy = 100 * len(re.findall('G|C', rna_seq)) / len(rna_seq)
                GC_min, GC_max = min(GC_min, GC_occupancy), max(GC_max, GC_occupancy)
        if GC_min_baound < GC_min and GC_max < GC_max_bound:
            degeneracy = sum([degen[c] for c in seq_target])
            if degeneracy < degeneracy_best:
                degeneracy_best = degeneracy
                seq_protein = seq_target
        print(f'best degeneracy is: {degeneracy_best}')
        print(f'the sequence is: {seq_protein}')
  • Related