Decreasing the run time of a function-CodePudding

For a school project I have to program a Wordle game. Now, I'm pretty much done with it, but there's still one requirement I need to complete. A certain function needs finish within 1 second, hoewever mine is taking nearly 6 seconds to finish. The function is called 'smart_guess', it takes two arguments: 'wordlist' and 'targets'. 'Wordlist' is a list of words you are allowed to guess. 'Targets' is a list of possible target words, based on previous guesses. When no guesses have been made yet, targets will be equal to wordlist. The return value is a word (i.e. string) that appears in wordlist. It is supposed to be a smart guess that helps to find the actual target in very few turns.

def smart_guess(wordlist, targets):
    ''' Returns best guess after comparing the distributions of each sampled guess '''
    
    #Get randomized sample from targetlist 
    samples = sample_targets(targets)
    #Get big number to start with
    min_largest_value = len(wordlist)
    best_guess = ""
    #Iterate trough samples
    for guess in samples:
        #Find the biggest number in distribution
        biggest_value_in_distr = max(distributions(guess, targets).values())
        #Check if biggest number is the smallest of all, if so, add the guess to best_guess
        if biggest_value_in_distr < min_largest_value:
            min_largest_value = biggest_value_in_distr
            best_guess = guess
            if min_largest_value <= 2:
                return best_guess  
    return best_guess
 
def sample_targets(targets):
    #Get randomized sample from targetlist and add a total random word
    len_word = len(targets[0])
    decr = 10
    if len_word == 4:
        sample_size = 100
    decr -= 1
    if len_word == 5:
        sample_size = 100
    decr -= 1
    if len_word == 6:
        sample_size = len_word * decr 
    decr -= 1
    if len_word == 7:
        sample_size = 60
    decr -= 1
    if len_word == 8:
        sample_size = len_word * decr
    decr -= 1
    if len_word == 9:
        sample_size = 8
    decr -= 1
    if len_word == 10:
        sample_size = 5
    
    samples = set([i for i in targets[0:sample_size]])
    
    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    return samples

This is the function I want to make run faster. And for it to be more clear, I'll add my whole program down here:

import random

def load_words(file):
  result = set()
  with open(file) as f:
    for line in f.readlines():
       word = line.strip().lower()
       if word.isalpha() and word.isascii():
         result.add(word)
  return sorted(result)

def compare(guess, target):
    ''' Compare two words and give string with 'X' letter is in good place, 'O' not in good place but in word and '-': not in the word. '''
    result = list(target)
    index_list = list(range(len(guess)))
    letter_dict = {}
    for letter in target:
        letter_dict[letter] = target.count(letter)
        
    # Iterate list of indexes
    for idx in range(len(index_list)):
        # Look which letters are in good place
        if guess[idx] == target[idx]:
            # Decrease letter count
            letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
            # Delete index from list add 'X'
            result[idx] = "X"
            index_list.remove(idx)
            
    for idx in index_list:
        #Check if letter still is in letter_dict and in target
        if guess[idx] in target and letter_dict[guess[idx]] > 0:
            # Remove lettercount from dict
            letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
            # Add 'O' to place in guess_list 
            result[idx] = "O"
        else:
            result[idx] = "-"
            
    return  "".join(result)

dutch_words = load_words("wordlist.txt")
d6 = [word for word in dutch_words if len(word) == 6]
d6q = [word for word in d6 if word.startswith("q")]


def filter_targets(targets, guess_results):
   final_targets = []  
   for target in targets:
        #Create list with compared results
       temp_list = []
       for guess in guess_results:
          temp_list.append(compare(guess, target))   
        #Compare results are the same, add to final_targets  
       if temp_list == list(guess_results.values()):
          final_targets.append(target)
   return final_targets
    
def distributions(guess, targets):
    distr_dict = {}
    #Check how many times compared gives result
    for target in targets:
        result = compare(guess, target)
        if result not in list(distr_dict.keys()):
            distr_dict[result] = 1
        else:
            distr_dict[result]  = 1 
    return distr_dict

def smart_guess(wordlist, targets):
    ''' Returns best guess after comparing the distributions of each sampled guess '''
    
    #Get randomized sample from targetlist 
    samples = sample_targets(targets)
    #Get big number to start with
    min_largest_value = len(wordlist)
    best_guess = ""
    #Iterate trough samples
    for guess in samples:
        #Find the biggest number in distribution
        biggest_value_in_distr = max(distributions(guess, targets).values())
        #Check if biggest number is the smallest of all, if so, add the guess to best_guess
        if biggest_value_in_distr < min_largest_value:
            min_largest_value = biggest_value_in_distr
            best_guess = guess
            if min_largest_value <= 2:
                return best_guess  
    return best_guess
 
def sample_targets(targets):
    #Get randomized sample from targetlist and add a total random word
    len_word = len(targets[0])
    decr = 10
    if len_word == 4:
        sample_size = 100
    decr -= 1
    if len_word == 5:
        sample_size = 100
    decr -= 1
    if len_word == 6:
        sample_size = len_word * decr 
    decr -= 1
    if len_word == 7:
        sample_size = 60
    decr -= 1
    if len_word == 8:
        sample_size = len_word * decr
    decr -= 1
    if len_word == 9:
        sample_size = 8
    decr -= 1
    if len_word == 10:
        sample_size = 5
    
    samples = set([i for i in targets[0:sample_size]])

    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    return samples


def simulate_game(target, wordlist):
    n = len(target)
    wordlist = [w for w in wordlist if len(w) == n and w[0] == target[0]]
    if target not in wordlist:
      raise ValueError("Target is not in wordlist, thus impossible to guess.")
    targets = wordlist.copy()
    turns = 0
    while True:
       num_words = len(targets)
       print(f"There {'is' if num_words==1 else 'are'} {num_words} possible"
                f" target{'s' if num_words!=1 else ''} left.")
       turns  = 1
       guess = smart_guess(wordlist, targets)
       print("My guess is: ", guess.upper())
       result = compare(guess, target)
       print("Correctness: ", result)
       if result == n * "X":
          print(f"Target was guessed in {turns} "
                 f"turn{'s' if turns!=1 else ''}.")
          break
       else:
          targets = filter_targets(targets, {guess: result})

def count_turns(target, wordlist):
    n = len(target)
    wordlist = [w for w in wordlist if len(w) == n and w[0]==target[0]]
    targets = wordlist.copy()
    turns = 0
    while True:
      turns  = 1
      if turns > 100:
        raise RuntimeError("This is going nowhere: 100 turns used.")
      guess = smart_guess(wordlist, targets)
      result = compare(guess, target)
      if result == n * "X":
        break
      else:
          targets = filter_targets(targets, {guess: result})
    return turns

def turn_count_simulation(word_length, wordlist, runs=100):
    wordlist = [word for word in wordlist if len(word) == word_length]
    total = 0
    for _ in range(runs):
       target = random.choice(wordlist)
       total  = count_turns(target, wordlist)
    return total/runs

CodePudding user response：

There are many steps to reducing runtime.

Step one: Make vars one liners.

Lets say I have x=1 and y=2. Instead of writing them on 2 lines you would do x,y=1,2.

Step 2: Remove bool values in statements. Lets say I have a while statement, while x == True:. I would replace this with while x: and if x is false then I would do, while not x:.

Step 3: Make one liners. Python is slower when there are a lot of lines. We can remove about 40% of our lines by using one liners. Instead of doing something like x =1, y =1. We can do x =1; y =1.

Conclusion: There are many steps to optimize code in python, these are just three of them. I would recommend you research these three steps to get a general idea of how to implement them.

CodePudding user response：

What you need is profiling your code to understand where it takes too long. So you don't waste time trying to optimize parts that do not really affect the total timing.

Let's try just generating a few "words" (1716: the combinations of 6 of the first 13 letters of the alphabet), and profile smart_guess:

import cProfile
from itertools import combinations
ws=[''.join(c) for c in combinations('ABCDEFGHIJKLM',6)]

cProfile.run('smart_guess(ws,ws)')
         1053127 function calls in 0.749 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.749    0.749 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 random.py:224(_randbelow)
        3    0.000    0.000    0.000    0.000 random.py:256(choice)
        1    0.000    0.000    0.000    0.000 wordle_long.py:115(<listcomp>)
    87516    0.372    0.000    0.500    0.000 wordle_long.py:12(compare)
       51    0.242    0.005    0.749    0.015 wordle_long.py:59(distributions)
        1    0.000    0.000    0.749    0.749 wordle_long.py:70(smart_guess)
        1    0.000    0.000    0.000    0.000 wordle_long.py:90(sample_targets)
        1    0.000    0.000    0.749    0.749 {built-in method builtins.exec}
   175037    0.012    0.000    0.012    0.000 {built-in method builtins.len}
       51    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        3    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        3    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
   525096    0.089    0.000    0.089    0.000 {method 'count' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
    87516    0.014    0.000    0.014    0.000 {method 'join' of 'str' objects}
    87516    0.008    0.000    0.008    0.000 {method 'keys' of 'dict' objects}
    90272    0.012    0.000    0.012    0.000 {method 'remove' of 'list' objects}
       51    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}

So the main culprit is compare. Try to optimize it. For instance:

def compare(guess, target):
    ltarget = list(target)
    result = ['-' for _ in guess]
    ##find right char, right place
    for i,c in enumerate(guess):
        if c == ltarget[i]:
            result[i] = 'X'
            ltarget[i] = ''
    ##find right char, wrong place
    for i,c in enumerate(guess):
        if c in ltarget:
            result[i] = 'O'
            ltarget[ltarget.index(c)] = ''
    return ''.join(result)

And now

>>> cProfile.run('smart_guess(ws,ws)')
         502586 function calls in 0.503 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.503    0.503 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 random.py:224(_randbelow)
        3    0.000    0.000    0.000    0.000 random.py:256(choice)
        1    0.000    0.000    0.000    0.000 wordle_long.py:105(sample_targets)
    87516    0.232    0.000    0.288    0.000 wordle_long.py:12(compare)
        1    0.000    0.000    0.000    0.000 wordle_long.py:130(<listcomp>)
    87516    0.022    0.000    0.022    0.000 wordle_long.py:14(<listcomp>)
       51    0.208    0.004    0.503    0.010 wordle_long.py:74(distributions)
        1    0.000    0.000    0.503    0.503 wordle_long.py:85(smart_guess)
        1    0.000    0.000    0.503    0.503 {built-in method builtins.exec}
        5    0.000    0.000    0.000    0.000 {built-in method builtins.len}
       51    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        3    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        3    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
   152343    0.022    0.000    0.022    0.000 {method 'index' of 'list' objects}
    87516    0.013    0.000    0.013    0.000 {method 'join' of 'str' objects}
    87516    0.007    0.000    0.007    0.000 {method 'keys' of 'dict' objects}
       51    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}

A bit better. Now, distributions is the slowest part, so we should optimize it. And so on, until you reach an acceptable execution time.