For a school project I have to program a Wordle game. Now, I'm pretty much done with it, but there's still one requirement I need to complete. A certain function needs finish within 1 second, hoewever mine is taking nearly 6 seconds to finish. The function is called 'smart_guess', it takes two arguments: 'wordlist' and 'targets'. 'Wordlist' is a list of words you are allowed to guess. 'Targets' is a list of possible target words, based on previous guesses. When no guesses have been made yet, targets will be equal to wordlist. The return value is a word (i.e. string) that appears in wordlist. It is supposed to be a smart guess that helps to find the actual target in very few turns.
def smart_guess(wordlist, targets):
''' Returns best guess after comparing the distributions of each sampled guess '''
#Get randomized sample from targetlist
samples = sample_targets(targets)
#Get big number to start with
min_largest_value = len(wordlist)
best_guess = ""
#Iterate trough samples
for guess in samples:
#Find the biggest number in distribution
biggest_value_in_distr = max(distributions(guess, targets).values())
#Check if biggest number is the smallest of all, if so, add the guess to best_guess
if biggest_value_in_distr < min_largest_value:
min_largest_value = biggest_value_in_distr
best_guess = guess
if min_largest_value <= 2:
return best_guess
return best_guess
def sample_targets(targets):
#Get randomized sample from targetlist and add a total random word
len_word = len(targets[0])
decr = 10
if len_word == 4:
sample_size = 100
decr -= 1
if len_word == 5:
sample_size = 100
decr -= 1
if len_word == 6:
sample_size = len_word * decr
decr -= 1
if len_word == 7:
sample_size = 60
decr -= 1
if len_word == 8:
sample_size = len_word * decr
decr -= 1
if len_word == 9:
sample_size = 8
decr -= 1
if len_word == 10:
sample_size = 5
samples = set([i for i in targets[0:sample_size]])
samples.add(random.choice(targets))
samples.add(random.choice(targets))
samples.add(random.choice(targets))
return samples
This is the function I want to make run faster. And for it to be more clear, I'll add my whole program down here:
import random
def load_words(file):
result = set()
with open(file) as f:
for line in f.readlines():
word = line.strip().lower()
if word.isalpha() and word.isascii():
result.add(word)
return sorted(result)
def compare(guess, target):
''' Compare two words and give string with 'X' letter is in good place, 'O' not in good place but in word and '-': not in the word. '''
result = list(target)
index_list = list(range(len(guess)))
letter_dict = {}
for letter in target:
letter_dict[letter] = target.count(letter)
# Iterate list of indexes
for idx in range(len(index_list)):
# Look which letters are in good place
if guess[idx] == target[idx]:
# Decrease letter count
letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
# Delete index from list add 'X'
result[idx] = "X"
index_list.remove(idx)
for idx in index_list:
#Check if letter still is in letter_dict and in target
if guess[idx] in target and letter_dict[guess[idx]] > 0:
# Remove lettercount from dict
letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
# Add 'O' to place in guess_list
result[idx] = "O"
else:
result[idx] = "-"
return "".join(result)
dutch_words = load_words("wordlist.txt")
d6 = [word for word in dutch_words if len(word) == 6]
d6q = [word for word in d6 if word.startswith("q")]
def filter_targets(targets, guess_results):
final_targets = []
for target in targets:
#Create list with compared results
temp_list = []
for guess in guess_results:
temp_list.append(compare(guess, target))
#Compare results are the same, add to final_targets
if temp_list == list(guess_results.values()):
final_targets.append(target)
return final_targets
def distributions(guess, targets):
distr_dict = {}
#Check how many times compared gives result
for target in targets:
result = compare(guess, target)
if result not in list(distr_dict.keys()):
distr_dict[result] = 1
else:
distr_dict[result] = 1
return distr_dict
def smart_guess(wordlist, targets):
''' Returns best guess after comparing the distributions of each sampled guess '''
#Get randomized sample from targetlist
samples = sample_targets(targets)
#Get big number to start with
min_largest_value = len(wordlist)
best_guess = ""
#Iterate trough samples
for guess in samples:
#Find the biggest number in distribution
biggest_value_in_distr = max(distributions(guess, targets).values())
#Check if biggest number is the smallest of all, if so, add the guess to best_guess
if biggest_value_in_distr < min_largest_value:
min_largest_value = biggest_value_in_distr
best_guess = guess
if min_largest_value <= 2:
return best_guess
return best_guess
def sample_targets(targets):
#Get randomized sample from targetlist and add a total random word
len_word = len(targets[0])
decr = 10
if len_word == 4:
sample_size = 100
decr -= 1
if len_word == 5:
sample_size = 100
decr -= 1
if len_word == 6:
sample_size = len_word * decr
decr -= 1
if len_word == 7:
sample_size = 60
decr -= 1
if len_word == 8:
sample_size = len_word * decr
decr -= 1
if len_word == 9:
sample_size = 8
decr -= 1
if len_word == 10:
sample_size = 5
samples = set([i for i in targets[0:sample_size]])
samples.add(random.choice(targets))
samples.add(random.choice(targets))
samples.add(random.choice(targets))
return samples
def simulate_game(target, wordlist):
n = len(target)
wordlist = [w for w in wordlist if len(w) == n and w[0] == target[0]]
if target not in wordlist:
raise ValueError("Target is not in wordlist, thus impossible to guess.")
targets = wordlist.copy()
turns = 0
while True:
num_words = len(targets)
print(f"There {'is' if num_words==1 else 'are'} {num_words} possible"
f" target{'s' if num_words!=1 else ''} left.")
turns = 1
guess = smart_guess(wordlist, targets)
print("My guess is: ", guess.upper())
result = compare(guess, target)
print("Correctness: ", result)
if result == n * "X":
print(f"Target was guessed in {turns} "
f"turn{'s' if turns!=1 else ''}.")
break
else:
targets = filter_targets(targets, {guess: result})
def count_turns(target, wordlist):
n = len(target)
wordlist = [w for w in wordlist if len(w) == n and w[0]==target[0]]
targets = wordlist.copy()
turns = 0
while True:
turns = 1
if turns > 100:
raise RuntimeError("This is going nowhere: 100 turns used.")
guess = smart_guess(wordlist, targets)
result = compare(guess, target)
if result == n * "X":
break
else:
targets = filter_targets(targets, {guess: result})
return turns
def turn_count_simulation(word_length, wordlist, runs=100):
wordlist = [word for word in wordlist if len(word) == word_length]
total = 0
for _ in range(runs):
target = random.choice(wordlist)
total = count_turns(target, wordlist)
return total/runs
CodePudding user response:
There are many steps to reducing runtime.
Step one: Make vars one liners.
Lets say I have x=1
and y=2
. Instead of writing them on 2 lines you
would do x,y=1,2
.
Step 2: Remove bool values in statements.
Lets say I have a while statement, while x == True:
. I would replace this
with while x:
and if x is false then I would do, while not x:
.
Step 3: Make one liners.
Python is slower when there are a lot of lines. We can remove about 40% of
our lines by using one liners. Instead of doing something like x =1
,
y =1
. We can do x =1; y =1
.
Conclusion: There are many steps to optimize code in python, these are just three of them. I would recommend you research these three steps to get a general idea of how to implement them.
CodePudding user response:
What you need is profiling your code to understand where it takes too long. So you don't waste time trying to optimize parts that do not really affect the total timing.
Let's try just generating a few "words" (1716: the combinations of 6 of the first 13 letters of the alphabet), and profile smart_guess
:
import cProfile
from itertools import combinations
ws=[''.join(c) for c in combinations('ABCDEFGHIJKLM',6)]
cProfile.run('smart_guess(ws,ws)')
1053127 function calls in 0.749 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.749 0.749 <string>:1(<module>)
3 0.000 0.000 0.000 0.000 random.py:224(_randbelow)
3 0.000 0.000 0.000 0.000 random.py:256(choice)
1 0.000 0.000 0.000 0.000 wordle_long.py:115(<listcomp>)
87516 0.372 0.000 0.500 0.000 wordle_long.py:12(compare)
51 0.242 0.005 0.749 0.015 wordle_long.py:59(distributions)
1 0.000 0.000 0.749 0.749 wordle_long.py:70(smart_guess)
1 0.000 0.000 0.000 0.000 wordle_long.py:90(sample_targets)
1 0.000 0.000 0.749 0.749 {built-in method builtins.exec}
175037 0.012 0.000 0.012 0.000 {built-in method builtins.len}
51 0.000 0.000 0.000 0.000 {built-in method builtins.max}
3 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}
3 0.000 0.000 0.000 0.000 {method 'bit_length' of 'int' objects}
525096 0.089 0.000 0.089 0.000 {method 'count' of 'str' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
3 0.000 0.000 0.000 0.000 {method 'getrandbits' of '_random.Random' objects}
87516 0.014 0.000 0.014 0.000 {method 'join' of 'str' objects}
87516 0.008 0.000 0.008 0.000 {method 'keys' of 'dict' objects}
90272 0.012 0.000 0.012 0.000 {method 'remove' of 'list' objects}
51 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}
So the main culprit is compare
. Try to optimize it. For instance:
def compare(guess, target):
ltarget = list(target)
result = ['-' for _ in guess]
##find right char, right place
for i,c in enumerate(guess):
if c == ltarget[i]:
result[i] = 'X'
ltarget[i] = ''
##find right char, wrong place
for i,c in enumerate(guess):
if c in ltarget:
result[i] = 'O'
ltarget[ltarget.index(c)] = ''
return ''.join(result)
And now
>>> cProfile.run('smart_guess(ws,ws)')
502586 function calls in 0.503 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.503 0.503 <string>:1(<module>)
3 0.000 0.000 0.000 0.000 random.py:224(_randbelow)
3 0.000 0.000 0.000 0.000 random.py:256(choice)
1 0.000 0.000 0.000 0.000 wordle_long.py:105(sample_targets)
87516 0.232 0.000 0.288 0.000 wordle_long.py:12(compare)
1 0.000 0.000 0.000 0.000 wordle_long.py:130(<listcomp>)
87516 0.022 0.000 0.022 0.000 wordle_long.py:14(<listcomp>)
51 0.208 0.004 0.503 0.010 wordle_long.py:74(distributions)
1 0.000 0.000 0.503 0.503 wordle_long.py:85(smart_guess)
1 0.000 0.000 0.503 0.503 {built-in method builtins.exec}
5 0.000 0.000 0.000 0.000 {built-in method builtins.len}
51 0.000 0.000 0.000 0.000 {built-in method builtins.max}
3 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}
3 0.000 0.000 0.000 0.000 {method 'bit_length' of 'int' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
3 0.000 0.000 0.000 0.000 {method 'getrandbits' of '_random.Random' objects}
152343 0.022 0.000 0.022 0.000 {method 'index' of 'list' objects}
87516 0.013 0.000 0.013 0.000 {method 'join' of 'str' objects}
87516 0.007 0.000 0.007 0.000 {method 'keys' of 'dict' objects}
51 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}
A bit better. Now, distributions
is the slowest part, so we should optimize it. And so on, until you reach an acceptable execution time.