I'm trying to implement a priority queue in Python using heapq as a base. It works fine when inputting values directly, and then popping them, but when values are updated or removed in between, the values begin to pop in the wrong order.
Since changing values directly (I have already tested this) does not maintain the heap invariant, I instead empty the original value list, and then insert the updated version. When popping values from the heap, if an empty list is popped, it will pop again until it returns a proper value.
I thought that the invariant must have been disrupted somehow, despite my efforts, so I used heapq.nsmallest(len(heap), heap) to check the ordering of the values. To my surprise, it always returned the correct ordering. Nonetheless, when removing the values with heapq.heappop(heap), they were not returned in that order. How can that be possible? My code is below:
import heapq
class IndexedPriorityQueue:
def __init__(self, start: list | tuple):
self.added = 0
self.heap = []
self.key_values = {}
for index, item in enumerate(start):
self._add(item[0], index, item[1])
def add(self, item: str, priority: int):
if item in self.key_values:
return False
self._add(item, self.added, priority)
self.added = 1
def _add(self, item: str, index: int, priority: int):
next = [-priority, index, item]
self.key_values[item] = next
heapq.heappush(self.heap, next)
def pop(self):
if not self.heap:
return
value = heapq.heappop(self.heap)
while not value:
value = heapq.heappop(self.heap)
return value[2]
def remove(self, item: str):
if self.key_values[item] is self.heap[0]:
heapq.heappop(self.heap)
else:
self.key_values[item].pop()
self.key_values[item].pop()
self.key_values[item].pop()
del self.key_values[item]
def update(self, item: str, new_priority: int):
if self.key_values[item] is self.heap[0]:
new = [-new_priority, self.key_values[item][1], item]
heapq.heapreplace(self.heap, new)
self.key_values[item] = new
else:
self.key_values[item].pop()
index = self.key_values[item].pop()
self.key_values[item].pop()
self._add(item, index, new_priority)
ipq = IndexedPriorityQueue((["First", 1], ["Third", 10], ["Second", 0], ["Fifth", 7], ["Fourth", 6], ["None", 9999]))
print("Actual: ", heapq.nsmallest(len(ipq.heap), ipq.heap))
return_list = []
while (next_val := ipq.pop()) is not None:
return_list.append(next_val)
print("Returned: ", return_list)
ipq = IndexedPriorityQueue((["First", 1], ["Third", 10], ["Second", 0], ["Fifth", 7], ["Fourth", 6], ["None", 9999]))
ipq.add("Sixth", 0)
ipq.update("First", 9999)
ipq.remove("None")
ipq.update("Second", 999)
ipq.update("Fourth", 8)
print("Actual: ", heapq.nsmallest(len(ipq.heap), ipq.heap))
return_list = []
while (next_val := ipq.pop()) is not None:
return_list.append(next_val)
print("Returned: ", return_list)
The printed values are:
Actual: [[-9999, 5, 'None'], [-10, 1, 'Third'], [-7, 3, 'Fifth'], [-6, 4, 'Fourth'], [-1, 0, 'First'], [0, 2, 'Second']]
Returned: ['None', 'Third', 'Fifth', 'Fourth', 'First', 'Second']
Actual: [[], [], [], [-9999, 0, 'First'], [-999, 2, 'Second'], [-10, 1, 'Third'], [-8, 4, 'Fourth'], [-7, 3, 'Fifth'], [0, 0, 'Sixth']]
Returned: ['First', 'Third', 'Second', 'Fourth', 'Fifth', 'Sixth']
The empty lists are also not all popped at the beginning.
CodePudding user response:
By emptying elements (lists) that are in the heap, the heap property can be violated. Such emptied element becomes smaller than their parent -- violating the minheap (heapq) property, and this will negatively affect the behavior of future operations on the heap, bringing more inconsistency.
You could solve this by not emptying the element that must be removed/replaced, but by only removing the last member from that element, so that it reduces its length to 2. Those two remaining values will then still allow the heap property to be maintained, while you can also detect (by the reduced length) that the element is actually deleted.
Here is what to update:
def remove(self, item: str):
if self.key_values[item] is self.heap[0]:
heapq.heappop(self.heap)
else:
# Perform only one pop so to leave 2 members in the list
self.key_values[item].pop()
del self.key_values[item]
def update(self, item: str, new_priority: int):
if self.key_values[item] is self.heap[0]:
new = [-new_priority, self.key_values[item][1], item]
heapq.heapreplace(self.heap, new)
self.key_values[item] = new
else:
self.key_values[item].pop() # Only pop the last member
index = self.key_values[item][-1] # Don't pop this one -- just read
self._add(item, index, new_priority)
def pop(self):
if not self.heap:
return
value = heapq.heappop(self.heap)
while len(value) < 3: # Changed test for delete-mark
if not self.heap: # Protection needed!
return
value = heapq.heappop(self.heap)
return value[2]
Now the output of your script will end with:
Returned: ['First', 'Second', 'Third', 'Fourth', 'Fifth', 'Sixth']
CodePudding user response:
If I replace your remove
with this:
def remove(self, item: str):
self.heap.remove(self.key_values[item])
del self.key_values[item]
heapq.heapify(self.heap)
then order seems to be maintained.