Home > Back-end >  How to implement a heap sort in-place?
How to implement a heap sort in-place?

Time:04-01

My current implementation (shown below) works, but it's not in place. I tried to google, but I couldn't find an example. Your guidance is appreciated!

As you can see, in the second phase of sorting, I used sliced the original list so I can heapify this slice. However, I'm not supposed to use the slice operation as it creates a new list. What else can I try?

```
from queue import Empty

# logic is easy but hard to achieve in-order.

class PriorityQueueBase():
    class _Item():
        __slots__ = "_key","_value"
        def __init__(self, k,v):
            self._key = k
            self._value = v
        def __lt__(self,other):
            return self._key < other._key
        def __str__(self):
            return str(self._key)  " "  str(self._value)
    def is_empty(self):
        return len(self) == 0
class HeapPriorityQueue(PriorityQueueBase):
    # This is a maximum-oriented heap priority queue
    # This uses bottom-up construction method.
    def _parent(self,j):
        return (j-1) // 2
    def _left(self,j):
        return 2*j 1
    def _right(self,j):
        return 2*j 2
    def _has_left(self,j):
        return self._left(j) < len(self._data)
    def _has_right(self,j):
        return self._right(j) < len(self._data)
    def _swap(self,i,j):
        self._data[i],self._data[j] = self._data[j],self._data[i]
    def _upheap(self,j):
        parent = self._parent(j)
        if j>0 and self._data[j] > self._data[parent]:
            self._swap(j,parent)
            self._upheap(parent)
    def _downheap(self,j):
        if self._has_left(j) and self._has_right(j):
            left = self._data[self._left(j)]
            right = self._data[self._right(j)]
            if left > right:
                if self._data[self._left(j)] > self._data[j]:
                    self._swap(j,self._left(j))
                    self._downheap(self._left(j))
            else:
                if self._data[self._right(j)] > self._data[j]:
                    self._swap(j,self._right(j))
                    self._downheap(self._right(j))
        elif self._has_left(j):
            if self._data[self._left(j)] > self._data[j]:
                self._swap(j,self._left(j))
                self._downheap(self._left(j))
        
        # ---------------------public methods----------------------
    def __init__(self,contents=[]):
        if isinstance(contents[0],self._Item):
            self._data = contents
            print("the slice passed in",self)
        else:
            self._data = [self._Item(k,v) for k,v in contents]
        if len(self._data) > 1:
            self._heapify()
            print("after heapifying the slice",self)
    def __len__(self):
        return len(self._data)
    def add(self,key,value):
        i = self._Item(key,value)
        self._data.append(i)
        self._upheap(len(self._data)-1)
    def max(self):
        if self.is_empty():
            raise Empty("Priority queue is empty")
        item = self._data[0]
        return (item._key,item._value)
    def remove_max(self):
        if self.is_empty():
            raise Empty("Priority queue is empty")
        self._swap(0,len(self)-1)
        item = self._data.pop()
        self._downheap(0)
        return (item._key,item._value)
    def _heapify(self):
        start = self._parent(len(self._data)-1)
        for i in range(start,-1,-1):
            self._downheap(i)
    def __iter__(self):
        for i in self._data:
            yield i
    def __str__(self):
        l = [str(i)for i in self._data]
        return " ".join(l)

    def sort(self):
        l = len(self._data)
        for i in range(1,l):
            self._swap(0,l-i)
            print(self)
            # here it's not in-place. 
            slice = self._data[0:l-i]
            h = HeapPriorityQueue(slice)
            l1 = len(h._data)
            self._data[0:l-i]=h._data
            print(self)
        return self._data

c = ((1,"Frank"),(2,"Daniel"),(3,"Mark"),(4,"Mark"),(5,"Mark"),(6,"Mark"))
h = HeapPriorityQueue(c)
s = h.sort()
for i in s:
    print(i)
```

CodePudding user response:

You could take the following steps to make it inplace:

  • Make it possible for the heap to only act on part of a given list, ignoring the values that do not belong to this part. For that purpose, introduce a size attribute, which starts with the same value as len(contents), but can be reduced by the user of the heap. This means that you also need to:

    • Replace every use of len(self), or len(self._data) with self.size
    • Move the is_empty method into the subclass, which actually removes the need to have a subclassing at all.
  • Replace the part where you create a new instance of a heap with:

    self.size -= 1
    self._downheap(0)
    
    • As a consequence you no longer need the following part in the constructor:

      if isinstance(contents[0],self._Item):
          self._data = contents
          print("the slice passed in",self)
      

Updated code:

from queue import Empty

class HeapPriorityQueue():  # Subclassing removed
    class _Item():
        __slots__ = "_key","_value"
        def __init__(self, k,v):
            self._key = k
            self._value = v
        
        def __lt__(self,other):
            return self._key < other._key
        
        def __str__(self):
            return str(self._key)  " "  str(self._value)
    
    def is_empty(self):
        return self.size == 0  # use self.size
        
    def _parent(self,j):
        return (j-1) // 2
        
    def _left(self,j):
        return 2*j 1
        
    def _right(self,j):
        return 2*j 2
        
    def _has_left(self,j):
        return self._left(j) < self.size  # use self.size
        
    def _has_right(self,j):
        return self._right(j) < self.size  # use self.size
        
    def _swap(self,i,j):
        self._data[i],self._data[j] = self._data[j],self._data[i]
        
    def _upheap(self,j):
        parent = self._parent(j)
        if j>0 and self._data[j] > self._data[parent]:
            self._swap(j,parent)
            self._upheap(parent)
            
    def _downheap(self,j):
        if self._has_left(j) and self._has_right(j):
            left = self._data[self._left(j)]
            right = self._data[self._right(j)]
            if left > right:
                if self._data[self._left(j)] > self._data[j]:
                    self._swap(j,self._left(j))
                    self._downheap(self._left(j))
            else:
                if self._data[self._right(j)] > self._data[j]:
                    self._swap(j,self._right(j))
                    self._downheap(self._right(j))
        elif self._has_left(j):
            if self._data[self._left(j)] > self._data[j]:
                self._swap(j,self._left(j))
                self._downheap(self._left(j))
        
    # ---------------------public methods----------------------
    def __init__(self,contents=[]):
        self.size = len(contents)
        # Removed support for _Item instances in contents
        self._data = [self._Item(k,v) for k,v in contents]
        if len(self._data) > 1:
            self._heapify()
            print("after heapifying the slice",self)
            
    def __len__(self):
        return len(self._data)
        
    def add(self,key,value):
        i = self._Item(key,value)
        self._data.append(i)
        self._upheap(self.size-1)   # use self.size
        
    def max(self):
        if self.is_empty():
            raise Empty("Priority queue is empty")
        item = self._data[0]
        return (item._key,item._value)
        
    def remove_max(self):
        if self.is_empty():
            raise Empty("Priority queue is empty")
        self._swap(0,self.size-1)  # use self.size
        item = self._data.pop()
        self._downheap(0)
        return (item._key,item._value)
        
    def _heapify(self):
        start = self._parent(self.size-1)  # use self.size
        for i in range(start,-1,-1):
            self._downheap(i)
            
    def __iter__(self):  # Warning: Does not take size into account
        for i in self._data:
            yield i
            
    def __str__(self):  # Warning: Does not take size into account
        l = [str(i) for i in self._data]
        return " ".join(l)

    def sort(self):
        l = len(self._data)
        for i in range(1,l):
            self._swap(0,l-i)
            print(self)
            # now it's in-place. 
            self.size -= 1
            self._downheap(0)
            print(self)
        return self._data


c = ((6,"Iris"), (3,"Helen"),(1,"Frank"),(4,"Joy"),(2,"Daniel"),(5,"Mark"))
h = HeapPriorityQueue(c)
s = h.sort()
for i in s:
    print(i)
  • Related