Home > database >  Django ORM distinct on only a subset of the queryset
Django ORM distinct on only a subset of the queryset

Time:01-08

Working in Django Rest Framework (DRF), django-filter, and PostgreSQL, and having an issue with one of our endpoints.

Assume the following:

# models.py
class Company(models.Model):
    name = models.CharField(max_length=50)


class Venue(models.Model):
    company = models.ForeignKey(to="Company", on_delete=models.CASCADE)
    name = models.CharField(max_length=50)

# create some data

company1 = Company.objects.create(name="Proper Ltd")
company2 = Company.objects.create(name="MyCompany Ltd")

Venue.objects.create(name="Venue #1", company=company1)
Venue.objects.create(name="Venue #2", company=company1)
Venue.objects.create(name="Property #1", company=company2)
Venue.objects.create(name="Property #2", company=company2)

# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
    serializer_class = CompanyVenueSearchSerializer
    queryset = (
        Venue.objects.all()
        .select_related("company")
        .order_by("company__name")
    )
    permission_classes = (ReadOnly,)
    http_method_names = ["get"]
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = CompanyVenueListFilter
    pagination_class = None

# filterset
class CompanyVenueListFilter(filters.FilterSet):
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        return qs.filter(
            Q(name__icontains=value)
            | Q(company__name__icontains=value)
        )

    class Meta:
        model = Venue
        fields = [
            "name",
            "company__name",
        ]

# serializer
class CompanyVenueSearchSerializer(serializers.ModelSerializer):
    company_id = serializers.IntegerField(source="company.pk")
    company_name = serializers.CharField(source="company.name")
    venue_id = serializers.IntegerField(source="pk")
    venue_name = serializers.CharField(source="name")

    class Meta:
        model = Venue
        fields = (
            "company_id",
            "company_name",
            "venue_id",
            "venue_name",
        )

We now want to allow the user to filter the results by sending a query in the request, e.g. curl -X GET https://example.com/api/company/?text=pr.

The serializer result will look something like:

[
   {
      "company_id":1,
      "company_name":"Proper Ltd",
      "venue_id":1,
      "venue_name":"Venue #1"
   },
   {  // update ORM to exclude this dict
      "company_id":1,
      "company_name":"Proper Ltd",
      "venue_id":2,
      "venue_name":"Venue #1"
   },
   {
      "company_id":2,
      "company_name":"MyCompany Ltd",
      "venue_id":3,
      "venue_name":"Property #1"
   },
   {
      "company_id":2,
      "company_name":"MyCompany Ltd",
      "venue_id":4,
      "venue_name":"Property #1"
   }
]

Expected result:

Want to rewrite the ORM query so that if the filter ("pr") matches the venue__name, return all venues. But if the filter matches the company__name, only return it once, thus in the example above the second dict in the list would be excluded/removed.

Is this possible?

CodePudding user response:

We have a temporary solution, which we're a bit wary about but it seems to do its job. Won't tag this answer as accepted as we're still hoping that someone has a more pythonic/djangoistic solution to the problem.

# viewset
class CompanyViewSet(viewsets.ReadOnlyModelViewSet):
    serializer_class = CompanyVenueSearchSerializer
    queryset = (
        Venue.objects.all()
        .select_related("company")
        .order_by("company__name")
    )
    permission_classes = (ReadOnly,)
    http_method_names = ["get"]
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = CompanyVenueListFilter
    pagination_class = None

    def list(self, request, *args, **kwargs):
        queryset = self.filter_queryset(self.get_queryset())
        serializer = self.get_serializer(queryset, many=True)
        text = request.GET.get("text").lower()
        first_idx = 0
        to_remove = []

        for data in serializer.data:
            if text in data.get("name").lower() and text not in data.get("venue_name").lower():
                if data.get("id") != first_idx:
                    """We don't want to remove the first hit of a company whose name matches"""
                    first_idx = data.get("id")
                    continue
                to_remove.append((data.get("id"), data.get("venue_id")))

        return Response(
            [
                data
                for data in serializer.data
                if (data.get("id"), data.get("venue_id")) not in to_remove
            ],
            status=status.HTTP_200_OK,
        )

CodePudding user response:

What you can do is to filter Company that matches name filtering and annotate them with the first related Venue and then combine it's results with the second requirement to return venue with name=value

from django.db.models import OuterRef, Q, Subquery

value = "pr"
first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
company_qs = Company.objects.filter(name__icontains=value).annotate(
    first_venue_id=Subquery(first_venue.values("id")[:1])
)
venue_qs = Venue.objects.filter(
    Q(name__icontains=value)
    | Q(id__in=company_qs.values_list("first_venue_id", flat=True))
)

The query executed when accessing values of venue_qs looks like

SELECT
    "venues_venue"."id",
    "venues_venue"."company_id",
    "venues_venue"."name"
FROM
    "venues_venue"
WHERE
    (
        UPPER("venues_venue"."name"::TEXT) LIKE UPPER(% pr %)
        OR "venues_venue"."id" IN (
            SELECT
                (
                    SELECT
                        U0."id"
                    FROM
                        "venues_venue" U0
                    WHERE
                        U0."company_id" IN (V0."id")
                    ORDER BY
                        U0."id" ASC
                    LIMIT
                        1
                ) AS "first_venue_id"
            FROM
                "venues_company" V0
            WHERE
                UPPER(V0."name"::TEXT) LIKE UPPER(% pr %)
        )
    )

This is how the filter should look like

class CompanyVenueListFilter(filters.FilterSet):
    text = filters.CharFilter(method="name_search")

    def name_search(self, qs, name, value):
        first_venue = Venue.objects.filter(company__in=OuterRef("id")).order_by("id")
        company_qs = Company.objects.filter(name__icontains=value).annotate(
            first_venue_id=Subquery(first_venue.values("id")[:1])
        )
        return qs.objects.filter(
            Q(name__icontains=value)
            | Q(id__in=company_qs.values_list("first_venue_id", flat=True))
        )

    class Meta:
        model = Venue
        fields = [
            "name",
            "company__name",
        ]

The answer is based on other stackoverflow answer and django docs

  • Related