Getting City from IP Address range-CodePudding

I have an IP address. For example, 192.168.2.10
Also I have a dictionary:

# Each city maps to a list of IPv4 ranges, every range being a dict with
# dotted-quad 'start' and 'end' strings.
RANGES = {
    'london': [
        {'start': '10.10.0.0', 'end': '10.10.255.255'},
        {'start': '192.168.1.0', 'end': '192.168.1.255'},
    ],
    'munich': [
        {'start': '10.12.0.0', 'end': '10.12.255.255'},
        {'start': '172.16.10.0', 'end': '172.16.11.255'},
        {'start': '192.168.2.0', 'end': '192.168.2.255'},
    ],
}

Question: How can I find the city for my IP address using this dictionary, with the lowest possible time complexity?

CodePudding user response：

Write a custom function which parses the IP addresses as tuples of numbers for easier comparison:

def get_city(ip, ranges=None):
    """Return the city whose IP range contains *ip*, or ``None``.

    Addresses are compared as tuples of integer octets, so the comparison
    is numeric per octet rather than lexicographic on the strings.

    Args:
        ip: Dotted-quad IPv4 address string, e.g. ``"192.168.2.10"``.
        ranges: Mapping of city name to a list of dicts with ``'start'``
            and ``'end'`` address strings. Defaults to the module-level
            ``RANGES``.

    Returns:
        The name of the first city with a range containing *ip* (both
        bounds included), or ``None`` when no range matches.

    Raises:
        ValueError: If an address part is not an integer.
    """
    def octets(address):
        return tuple(map(int, address.split(".")))

    if ranges is None:
        ranges = RANGES
    # Parse the query address once instead of once per candidate range.
    target = octets(ip)
    for city, spans in ranges.items():
        for span in spans:
            if octets(span["start"]) <= target <= octets(span["end"]):
                return city
    return None

>>> get_city("192.168.2.10")
'munich'

CodePudding user response：

First, you need to rearrange your data so that lookups are more efficient.

Create a function that converts an IP address to a number,
then use each range's start IP number as the key of the new data structure, and keep the end IP (together with the location) in the value.

def ip_to_long(ip):
    """Convert a dotted-quad IPv4 string to its integer value.

    The octets are folded most-significant first: the running value is
    shifted left by 8 bits and the next octet is added.

    Args:
        ip: Address string such as ``'192.168.2.10'``.

    Returns:
        The address as a single integer.

    Raises:
        ValueError: If a part of the address is not an integer.
    """
    return reduce(lambda acc, octet: (acc << 8) + octet, map(int, ip.split('.')))

def data_transform(input_ranges):
    """Re-key the city ranges by the integer value of each range start.

    Args:
        input_ranges: Mapping of location name to a list of dicts with
            ``'start'`` and ``'end'`` dotted-quad address strings.

    Returns:
        A dict mapping each range's integer start to
        ``{'location': <name>, 'end': <integer end>}``. If two ranges share
        the same start, the later one replaces the earlier one.
    """
    data = {}
    # Iterate the argument, not the module-level RANGES, so the function
    # works for whatever mapping the caller passes in.
    for location, items in input_ranges.items():
        for item in items:
            data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
    return data

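Now you can use bisect to search the sorted list of start IPs for your input. Note that bisect does not use a tree internally: it runs a plain binary search over the sorted list, so each lookup takes O(log n).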

Below is the whole PoC code for it:

from bisect import bisect_left, bisect_right
from functools import reduce


# Source data: city name -> list of IPv4 ranges, each given as dotted-quad
# 'start' and 'end' strings.
RANGES = {
    'london': [
        {'start': '10.10.0.0', 'end': '10.10.255.255'},
        {'start': '192.168.1.0', 'end': '192.168.1.255'},
    ],
    'munich': [
        {'start': '10.12.0.0', 'end': '10.12.255.255'},
        {'start': '172.16.10.0', 'end': '172.16.11.255'},
        {'start': '192.168.2.0', 'end': '192.168.2.255'},
    ],
}


def ip_to_long(ip):
    """Convert a dotted-quad IPv4 string to its integer value.

    Each octet is folded in from the most significant end: the running
    value is shifted left by 8 bits, then the next octet is added.

    Args:
        ip: Address string such as ``'192.168.2.10'``.

    Returns:
        The address as a single integer.

    Raises:
        ValueError: If a part of the address is not an integer.
    """
    return reduce(lambda acc, octet: (acc << 8) + octet, map(int, ip.split('.')))

def data_transform(input_ranges):
    """Build a lookup table keyed by the integer start of every range.

    Args:
        input_ranges: Mapping of location name to a list of dicts holding
            ``'start'`` and ``'end'`` dotted-quad address strings.

    Returns:
        A dict mapping each integer range start to
        ``{'location': <name>, 'end': <integer end>}``.
    """
    table = {}
    for city, spans in input_ranges.items():
        for span in spans:
            last = ip_to_long(span['end'])
            first = ip_to_long(span['start'])
            table[first] = {'location': city, 'end': last}
    return table

def search_for_ip(search_ip, ip_starts, ip_data):
    """Return the location whose range contains *search_ip*, or ``None``.

    Args:
        search_ip: Dotted-quad IPv4 address string to look up.
        ip_starts: Sorted list of integer range starts (the keys of
            *ip_data*).
        ip_data: Mapping from integer range start to a dict with
            ``'location'`` and integer ``'end'`` entries.

    Returns:
        The matching location, or ``None`` when the address is below every
        range start or beyond the end of the closest preceding range.

    Note:
        Only the range with the greatest start not above the address is
        checked, so ranges are assumed not to overlap.
    """
    ip_number = ip_to_long(search_ip)  # convert once, reuse below
    # bisect_right places an address equal to a range start *after* that
    # start, so index - 1 is the range beginning at or before the address.
    # bisect_left would step back to the previous range in that case.
    index = bisect_right(ip_starts, ip_number)
    if index == 0:
        return None
    candidate = ip_data[ip_starts[index - 1]]
    # The range end itself belongs to the range, hence >= rather than >.
    if candidate['end'] >= ip_number:
        return candidate['location']
    return None

# Build the lookup table keyed by the integer start of each range.
new_data = data_transform(RANGES)
print(new_data)

# Iterating a dict yields its keys, so sorted() alone produces the ordered
# list of range starts that the binary search needs.
ip_starts = sorted(new_data)


print(search_for_ip('192.168.2.100', ip_starts, new_data))  # -> munich
print(search_for_ip('192.168.1.100', ip_starts, new_data))  # -> london
print(search_for_ip('192.168.0.100', ip_starts, new_data))  # -> None

CodePudding user response：

The "proper answer", if you want the best complexity for arbitrarily large data sets, is the one given by Ji Bin.

To really optimize performance over multiple calls, you do need to restructure your data and use the built-in bisect module.

But if you REALLY do not want to touch your data, you can still use a band-aid custom implementation of bisect, which would look like this:

# City name -> list of IPv4 ranges, each a dict with dotted-quad 'start'
# and 'end' strings. Every city's list is written in ascending order.
RANGES = {
    'london': [
        dict(start='10.10.0.0', end='10.10.255.255'),
        dict(start='192.168.1.0', end='192.168.1.255'),
    ],
    'munich': [
        dict(start='10.12.0.0', end='10.12.255.255'),
        dict(start='172.16.10.0', end='172.16.11.255'),
        dict(start='192.168.2.0', end='192.168.2.255'),
    ],
}


def ipv4_str_to_tuple(ip_str):
    """Split a dotted address string and return its parts as a tuple of ints."""
    parts = ip_str.split('.')
    return tuple(int(part) for part in parts)


def relative_in_range(ipv4_tuple, ip_range):
    """Tell where an address lies relative to a range.

    Args:
        ipv4_tuple: Address as a tuple of ints.
        ip_range: Dict with ``'start'`` and ``'end'`` address strings.

    Returns:
        ``-1`` if the range starts after the address, ``1`` if the range
        ends before the address, and ``0`` if the address is inside the
        range (bounds included).
    """
    lower = ipv4_str_to_tuple(ip_range['start'])
    upper = ipv4_str_to_tuple(ip_range['end'])
    if lower > ipv4_tuple:
        return -1
    return 1 if upper < ipv4_tuple else 0


def from_those_ranges(ipv4_tuple, ranges):
    """Report whether the address falls inside any of *ranges*.

    This is a hand-written binary search that uses ``relative_in_range``
    as the comparison, so *ranges* must be sorted in ascending order and
    must not overlap.

    Args:
        ipv4_tuple: Address as a tuple of ints.
        ranges: Sorted list of dicts with ``'start'`` and ``'end'``
            address strings.

    Returns:
        ``True`` if some range contains the address, ``False`` otherwise
        (including when *ranges* is empty).
    """
    lo, hi = 0, len(ranges)
    while lo < hi:
        mid = lo + (hi - lo) // 2
        comp = relative_in_range(ipv4_tuple, ranges[mid])
        if comp == 0:
            return True
        if comp > 0:
            # The middle range ends before the address: search to its right.
            lo = mid + 1
        else:
            # The middle range starts after the address: search to its left.
            hi = mid
    return False


def find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges):
    """Return the first entry whose ranges contain the address, else ``None``.

    Args:
        ipv4_tuple: Address as a tuple of ints.
        entries_ranges: Mapping of entry name to its list of ranges.
    """
    matches = (
        name
        for name, spans in entries_ranges.items()
        if from_those_ranges(ipv4_tuple, spans)
    )
    # The generator is lazy, so entries after the first hit are not checked.
    return next(matches, None)


def find_entry_from_ipv4_str(ipv4_str, entries_ranges):
    """Look up the entry for an address given as a dotted string.

    Parses *ipv4_str* into a tuple of ints and delegates to
    ``find_entry_from_ipv4_tuple``; returns whatever that call returns.
    """
    return find_entry_from_ipv4_tuple(ipv4_str_to_tuple(ipv4_str), entries_ranges)


# Demo lookups, printed one result per line in this order.
for address in (
    '10.2.4.2',
    '192.168.2.1',
    '192.168.1.1',
    '172.12.10.25',
    '192.168.2.1',
    '10.10.5.5',
):
    print(find_entry_from_ipv4_str(address, RANGES))

-> None

-> munich

-> london

-> None

-> munich

-> london

etc.

link to playground : https://trinket.io/python/e1f9deb1c7