- I have an IP address. For example, 192.168.2.10
- Also I have a dictionary:
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
Question: How should I find the city from my IP address and use this dictionary spending less time (time complexity) as possible?
CodePudding user response:
Write a custom function which parses the IP addresses as tuples of numbers for easier comparison:
def get_city(ip):
for city in RANGES:
for d in RANGES[city]:
if tuple(map(int, d["start"].split("."))) <= tuple(map(int, ip.split("."))) <= tuple(map(int, d["end"].split("."))):
return city
>>> get_city("192.168.2.10")
"munich"
CodePudding user response:
First, you need to rearrange your data, for lookup more efficiently.
- create a function for transforming IP address to number
- and using the lower/start IP number as the new data key, and also keep the end IP in values.
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in RANGES.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
Now, you could use bisect
to search the sorted start IP, for your input, AIK it using the RB-tree internally.
Below is the whole PoC code for it:
from functools import reduce
from bisect import bisect_left
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in input_ranges.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
return data
def search_for_ip(search_ip, ip_starts, ip_data):
lookup_index = bisect_left(ip_starts, ip_to_long(search_ip))
if lookup_index > 0 and ip_data[ip_starts[lookup_index-1]]['end'] > ip_to_long(search_ip):
return ip_data[ip_starts[lookup_index-1]]['location']
return
new_data = data_transform(RANGES)
print(new_data)
ip_starts = sorted(list(new_data))
print(search_for_ip('192.168.2.100', ip_starts, new_data)) # -> munich
print(search_for_ip('192.168.1.100', ip_starts, new_data)) # -> lodon
print(search_for_ip('192.168.0.100', ip_starts, new_data)) # -> None
CodePudding user response:
The "proper answer" if you want the best complexity for arbitrarily large data sets is the one given given by Ji Bin.
To really optimize performances over multiple calls, you indeed need to restructure your data, and use the inbuilt bisect function.
But if you REALLY do not want to touch your data, you can still use a band-aid custom implementation of bisect which would look like that
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ipv4_str_to_tuple(ip_str):
return tuple(map(int, ip_str.split('.')))
def relative_in_range(ipv4_tuple, ip_range):
ipv4t_start = ipv4_str_to_tuple(ip_range['start'])
ipv4t_end = ipv4_str_to_tuple(ip_range['end'])
if ipv4t_start > ipv4_tuple:
return -1
if ipv4t_end < ipv4_tuple:
return 1
return 0
def from_those_ranges(ipv4_tuple, ranges):
#in-built bisect
lo, hi = 0, len(ranges)
while lo < hi:
mid = lo (hi - lo) // 2
comp = relative_in_range(ipv4_tuple, ranges[mid])
if comp == 0:
return True
if comp > 0:
lo = mid 1
else:
hi = mid
return False
def find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges):
for entry, entry_ranges in entries_ranges.items():
if from_those_ranges(ipv4_tuple, entry_ranges):
return entry
return None
def find_entry_from_ipv4_str(ipv4_str, entries_ranges):
ipv4_tuple = ipv4_str_to_tuple(ipv4_str)
return find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges)
print(find_entry_from_ipv4_str('10.2.4.2', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('192.168.1.1', RANGES))
print(find_entry_from_ipv4_str('172.12.10.25', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('10.10.5.5', RANGES))
-> None
-> munich
-> london
-> None
-> munich
-> london
etc.
link to playground : https://trinket.io/python/e1f9deb1c7