I am working with a KML file
that I am trying to parse using BeautifulSoup.
I tried the following code, but it does not produce the desired output:
from bs4 import BeautifulSoup
fn = r'sampleKMLFile.kml'
# Parse inside a context manager so the file handle is closed afterwards.
with open(fn, 'r') as f:
    s = BeautifulSoup(f, 'xml')
pnodes = s.find_all('name')


def _is_mynodes_folder(tag):
    """Return True for a <Folder> whose direct <name> child reads 'MyNodes'."""
    if tag.name != "Folder":
        return False
    title = tag.find("name", recursive=False)
    return title is not None and title.get_text(strip=True) == 'MyNodes'


# <name> is a child element of <Folder>, not an XML attribute, so an attrs
# filter such as {"name": 'MyNodes'} never matches anything; select the folder
# by the text of its child <name> element instead.
z2 = s.find_all(_is_mynodes_folder)
Basically, I want to end up with the following in a pandas DataFrame:
MyNodes longitude latitude
Houston -95 33
Austin -97 33
The KML file is large and has the following content that I am interested in.
<Folder>
<name>MyNodes</name>
<Placemark>
<name>Houston</name>
<Camera>
<longitude>-95</longitude>
<latitude>33</latitude>
<roll>-1.6</roll>
<gx:altitudeMode>relativeToSeaFloor</gx:altitudeMode>
</Camera>
<styleUrl>#msn_placemark_circle</styleUrl>
<Point>
<coordinates>-95,33,0</coordinates>
<gx:drawOrder>1</gx:drawOrder>
</Point>
</Placemark>
<Placemark>
<name>Austin</name>
<Camera>
<longitude>-97</longitude>
<latitude>33</latitude>
<roll>-1.6</roll>
<gx:altitudeMode>relativeToSeaFloor</gx:altitudeMode>
</Camera>
<styleUrl>#msn_placemark_circle</styleUrl>
<Point>
<coordinates>-97,33,0</coordinates>
<gx:drawOrder>1</gx:drawOrder>
</Point>
</Placemark>
</Folder>
Edit: since the XML content shown above is stored in a file, how do I read that file to produce the `sample`
or `cnt`
string used in the two answers below?
CodePudding user response:
# cnt = """KML file content str"""
# The tag is looked up in lower case ("placemark") because the document is
# parsed with the "lxml" HTML parser rather than as XML.
soup = BeautifulSoup(cnt, "lxml")
placemark = soup.find_all("placemark")
print("MyNodes longitude latitude")
for node in placemark:
    # here you can save the value inside i.e. a dictionary
    # The first two fields stay on the same output line, separated by a space.
    for field in ("name", "longitude"):
        print(node.find(field).text, end=" ")
    # The last field terminates the row with a newline.
    print(node.find("latitude").text)
# MyNodes longitude latitude
# Houston -95 33
# Austin -97 33
CodePudding user response:
You can try something like this:
from bs4 import BeautifulSoup
import pandas as pd
# Inline KML fragment used as test input: a single <Folder> named "MyNodes"
# holding two <Placemark> entries (Houston and Austin), each with a <Camera>
# carrying <longitude>/<latitude> and a <Point> carrying <coordinates>.
# NOTE(review): the gx: prefix is used here without a visible namespace
# declaration -- confirm the real file declares it.
sample = """<Folder>
<name>MyNodes</name>
<Placemark>
<name>Houston</name>
<Camera>
<longitude>-95</longitude>
<latitude>33</latitude>
<roll>-1.6</roll>
<gx:altitudeMode>relativeToSeaFloor</gx:altitudeMode>
</Camera>
<styleUrl>#msn_placemark_circle</styleUrl>
<Point>
<coordinates>-95,33,0</coordinates>
<gx:drawOrder>1</gx:drawOrder>
</Point>
</Placemark>
<Placemark>
<name>Austin</name>
<Camera>
<longitude>-97</longitude>
<latitude>33</latitude>
<roll>-1.6</roll>
<gx:altitudeMode>relativeToSeaFloor</gx:altitudeMode>
</Camera>
<styleUrl>#msn_placemark_circle</styleUrl>
<Point>
<coordinates>-97,33,0</coordinates>
<gx:drawOrder>1</gx:drawOrder>
</Point>
</Placemark>
</Folder>
"""
def finder(tag: str) -> list:
    """Return the text of every ``tag`` element in the module-level ``soup``.

    Entries whose text is exactly "MyNodes" are dropped, which removes the
    folder's own <name> from the results. ``soup`` is resolved at call time,
    so it must be assigned before this function is invoked.
    """
    texts = []
    for element in soup.find_all(tag):
        content = element.getText()
        if content == "MyNodes":
            continue
        texts.append(content)
    return texts
soup = BeautifulSoup(sample, features="xml")
# Build one row per <Placemark> so that each name is always paired with its
# own coordinates. Zipping three independently collected tag lists would
# silently misalign or truncate rows as soon as any Placemark lacks a
# <longitude>/<latitude>, or any other <name> element exists in the document.
fields = ("name", "longitude", "latitude")
rows = []
for placemark in soup.find_all("Placemark"):
    cells = [placemark.find(field) for field in fields]
    if any(cell is None for cell in cells):
        # Incomplete placemark: it cannot fill all three columns, so skip it.
        continue
    rows.append([cell.get_text() for cell in cells])
df = pd.DataFrame(rows, columns=["Nodes", "Longitude", "Latitude"])
print(df)
Output:
Nodes Longitude Latitude
0 Houston -95 33
1 Austin -97 33