#importing libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
# URL of the page whose country list is scraped
website = 'https://space.skyrocket.de/directories/sat_c.htm'
# GET request; the timeout stops the script from hanging on a stalled connection
response = requests.get(website, timeout=30)
# Stop on a 4xx/5xx status instead of parsing an error page
response.raise_for_status()
# Parse the returned HTML
soup = BeautifulSoup(response.content, 'html.parser')
# The <ul class="country-list"> element holding the country entries
countries = soup.find('ul', {'class': 'country-list'})
if countries is None:
    raise ValueError("no <ul class='country-list'> element found in " + website)
# Collect the link text of every entry.
# Iterating `countries` directly also yields the text nodes that sit between
# its child tags; on a text node .find('a') is str.find and returns an int,
# so .get_text() fails. find_all(True, recursive=False) returns only the
# direct child tags.
country_list = []
for nation in countries.find_all(True, recursive=False):
    link = nation.find('a')
    # An entry without a link has no name to record
    if link is not None:
        country_list.append(link.get_text())
CodePudding user response:
You could use the iso3166 package instead of scraping any data. Install it with:
pip install iso3166
Then this code will print all countries:
from iso3166 import countries_by_name
# Print one entry per line. Iterating countries_by_name directly yields its
# keys if it is a mapping — presumably the country names; confirm against the
# iso3166 documentation.
for country_name in countries_by_name:
    print(country_name)
Hope this is what you wanted :)
CodePudding user response:
import requests
from bs4 import BeautifulSoup
# Download the page and parse its HTML
r = requests.get('https://space.skyrocket.de/directories/sat_c.htm')
soup = BeautifulSoup(r.text, 'html.parser')
# Take the first <ul class="country-list">, then every <li> beneath it
country_ul = soup.select_one('ul.country-list')
c_list = country_ul.select('li')
# Text of each entry with the surrounding whitespace removed
list_with_country_names = []
for entry in c_list:
    list_with_country_names.append(entry.text.strip())
# Bare expression: shows the list in a notebook/REPL, does nothing in a script
list_with_country_names
This will return:
['Afghanistan',
'Albania',
'Algeria',
'Angola',
'Argentina',
'Armenia',
'Australia',
'Austria',
'Azerbaijan',
'Bangladesh',
'Belarus',
'Belgium',
'Bermuda',
'Bhutan',
'Bolivia',
'Brazil',
'Bulgaria',
'Cambodia',
'Canada',
'Cayman Islands',
'Chile',
'China',
'Colombia',
'Congo (Democratic Republic)',
'Costa Rica',
'Czechoslovakia → Czech Rep.',
'Denmark',
'Ecuador',
'Egypt',
'Estonia',
'Ethiopia',
'Europe (ESA)',
'Finland',
'France',
'Georgia',
'Germany',
'Ghana',
'Greece',
'Guatemala',
'Hungary',
'India',
'Indonesia',
'Iran',
'Iraq',
'Ireland',
'Israel',
'Italy',
'Japan',
'Jordan',
'Kazakhstan',
'Kenya',
'Kuwait',
'Laos',
'Latvia',
'Lithuania',
'Luxembourg',
'Malaysia',
'Mauritius',
'Mexico',
'Moldova',
'Monaco',
'Mongolia',
'Morocco',
'Myanmar',
'Nepal',
'Netherlands',
'New Zealand',
'Nicaragua',
'Nigeria',
'North Korea',
'Norway',
'Pakistan',
'Paraguay',
'Peru',
'Philippines',
'Poland',
'Portugal',
'Qatar',
'Romania',
'Russia',
'Rwanda',
'Saudi Arabia',
'Singapore',
'Slovakia',
'Slovenia',
'South Africa',
'South Korea',
'Spain',
'Sri Lanka',
'Sudan',
'Sweden',
'Switzerland',
'Taiwan (Republic of China)',
'Thailand',
'Tonga',
'Tunisia',
'Turkey',
'Turkmenistan',
'UAE',
'Uganda',
'UK',
'Ukraine',
'Uruguay',
'USA',
'USSR',
'Venezuela',
'Vietnam',
'Zimbabwe',
'International',
'International (Amateur)']
CodePudding user response:
from bs4 import BeautifulSoup
import requests
# URL of the page whose country list is scraped
website = 'https://space.skyrocket.de/directories/sat_c.htm'
# The timeout stops the script from hanging on a stalled connection
response = requests.get(website, timeout=30)
print(response.status_code)
soup = BeautifulSoup(response.content, 'html.parser')
# The <ul class="country-list"> element holding the country entries
countries = soup.find('ul', {'class': 'country-list'})
country_list = []
# Iterating `countries` directly also yields the text nodes between its child
# tags, for which .find('a') is str.find and returns an int. Asking for the
# direct child tags only avoids those nodes altogether.
for nation in countries.find_all(True, recursive=False):
    link = nation.find('a')
    # A tag without an <a> makes find() return None, which has no .text
    if link is not None:
        country_list.append(link.text)
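Iterating over the ul tag yields the text nodes between the entries as well as the tags themselves, and on a text node find('a') is the plain string method, which returns an integer instead of a tag. You just need to skip those entries, as in the code above. This code will give you what you need.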