The following code is meant to print `There is page two.` if it finds a certain `div` on this website:
use reqwest;
use select::document::Document;
use select::predicate::Name;
use std::io;
/// Global flag recording whether the page-two `onclick` handler has been seen.
/// It starts out `false` and is only ever switched to `true` by `page_two_filter`.
static mut DECIDE: bool = false;

/// Sets `DECIDE` to `true` when `x` is exactly the handler text
/// `pSiguiente('?pagina=2')`; any other value leaves the flag untouched.
///
/// `url` is accepted but not used by this function.
fn page_two_filter(x: &str, url: &str) {
    // Guard clause: anything other than the page-two handler is ignored.
    if x != "pSiguiente('?pagina=2')" {
        return;
    }
    // SAFETY: NOTE(review) — sound only if nothing else accesses `DECIDE`
    // concurrently; confirm against every caller.
    unsafe {
        DECIDE = true;
    }
}
/// Asks for a search-results URL on stdin, downloads that page, and feeds the
/// `onclick` attribute of every `div` to `page_two_filter`. Prints
/// `There is page two.` when the global `DECIDE` flag ended up set.
///
/// Request and body-decoding errors are returned to the caller; a failed
/// stdin read panics with "Failed to read line".
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Give me the URL with the search results?");

    let mut input = String::new();
    io::stdin()
        .read_line(&mut input)
        .expect("Failed to read line");
    // `read_line` keeps the trailing newline, so strip it before using the URL.
    let url = input.trim();

    let body = reqwest::get(url).await?.text().await?;
    let document = Document::from(body.as_str());
    for handler in document
        .find(Name("div"))
        .filter_map(|node| node.attr("onclick"))
    {
        page_two_filter(handler, url);
    }

    // SAFETY: the loop that calls `page_two_filter` has finished by now.
    // NOTE(review): confirm nothing else writes `DECIDE` concurrently.
    let page_two_seen = unsafe { DECIDE };
    if page_two_seen {
        println!("There is page two.");
    }
    Ok(())
}
Dependencies from `Cargo.toml`:
[dependencies]
# NOTE(review): `futures` is not referenced by the code shown here — confirm it is needed.
futures = "0.3.15"
# HTTP client; the code calls `reqwest::get(...)` to download the page.
reqwest = "0.11.9"
# NOTE(review): `scraper` is not referenced by the code shown here — confirm it is needed.
scraper = "0.12.0"
# HTML parsing; supplies the `Document` type and `Name` predicate the code imports.
select = "0.5.0"
# Async runtime behind the `#[tokio::main]` attribute on `main`.
tokio = { version = "1", features = ["full"] }
Is there a safer way, i.e. without the `unsafe` blocks of code, of doing what that code does?
Wanting to avoid global mutable variables, I've tried redefining `page_two_filter` to return a `bool` and using the result of the call to `page_two_filter` in an `if` statement, like so:
/// Reports whether `x` is exactly the handler text `pSiguiente('?pagina=2')`.
///
/// `url` is accepted but not consulted.
fn page_two_filter(x: &str, url: &str) -> bool {
    x == "pSiguiente('?pagina=2')"
}
// Second attempt: `page_two_filter` now returns `bool`, and the iterator chain
// is used directly as the condition of the `if`.
//
// NOTE: this version does not compile. The closure handed to `for_each`
// evaluates to the `bool` returned by `page_two_filter`, but `for_each` expects
// a closure returning `()` — hence "expected `()`, found `bool`". `for_each`
// itself also returns `()`, so it could not serve as an `if` condition anyway.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Give me the URL with the search results?");
let mut url = String::new();
io::stdin()
.read_line(&mut url)
.expect("Failed to read line");
// Shadow the owned buffer with a trimmed view (drops the trailing newline).
let url = url.trim();
let html = reqwest::get(url).await?.text().await?;
if Document::from(html.as_str())
.find(Name("div"))
.filter_map(|n| n.attr("onclick"))
// The type error is reported here: the closure yields `bool`, not `()`.
.for_each(|x| page_two_filter(x, url))
{
println!("There is page two.")
}
Ok(())
}
but the compiler does not allow me to do this, saying:
mismatched types expected `()`, found `bool`
CodePudding user response:
Instead of `for_each()`, I guess you need `find()`.
This returns `Some(found_element)` if found, or `None` if not found.
You can then use the `Option` returned by `find()` with `if let`, `match`, `is_some()`, ...
if let Some(_) = Document::from(html.as_str())
.find(Name("div"))
.filter_map(|n| n.attr("onclick"))
.find(|x| page_two_filter(x, url))
{
println!("There is page two.")
}
CodePudding user response:
First of all, the "mismatched types: expected `()`, found `bool`" error occurs because the closure passed to `for_each` must return `()`, whereas `page_two_filter` returns `bool` (and `for_each` itself returns `()`, so it cannot be used as an `if` condition).
Secondly, the filter is actually a one-liner, which could be integrated in that very closure:
/// Returns `true` only when `x` is exactly the handler text
/// `pSiguiente('?pagina=2')`.
///
/// `url` is part of the signature but plays no role in the result.
fn page_two_filter(x: &str, url: &str) -> bool {
    matches!(x, "pSiguiente('?pagina=2')")
}
Lastly, you already use various iterator methods, so why not continue?
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Give me the URL with the search results?");
let mut url = String::new();
io::stdin().read_line(&mut url).expect("Failed to read line");
let url = url.trim();
let html = reqwest::get(url).await?.text().await?;
if let Some(_) = Document::from(html.as_str())
.find(Name("div"))
.filter_map(|n| n.attr("onclick"))
.find_map(|attr| if attr == "pSiguiente('?pagina=2')" {
Some(true)
} else {
None
}) {
println!("There is page two.");
}
Ok(())
}
CodePudding user response:
You can use `Iterator::any`, which returns `true` as soon as an element satisfies the condition and `false` otherwise:
/// Tells whether `x` equals the page-two handler text
/// `pSiguiente('?pagina=2')` exactly.
///
/// `url` is accepted for the caller's convenience and is not inspected.
fn page_two_filter(x: &str, url: &str) -> bool {
    const PAGE_TWO_HANDLER: &str = "pSiguiente('?pagina=2')";
    x == PAGE_TWO_HANDLER
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Give me the URL with the search results?");
let mut url = String::new();
io::stdin()
.read_line(&mut url)
.expect("Failed to read line");
let url = url.trim();
let html = reqwest::get(url).await?.text().await?;
let found = Document::from(html.as_str())
.find(Name("div"))
.filter_map(|n| n.attr("onclick"))
.any(|x| page_two_filter(x, url));
if found {
println!("There is page two.");
}
}