Time out after 60s when checking URLs (#2785)

This commit is contained in:
RumovZ 2023-10-31 22:46:49 +01:00 committed by GitHub
parent 252b4e7f00
commit feaaaa230a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -6,6 +6,7 @@
use std::borrow::Cow;
use std::env;
use std::iter;
use std::time::Duration;
use anki::links::help_page_link_suffix;
use anki::links::help_page_to_link;
@ -21,11 +22,14 @@ use regex::Regex;
use reqwest::Url;
use strum::IntoEnumIterator;
const WEB_TIMEOUT: Duration = Duration::from_secs(60);
/// Aggregates [`Outcome`]s by collecting the error messages of the invalid
/// ones.
#[derive(Default)]
struct Outcomes(Vec<String>);
#[derive(Debug)]
enum Outcome {
Valid,
Invalid(String),
@ -98,23 +102,23 @@ async fn check_links() {
async fn check_url(page: CheckableUrl, ctx: &BasicContext) -> Outcome {
let link = page.url();
match Url::parse(&link) {
Ok(url) => {
if url.as_str() == link {
match check_web(&url, ctx).await {
Ok(()) => Outcome::Valid,
Err(Reason::Dom) => Outcome::Invalid(format!(
"'#{}' not found on '{}{}'",
url.fragment().unwrap(),
url.domain().unwrap(),
url.path(),
)),
Err(Reason::Web(err)) => Outcome::Invalid(err.to_string()),
_ => unreachable!(),
}
} else {
Outcome::Invalid(format!("'{}' is not a valid URL part", page.anchor(),))
Ok(url) if url.as_str() == link => {
let future = check_web(&url, ctx);
let timeout = tokio::time::timeout(WEB_TIMEOUT, future);
match timeout.await {
Err(_) => Outcome::Invalid(format!("Timed out: {link}")),
Ok(Ok(())) => Outcome::Valid,
Ok(Err(Reason::Dom)) => Outcome::Invalid(format!(
"'#{}' not found on '{}{}'",
url.fragment().unwrap(),
url.domain().unwrap(),
url.path(),
)),
Ok(Err(Reason::Web(err))) => Outcome::Invalid(err.to_string()),
_ => unreachable!(),
}
}
Ok(_) => Outcome::Invalid(format!("'{}' is not a valid URL part", page.anchor(),)),
Err(err) => Outcome::Invalid(err.to_string()),
}
}