Using html scoring dance to collect stats

This commit is contained in:
Lukas Wölfer
2025-10-05 00:26:55 +02:00
parent c54d950e30
commit bc57e8cceb
7 changed files with 109 additions and 84 deletions

11
Cargo.lock generated
View File

@@ -2098,6 +2098,15 @@ dependencies = [
"serde_core", "serde_core",
] ]
[[package]]
name = "serde_plain"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "serde_spanned" name = "serde_spanned"
version = "1.0.0" version = "1.0.0"
@@ -2320,7 +2329,7 @@ dependencies = [
"reqwest", "reqwest",
"scraper", "scraper",
"serde", "serde",
"serde_json", "serde_plain",
"thiserror 2.0.12", "thiserror 2.0.12",
"tokio", "tokio",
"tracing", "tracing",

View File

@@ -19,7 +19,7 @@ rand = "0.9.2"
reqwest = "0.12.22" reqwest = "0.12.22"
scraper = "0.24.0" scraper = "0.24.0"
serde = { version = "1.0.219", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.145" serde_plain = "1.0.2"
thiserror = "2.0.12" thiserror = "2.0.12"
tokio = { version = "1.46.1", features = ["rt"] } tokio = { version = "1.46.1", features = ["rt"] }
tracing = { version = "0.1.41", default-features = false, features = ["std"] } tracing = { version = "0.1.41", default-features = false, features = ["std"] }

View File

@@ -1,19 +1,10 @@
use std::fmt; #[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, Eq)]
#[derive(serde::Deserialize, Debug, PartialEq, Eq)]
pub enum DanceRole { pub enum DanceRole {
Leader, Leader,
Follower, Follower,
} }
impl DanceRole { impl DanceRole {
pub const fn as_str(&self) -> &str {
match self {
Self::Leader => "Leader",
Self::Follower => "Follower",
}
}
#[allow(dead_code)] #[allow(dead_code)]
pub const fn other(&self) -> Self { pub const fn other(&self) -> Self {
match self { match self {
@@ -23,29 +14,12 @@ impl DanceRole {
} }
} }
#[derive(Debug)] serde_plain::derive_display_from_serialize!(DanceRole);
pub struct ParseDanceRoleError; serde_plain::derive_fromstr_from_deserialize!(DanceRole);
impl std::fmt::Display for ParseDanceRoleError { #[derive(
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy,
write!(f, "failed to parse DanceRole") )]
}
}
impl std::error::Error for ParseDanceRoleError {}
impl TryFrom<&str> for DanceRole {
type Error = ParseDanceRoleError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
match value.to_lowercase().as_str() {
"leader" => Ok(Self::Leader),
"follower" => Ok(Self::Follower),
_ => Err(ParseDanceRoleError),
}
}
}
#[derive(serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum DanceRank { pub enum DanceRank {
Newcomer, Newcomer,
Novice, Novice,
@@ -56,19 +30,8 @@ pub enum DanceRank {
AllStars, AllStars,
Champions, Champions,
} }
serde_plain::derive_display_from_serialize!(DanceRank);
impl DanceRank { serde_plain::derive_fromstr_from_deserialize!(DanceRank);
pub const fn as_str(&self) -> &str {
match self {
Self::Newcomer => "Newcomer",
Self::Novice => "Novice",
Self::Intermediate => "Intermediate",
Self::Advanced => "Advanced",
Self::AllStars => "All-Stars",
Self::Champions => "Champions",
}
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct CompState { pub struct CompState {

View File

@@ -5,6 +5,10 @@
clippy::multiple_crate_versions, clippy::multiple_crate_versions,
reason = "Don't know how to fix this, should be fine" reason = "Don't know how to fix this, should be fine"
)] )]
#![allow(
clippy::future_not_send,
reason = "Probably makes sense to fix this, I'll wait for mwbot to fix it"
)]
#![allow( #![allow(
clippy::cast_possible_truncation, clippy::cast_possible_truncation,
clippy::cast_precision_loss, clippy::cast_precision_loss,

View File

@@ -11,24 +11,21 @@ pub enum InfoCompileError {
pub fn page_from_info(info: DanceInfo) -> Result<Wikicode, InfoCompileError> { pub fn page_from_info(info: DanceInfo) -> Result<Wikicode, InfoCompileError> {
let mut params = IndexMap::new(); let mut params = IndexMap::new();
params.insert("name".to_string(), info.name()); params.insert("name".to_string(), info.name());
params.insert( params.insert("dominant_role".to_string(), info.dominant_role.to_string());
"dominant_role".to_string(),
info.dominant_role.as_str().to_string(),
);
params.insert( params.insert(
"allowed_rank".to_string(), "allowed_rank".to_string(),
info.dominant_role_comp.rank.as_str().to_string(), info.dominant_role_comp.rank.to_string(),
); );
params.insert( params.insert(
"dominant_rank".to_string(), "dominant_rank".to_string(),
info.dominant_role_comp.rank.as_str().to_string(), info.dominant_role_comp.rank.to_string(),
); );
params.insert( params.insert(
"dominant_points".to_string(), "dominant_points".to_string(),
info.dominant_role_comp.points.to_string(), info.dominant_role_comp.points.to_string(),
); );
if let Some(u) = info.non_dominant_role_comp { if let Some(u) = info.non_dominant_role_comp {
params.insert("non_dominant_rank".to_string(), u.rank.as_str().to_string()); params.insert("non_dominant_rank".to_string(), u.rank.to_string());
params.insert("non_dominant_points".to_string(), u.points.to_string()); params.insert("non_dominant_points".to_string(), u.points.to_string());
} }
let t = Template::new("Template:WSDCBox", &params)?; let t = Template::new("Template:WSDCBox", &params)?;

View File

@@ -3,11 +3,12 @@ use std::collections::HashMap;
use crate::{ use crate::{
app_signature, app_signature,
dance_info::{CompState, DanceInfo, DanceRank, DanceRole}, dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
worldsdc::scoringdance::fetch_wsdc_info_scoring_dance,
}; };
use reqwest::ClientBuilder; use reqwest::ClientBuilder;
mod scoringdance; mod scoringdance;
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> { pub async fn fetch_wsdc_info_wsdc(id: u32) -> Result<DanceInfo, DanceInfoError> {
let client = ClientBuilder::new() let client = ClientBuilder::new()
.user_agent(app_signature()) .user_agent(app_signature())
.build() .build()
@@ -36,6 +37,10 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
Ok(x.into()) Ok(x.into())
} }
/// Fetches the dance information for the dancer with the given `id`.
///
/// This is a thin entry point that forwards to `fetch_wsdc_info_scoring_dance`
/// and returns its result unchanged.
///
/// # Errors
/// Returns whatever `DanceInfoError` the delegated call produces.
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
fetch_wsdc_info_scoring_dance(id).await
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")] #![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
@@ -70,6 +75,8 @@ pub enum DanceInfoError {
Request(reqwest::Error), Request(reqwest::Error),
#[error("Failed to parse response: {0}")] #[error("Failed to parse response: {0}")]
JsonParse(reqwest::Error), JsonParse(reqwest::Error),
#[error("Failed to parse html: {0}")]
HtmlParse(#[from] scoringdance::ScoringParseError),
} }
#[derive(serde::Deserialize, Debug)] #[derive(serde::Deserialize, Debug)]

View File

@@ -1,24 +1,36 @@
use std::{collections, str::FromStr}; use std::str::FromStr;
use reqwest::ClientBuilder; use reqwest::ClientBuilder;
use scraper::{ElementRef, Html, Selector}; use scraper::{ElementRef, Html, Selector};
use crate::{ use crate::{
app_signature, app_signature,
dance_info::{DanceInfo, DanceRank}, dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
worldsdc::{DanceInfoError, DanceInfoParser}, worldsdc::DanceInfoError,
}; };
/// Error returned by the HTML parsing helpers in this module when the page
/// does not contain what they look for.
#[derive(thiserror::Error, Debug)]
pub enum ScoringParseError {
/// An expected piece of the page was not found. The payload is a short label
/// naming what was being looked up (for example "table", "title" or
/// "detail card") and is interpolated into the error message.
#[error("Could not parse: {0}")]
ParseMismatch(String),
}
fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) { fn parse_card(t: ElementRef) -> Result<(String, Vec<Vec<String>>), ScoringParseError> {
#[allow(clippy::unwrap_used)]
let title_selector = Selector::parse("div.card-header").unwrap(); let title_selector = Selector::parse("div.card-header").unwrap();
#[allow(clippy::unwrap_used)]
let table_selector = Selector::parse("div.card-body > table").unwrap(); let table_selector = Selector::parse("div.card-body > table").unwrap();
#[allow(clippy::unwrap_used)]
let row_selector = Selector::parse("tr").unwrap(); let row_selector = Selector::parse("tr").unwrap();
#[allow(clippy::unwrap_used)]
let cell_selector = Selector::parse("th,td").unwrap(); let cell_selector = Selector::parse("th,td").unwrap();
let table = t.select(&table_selector).next().unwrap(); let table = t
.select(&table_selector)
.next()
.ok_or_else(|| ScoringParseError::ParseMismatch("table".to_owned()))?;
let title = t let title = t
.select(&title_selector) .select(&title_selector)
.next() .next()
.unwrap() .ok_or_else(|| ScoringParseError::ParseMismatch("title".to_owned()))?
.text() .text()
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join("") .join("")
@@ -33,10 +45,10 @@ fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
.collect::<Vec<_>>() .collect::<Vec<_>>()
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
(title, parsed_table) Ok((title, parsed_table))
} }
fn parse_details(d: &[Vec<String>]) { fn parse_details(d: &[Vec<String>]) -> Result<(String, String), ScoringParseError> {
let first_name_row = d.iter().find(|v| { let first_name_row = d.iter().find(|v| {
v.first() v.first()
.is_some_and(|v| v.to_lowercase().contains("first name")) .is_some_and(|v| v.to_lowercase().contains("first name"))
@@ -45,12 +57,20 @@ fn parse_details(d: &[Vec<String>]) {
v.first() v.first()
.is_some_and(|v| v.to_lowercase().contains("last name")) .is_some_and(|v| v.to_lowercase().contains("last name"))
}); });
let first_name = first_name_row.unwrap().last().unwrap(); let first_name = first_name_row
let last_name = last_name_row.unwrap().last().unwrap(); .ok_or_else(|| ScoringParseError::ParseMismatch("first_name".to_owned()))?
dbg!(first_name, last_name); .last()
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
let last_name = last_name_row
.ok_or_else(|| ScoringParseError::ParseMismatch("last_name".to_owned()))?
.last()
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
Ok((first_name.clone(), last_name.clone()))
} }
fn parse_stats(d: &[Vec<String>]) { fn parse_stats(
d: &[Vec<String>],
) -> Result<(DanceRole, CompState, Option<CompState>), ScoringParseError> {
let chapters = d.chunk_by(|_, b| b.len() != 1).map(|v| { let chapters = d.chunk_by(|_, b| b.len() != 1).map(|v| {
let (a, b) = v.split_first().unwrap(); let (a, b) = v.split_first().unwrap();
let a = a.first().unwrap(); let a = a.first().unwrap();
@@ -66,7 +86,7 @@ fn parse_stats(d: &[Vec<String>]) {
let mut sorted_chapters = rest let mut sorted_chapters = rest
.into_iter() .into_iter()
.map(|(chapter, items)| { .map(|(chapter, items)| {
let rank: DanceRank = serde_json::from_str(chapter).map_err(|_| chapter.to_owned())?; let rank: DanceRank = serde_plain::from_str(chapter).map_err(|_| chapter.to_owned())?;
Ok::<(DanceRank, Vec<[&String; 2]>), String>((rank, items)) Ok::<(DanceRank, Vec<[&String; 2]>), String>((rank, items))
}) })
.filter_map(|v| match v { .filter_map(|v| match v {
@@ -91,43 +111,69 @@ fn parse_stats(d: &[Vec<String>]) {
.find(|[a, _]| a.to_lowercase().contains("points follower")) .find(|[a, _]| a.to_lowercase().contains("points follower"))
.map(|[_, points]| (rank, points)) .map(|[_, points]| (rank, points))
}); });
let primary_role = all_time.iter().find_map(|[key, value]| { let primary_role: DanceRole = all_time
.iter()
.find_map(|[key, value]| {
if !key.to_lowercase().contains("primary role") { if !key.to_lowercase().contains("primary role") {
return None; return None;
} }
Some(value) Some(value)
})
.map(|arg0: &&std::string::String| DanceRole::from_str(arg0.as_str()).unwrap())
.unwrap();
let ((rank, points), non_d) = match primary_role {
DanceRole::Leader => (leader_rank.unwrap(), follower_rank),
DanceRole::Follower => (follower_rank.unwrap(), leader_rank),
};
let dominant_comp = CompState {
points: points.parse().unwrap(),
rank: *rank,
};
let non_dominant_comp = non_d.map(|(rank, points)| CompState {
points: points.parse().unwrap(),
rank: *rank,
}); });
dbg!(leader_rank, follower_rank, primary_role); Ok((primary_role, dominant_comp, non_dominant_comp))
// dbg!(chapters.collect::<Vec<_>>()); // dbg!(chapters.collect::<Vec<_>>());
} }
fn extract_tables(html: &str) -> Vec<(String, Vec<Vec<String>>)> { fn extract_tables(html: &str) -> Result<Vec<(String, Vec<Vec<String>>)>, ScoringParseError> {
let document = Html::parse_document(html); let document = Html::parse_document(html);
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap(); let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
document.select(&card_selector).map(parse_card).collect() document.select(&card_selector).map(parse_card).collect()
} }
fn parse_info(html: &str) { fn parse_info(html: &str) -> Result<DanceInfo, ScoringParseError> {
let tables = extract_tables(html); let tables = extract_tables(html)?;
let details = &tables let details = &tables
.iter() .iter()
.find(|(v, _)| v.to_lowercase().contains("detail")) .find(|(v, _)| v.to_lowercase().contains("detail"))
.unwrap() .ok_or_else(|| ScoringParseError::ParseMismatch("detail card".to_owned()))?
.1; .1;
let stats = &tables let stats = &tables
.iter() .iter()
.find(|(v, _)| v.to_lowercase().contains("stats")) .find(|(v, _)| v.to_lowercase().contains("stats"))
.unwrap() .ok_or_else(|| ScoringParseError::ParseMismatch("stats card".to_owned()))?
.1; .1;
dbg!(parse_stats(&stats)); let (dominant_role, dominant_role_comp, non_dominant_role_comp) = parse_stats(stats)?;
dbg!(parse_details(&details)); let (firstname, lastname) = parse_details(details)?;
Ok(DanceInfo {
firstname,
lastname,
dominant_role,
dominant_role_comp,
non_dominant_role_comp,
})
} }
/// Parses the bundled `polina.html` fixture and fails when it cannot be parsed.
#[test]
fn test_parse_table() {
    let parsed = parse_info(include_str!("../../polina.html"));
    // Assert on the result instead of only printing it with `dbg!`, which let
    // a parse error pass unnoticed.
    assert!(parsed.is_ok(), "fixture failed to parse: {parsed:?}");
}
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> { pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
@@ -146,6 +192,5 @@ pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceIn
.await .await
.map_err(DanceInfoError::Request)?; .map_err(DanceInfoError::Request)?;
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?; parse_info(response.text().await.unwrap().as_str()).map_err(DanceInfoError::HtmlParse)
Ok(x.into())
} }