Using html scoring dance to collect stats

This commit is contained in:
Lukas Wölfer
2025-10-05 00:26:55 +02:00
parent c54d950e30
commit bc57e8cceb
7 changed files with 109 additions and 84 deletions

11
Cargo.lock generated
View File

@@ -2098,6 +2098,15 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_plain"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
dependencies = [
"serde",
]
[[package]]
name = "serde_spanned"
version = "1.0.0"
@@ -2320,7 +2329,7 @@ dependencies = [
"reqwest",
"scraper",
"serde",
"serde_json",
"serde_plain",
"thiserror 2.0.12",
"tokio",
"tracing",

View File

@@ -19,7 +19,7 @@ rand = "0.9.2"
reqwest = "0.12.22"
scraper = "0.24.0"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.145"
serde_plain = "1.0.2"
thiserror = "2.0.12"
tokio = { version = "1.46.1", features = ["rt"] }
tracing = { version = "0.1.41", default-features = false, features = ["std"] }

View File

@@ -1,19 +1,10 @@
use std::fmt;
/// Role a dancer competes in; either `Leader` or `Follower`.
#[derive(serde::Deserialize, Debug, PartialEq, Eq)]
#[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, Eq)]
pub enum DanceRole {
Leader,
Follower,
}
impl DanceRole {
pub const fn as_str(&self) -> &str {
match self {
Self::Leader => "Leader",
Self::Follower => "Follower",
}
}
#[allow(dead_code)]
pub const fn other(&self) -> Self {
match self {
@@ -23,29 +14,12 @@ impl DanceRole {
}
}
#[derive(Debug)]
pub struct ParseDanceRoleError;
// `Display` and `FromStr` for `DanceRole` are generated from its serde
// `Serialize` / `Deserialize` implementations.
// NOTE(review): the hand-written `TryFrom<&str>` lowercased its input before
// matching; presumably the serde-based `FromStr` only accepts the exact variant
// names ("Leader", "Follower") - confirm no caller passes other casings.
serde_plain::derive_display_from_serialize!(DanceRole);
serde_plain::derive_fromstr_from_deserialize!(DanceRole);
impl std::fmt::Display for ParseDanceRoleError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "failed to parse DanceRole")
}
}
impl std::error::Error for ParseDanceRoleError {}
impl TryFrom<&str> for DanceRole {
type Error = ParseDanceRoleError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
match value.to_lowercase().as_str() {
"leader" => Ok(Self::Leader),
"follower" => Ok(Self::Follower),
_ => Err(ParseDanceRoleError),
}
}
}
#[derive(serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
#[derive(
serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy,
)]
pub enum DanceRank {
Newcomer,
Novice,
@@ -56,19 +30,8 @@ pub enum DanceRank {
AllStars,
Champions,
}
impl DanceRank {
pub const fn as_str(&self) -> &str {
match self {
Self::Newcomer => "Newcomer",
Self::Novice => "Novice",
Self::Intermediate => "Intermediate",
Self::Advanced => "Advanced",
Self::AllStars => "All-Stars",
Self::Champions => "Champions",
}
}
}
// `Display` and `FromStr` for `DanceRank` are generated from its serde
// `Serialize` / `Deserialize` implementations.
// NOTE(review): `DanceRank::as_str` rendered `AllStars` as "All-Stars"; unless
// the variant carries a serde rename (not visible here), the derived `Display`
// presumably yields "AllStars" and `FromStr` presumably rejects "All-Stars" -
// confirm against the wiki template parameters and the scraped chapter titles.
serde_plain::derive_display_from_serialize!(DanceRank);
serde_plain::derive_fromstr_from_deserialize!(DanceRank);
#[derive(Debug)]
pub struct CompState {

View File

@@ -5,6 +5,10 @@
clippy::multiple_crate_versions,
reason = "Don't know how to fix this, should be fine"
)]
#![allow(
clippy::future_not_send,
reason = "Probably makes sense to fix this, I'll wait for mwbot to fix it"
)]
#![allow(
clippy::cast_possible_truncation,
clippy::cast_precision_loss,

View File

@@ -11,24 +11,21 @@ pub enum InfoCompileError {
pub fn page_from_info(info: DanceInfo) -> Result<Wikicode, InfoCompileError> {
let mut params = IndexMap::new();
params.insert("name".to_string(), info.name());
params.insert(
"dominant_role".to_string(),
info.dominant_role.as_str().to_string(),
);
params.insert("dominant_role".to_string(), info.dominant_role.to_string());
params.insert(
"allowed_rank".to_string(),
info.dominant_role_comp.rank.as_str().to_string(),
info.dominant_role_comp.rank.to_string(),
);
params.insert(
"dominant_rank".to_string(),
info.dominant_role_comp.rank.as_str().to_string(),
info.dominant_role_comp.rank.to_string(),
);
params.insert(
"dominant_points".to_string(),
info.dominant_role_comp.points.to_string(),
);
if let Some(u) = info.non_dominant_role_comp {
params.insert("non_dominant_rank".to_string(), u.rank.as_str().to_string());
params.insert("non_dominant_rank".to_string(), u.rank.to_string());
params.insert("non_dominant_points".to_string(), u.points.to_string());
}
let t = Template::new("Template:WSDCBox", &params)?;

View File

@@ -3,11 +3,12 @@ use std::collections::HashMap;
use crate::{
app_signature,
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
worldsdc::scoringdance::fetch_wsdc_info_scoring_dance,
};
use reqwest::ClientBuilder;
mod scoringdance;
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
pub async fn fetch_wsdc_info_wsdc(id: u32) -> Result<DanceInfo, DanceInfoError> {
let client = ClientBuilder::new()
.user_agent(app_signature())
.build()
@@ -36,6 +37,10 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
Ok(x.into())
}
/// Fetches the [`DanceInfo`] for the dancer with the given `id`.
///
/// This is a thin entry point that forwards to
/// [`fetch_wsdc_info_scoring_dance`] and returns its result unchanged.
///
/// # Errors
///
/// Returns whatever [`DanceInfoError`] the delegate returns.
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
fetch_wsdc_info_scoring_dance(id).await
}
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
@@ -70,6 +75,8 @@ pub enum DanceInfoError {
Request(reqwest::Error),
#[error("Failed to parse response: {0}")]
JsonParse(reqwest::Error),
#[error("Failed to parse html: {0}")]
HtmlParse(#[from] scoringdance::ScoringParseError),
}
#[derive(serde::Deserialize, Debug)]

View File

@@ -1,24 +1,36 @@
use std::{collections, str::FromStr};
use std::str::FromStr;
use reqwest::ClientBuilder;
use scraper::{ElementRef, Html, Selector};
use crate::{
app_signature,
dance_info::{DanceInfo, DanceRank},
worldsdc::{DanceInfoError, DanceInfoParser},
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
worldsdc::DanceInfoError,
};
/// Error produced when the scraped HTML does not have the structure the
/// parser expects.
#[derive(thiserror::Error, Debug)]
pub enum ScoringParseError {
/// An expected piece of the page was not found; the payload names the
/// missing piece (for example "table", "title" or "detail card").
#[error("Could not parse: {0}")]
ParseMismatch(String),
}
fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
fn parse_card(t: ElementRef) -> Result<(String, Vec<Vec<String>>), ScoringParseError> {
#[allow(clippy::unwrap_used)]
let title_selector = Selector::parse("div.card-header").unwrap();
#[allow(clippy::unwrap_used)]
let table_selector = Selector::parse("div.card-body > table").unwrap();
#[allow(clippy::unwrap_used)]
let row_selector = Selector::parse("tr").unwrap();
#[allow(clippy::unwrap_used)]
let cell_selector = Selector::parse("th,td").unwrap();
let table = t.select(&table_selector).next().unwrap();
let table = t
.select(&table_selector)
.next()
.ok_or_else(|| ScoringParseError::ParseMismatch("table".to_owned()))?;
let title = t
.select(&title_selector)
.next()
.unwrap()
.ok_or_else(|| ScoringParseError::ParseMismatch("title".to_owned()))?
.text()
.collect::<Vec<_>>()
.join("")
@@ -33,10 +45,10 @@ fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
.collect::<Vec<_>>()
})
.collect::<Vec<_>>();
(title, parsed_table)
Ok((title, parsed_table))
}
fn parse_details(d: &[Vec<String>]) {
fn parse_details(d: &[Vec<String>]) -> Result<(String, String), ScoringParseError> {
let first_name_row = d.iter().find(|v| {
v.first()
.is_some_and(|v| v.to_lowercase().contains("first name"))
@@ -45,12 +57,20 @@ fn parse_details(d: &[Vec<String>]) {
v.first()
.is_some_and(|v| v.to_lowercase().contains("last name"))
});
let first_name = first_name_row.unwrap().last().unwrap();
let last_name = last_name_row.unwrap().last().unwrap();
dbg!(first_name, last_name);
// Each lookup reports which piece was missing: the row itself, or the value
// cell (the last cell) of that row.
let first_name = first_name_row
    .ok_or_else(|| ScoringParseError::ParseMismatch("first_name".to_owned()))?
    .last()
    .ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
let last_name = last_name_row
    .ok_or_else(|| ScoringParseError::ParseMismatch("last_name".to_owned()))?
    .last()
    // Name the last-name field here so the error does not blame the first name.
    .ok_or_else(|| ScoringParseError::ParseMismatch("last_name value".to_owned()))?;
Ok((first_name.clone(), last_name.clone()))
}
fn parse_stats(d: &[Vec<String>]) {
fn parse_stats(
d: &[Vec<String>],
) -> Result<(DanceRole, CompState, Option<CompState>), ScoringParseError> {
let chapters = d.chunk_by(|_, b| b.len() != 1).map(|v| {
let (a, b) = v.split_first().unwrap();
let a = a.first().unwrap();
@@ -66,7 +86,7 @@ fn parse_stats(d: &[Vec<String>]) {
let mut sorted_chapters = rest
.into_iter()
.map(|(chapter, items)| {
let rank: DanceRank = serde_json::from_str(chapter).map_err(|_| chapter.to_owned())?;
let rank: DanceRank = serde_plain::from_str(chapter).map_err(|_| chapter.to_owned())?;
Ok::<(DanceRank, Vec<[&String; 2]>), String>((rank, items))
})
.filter_map(|v| match v {
@@ -91,43 +111,69 @@ fn parse_stats(d: &[Vec<String>]) {
.find(|[a, _]| a.to_lowercase().contains("points follower"))
.map(|[_, points]| (rank, points))
});
let primary_role = all_time.iter().find_map(|[key, value]| {
// Look up the "primary role" entry; a page without it is reported as a parse
// mismatch rather than a panic, because the HTML is external input.
let primary_role_text = all_time
    .iter()
    .find_map(|[key, value]| {
        if !key.to_lowercase().contains("primary role") {
            return None;
        }
        Some(value)
    })
    .ok_or_else(|| ScoringParseError::ParseMismatch("primary role".to_owned()))?;
let primary_role = DanceRole::from_str(primary_role_text.as_str()).map_err(|_| {
    ScoringParseError::ParseMismatch(format!("primary role value: {primary_role_text}"))
})?;
// The primary role decides which of the two (rank, points) pairs is dominant;
// only the dominant pair is mandatory.
let (dominant, non_dominant) = match primary_role {
    DanceRole::Leader => (leader_rank, follower_rank),
    DanceRole::Follower => (follower_rank, leader_rank),
};
let (rank, points) = dominant
    .ok_or_else(|| ScoringParseError::ParseMismatch("points for primary role".to_owned()))?;
let dominant_comp = CompState {
    points: points
        .parse()
        .map_err(|_| ScoringParseError::ParseMismatch(format!("points value: {points}")))?,
    rank: *rank,
};
// `transpose` turns Option<Result<..>> into Result<Option<..>> so a malformed
// points value for the secondary role is propagated as an error as well.
let non_dominant_comp = non_dominant
    .map(|(rank, points)| {
        Ok::<_, ScoringParseError>(CompState {
            points: points.parse().map_err(|_| {
                ScoringParseError::ParseMismatch(format!("points value: {points}"))
            })?,
            rank: *rank,
        })
    })
    .transpose()?;
dbg!(leader_rank, follower_rank, primary_role);
Ok((primary_role, dominant_comp, non_dominant_comp))
// dbg!(chapters.collect::<Vec<_>>());
}
/// Parses `html` as a document and runs `parse_card` on every `div` that has a
/// direct `div.card-header` child, collecting the per-card results.
fn extract_tables(html: &str) -> Vec<(String, Vec<Vec<String>>)> {
fn extract_tables(html: &str) -> Result<Vec<(String, Vec<Vec<String>>)>, ScoringParseError> {
let document = Html::parse_document(html);
// The selector is a fixed string literal, so a parse failure here would be a
// programming error rather than a property of the input.
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
document.select(&card_selector).map(parse_card).collect()
}
/// Extracts the cards from `html`, picks the card whose title contains
/// "detail" and the one whose title contains "stats" (both compared in lower
/// case), and combines what `parse_details` and `parse_stats` return for them.
fn parse_info(html: &str) {
let tables = extract_tables(html);
fn parse_info(html: &str) -> Result<DanceInfo, ScoringParseError> {
let tables = extract_tables(html)?;
// `.1` is the table part of the (title, table) pair produced per card.
let details = &tables
.iter()
.find(|(v, _)| v.to_lowercase().contains("detail"))
.unwrap()
.ok_or_else(|| ScoringParseError::ParseMismatch("detail card".to_owned()))?
.1;
let stats = &tables
.iter()
.find(|(v, _)| v.to_lowercase().contains("stats"))
.unwrap()
.ok_or_else(|| ScoringParseError::ParseMismatch("stats card".to_owned()))?
.1;
dbg!(parse_stats(&stats));
dbg!(parse_details(&details));
let (dominant_role, dominant_role_comp, non_dominant_role_comp) = parse_stats(stats)?;
let (firstname, lastname) = parse_details(details)?;
Ok(DanceInfo {
firstname,
lastname,
dominant_role,
dominant_role_comp,
non_dominant_role_comp,
})
}
/// Parses the checked-in fixture page and fails if the parser returns an error.
#[test]
#[allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
fn test_parse_table() {
    // Unwrap the result: merely printing it would let the test pass even when
    // the fixture can no longer be parsed.
    let info = parse_info(include_str!("../../polina.html")).unwrap();
    dbg!(info);
}
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
@@ -146,6 +192,5 @@ pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceIn
.await
.map_err(DanceInfoError::Request)?;
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
Ok(x.into())
parse_info(response.text().await.unwrap().as_str()).map_err(DanceInfoError::HtmlParse)
}