Using html scoring dance to collect stats
This commit is contained in:
11
Cargo.lock
generated
11
Cargo.lock
generated
@@ -2098,6 +2098,15 @@ dependencies = [
|
|||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_plain"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_spanned"
|
name = "serde_spanned"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
@@ -2320,7 +2329,7 @@ dependencies = [
|
|||||||
"reqwest",
|
"reqwest",
|
||||||
"scraper",
|
"scraper",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_plain",
|
||||||
"thiserror 2.0.12",
|
"thiserror 2.0.12",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tracing",
|
"tracing",
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ rand = "0.9.2"
|
|||||||
reqwest = "0.12.22"
|
reqwest = "0.12.22"
|
||||||
scraper = "0.24.0"
|
scraper = "0.24.0"
|
||||||
serde = { version = "1.0.219", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
serde_json = "1.0.145"
|
serde_plain = "1.0.2"
|
||||||
thiserror = "2.0.12"
|
thiserror = "2.0.12"
|
||||||
tokio = { version = "1.46.1", features = ["rt"] }
|
tokio = { version = "1.46.1", features = ["rt"] }
|
||||||
tracing = { version = "0.1.41", default-features = false, features = ["std"] }
|
tracing = { version = "0.1.41", default-features = false, features = ["std"] }
|
||||||
|
|||||||
@@ -1,19 +1,10 @@
|
|||||||
use std::fmt;
|
#[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, Eq)]
|
||||||
|
|
||||||
#[derive(serde::Deserialize, Debug, PartialEq, Eq)]
|
|
||||||
pub enum DanceRole {
|
pub enum DanceRole {
|
||||||
Leader,
|
Leader,
|
||||||
Follower,
|
Follower,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DanceRole {
|
impl DanceRole {
|
||||||
pub const fn as_str(&self) -> &str {
|
|
||||||
match self {
|
|
||||||
Self::Leader => "Leader",
|
|
||||||
Self::Follower => "Follower",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub const fn other(&self) -> Self {
|
pub const fn other(&self) -> Self {
|
||||||
match self {
|
match self {
|
||||||
@@ -23,29 +14,12 @@ impl DanceRole {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
serde_plain::derive_display_from_serialize!(DanceRole);
|
||||||
pub struct ParseDanceRoleError;
|
serde_plain::derive_fromstr_from_deserialize!(DanceRole);
|
||||||
|
|
||||||
impl std::fmt::Display for ParseDanceRoleError {
|
#[derive(
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy,
|
||||||
write!(f, "failed to parse DanceRole")
|
)]
|
||||||
}
|
|
||||||
}
|
|
||||||
impl std::error::Error for ParseDanceRoleError {}
|
|
||||||
|
|
||||||
impl TryFrom<&str> for DanceRole {
|
|
||||||
type Error = ParseDanceRoleError;
|
|
||||||
|
|
||||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
|
||||||
match value.to_lowercase().as_str() {
|
|
||||||
"leader" => Ok(Self::Leader),
|
|
||||||
"follower" => Ok(Self::Follower),
|
|
||||||
_ => Err(ParseDanceRoleError),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
|
||||||
pub enum DanceRank {
|
pub enum DanceRank {
|
||||||
Newcomer,
|
Newcomer,
|
||||||
Novice,
|
Novice,
|
||||||
@@ -56,19 +30,8 @@ pub enum DanceRank {
|
|||||||
AllStars,
|
AllStars,
|
||||||
Champions,
|
Champions,
|
||||||
}
|
}
|
||||||
|
serde_plain::derive_display_from_serialize!(DanceRank);
|
||||||
impl DanceRank {
|
serde_plain::derive_fromstr_from_deserialize!(DanceRank);
|
||||||
pub const fn as_str(&self) -> &str {
|
|
||||||
match self {
|
|
||||||
Self::Newcomer => "Newcomer",
|
|
||||||
Self::Novice => "Novice",
|
|
||||||
Self::Intermediate => "Intermediate",
|
|
||||||
Self::Advanced => "Advanced",
|
|
||||||
Self::AllStars => "All-Stars",
|
|
||||||
Self::Champions => "Champions",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct CompState {
|
pub struct CompState {
|
||||||
|
|||||||
@@ -5,6 +5,10 @@
|
|||||||
clippy::multiple_crate_versions,
|
clippy::multiple_crate_versions,
|
||||||
reason = "Don't know how to fix this, should be fine"
|
reason = "Don't know how to fix this, should be fine"
|
||||||
)]
|
)]
|
||||||
|
#![allow(
|
||||||
|
clippy::future_not_send,
|
||||||
|
reason = "Probably makes sense to fix this, I'll wait for mwbot to fix it"
|
||||||
|
)]
|
||||||
#![allow(
|
#![allow(
|
||||||
clippy::cast_possible_truncation,
|
clippy::cast_possible_truncation,
|
||||||
clippy::cast_precision_loss,
|
clippy::cast_precision_loss,
|
||||||
|
|||||||
@@ -11,24 +11,21 @@ pub enum InfoCompileError {
|
|||||||
pub fn page_from_info(info: DanceInfo) -> Result<Wikicode, InfoCompileError> {
|
pub fn page_from_info(info: DanceInfo) -> Result<Wikicode, InfoCompileError> {
|
||||||
let mut params = IndexMap::new();
|
let mut params = IndexMap::new();
|
||||||
params.insert("name".to_string(), info.name());
|
params.insert("name".to_string(), info.name());
|
||||||
params.insert(
|
params.insert("dominant_role".to_string(), info.dominant_role.to_string());
|
||||||
"dominant_role".to_string(),
|
|
||||||
info.dominant_role.as_str().to_string(),
|
|
||||||
);
|
|
||||||
params.insert(
|
params.insert(
|
||||||
"allowed_rank".to_string(),
|
"allowed_rank".to_string(),
|
||||||
info.dominant_role_comp.rank.as_str().to_string(),
|
info.dominant_role_comp.rank.to_string(),
|
||||||
);
|
);
|
||||||
params.insert(
|
params.insert(
|
||||||
"dominant_rank".to_string(),
|
"dominant_rank".to_string(),
|
||||||
info.dominant_role_comp.rank.as_str().to_string(),
|
info.dominant_role_comp.rank.to_string(),
|
||||||
);
|
);
|
||||||
params.insert(
|
params.insert(
|
||||||
"dominant_points".to_string(),
|
"dominant_points".to_string(),
|
||||||
info.dominant_role_comp.points.to_string(),
|
info.dominant_role_comp.points.to_string(),
|
||||||
);
|
);
|
||||||
if let Some(u) = info.non_dominant_role_comp {
|
if let Some(u) = info.non_dominant_role_comp {
|
||||||
params.insert("non_dominant_rank".to_string(), u.rank.as_str().to_string());
|
params.insert("non_dominant_rank".to_string(), u.rank.to_string());
|
||||||
params.insert("non_dominant_points".to_string(), u.points.to_string());
|
params.insert("non_dominant_points".to_string(), u.points.to_string());
|
||||||
}
|
}
|
||||||
let t = Template::new("Template:WSDCBox", &params)?;
|
let t = Template::new("Template:WSDCBox", &params)?;
|
||||||
|
|||||||
@@ -3,11 +3,12 @@ use std::collections::HashMap;
|
|||||||
use crate::{
|
use crate::{
|
||||||
app_signature,
|
app_signature,
|
||||||
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||||
|
worldsdc::scoringdance::fetch_wsdc_info_scoring_dance,
|
||||||
};
|
};
|
||||||
use reqwest::ClientBuilder;
|
use reqwest::ClientBuilder;
|
||||||
mod scoringdance;
|
mod scoringdance;
|
||||||
|
|
||||||
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
pub async fn fetch_wsdc_info_wsdc(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||||
let client = ClientBuilder::new()
|
let client = ClientBuilder::new()
|
||||||
.user_agent(app_signature())
|
.user_agent(app_signature())
|
||||||
.build()
|
.build()
|
||||||
@@ -36,6 +37,10 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
|||||||
Ok(x.into())
|
Ok(x.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||||
|
fetch_wsdc_info_scoring_dance(id).await
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
||||||
@@ -70,6 +75,8 @@ pub enum DanceInfoError {
|
|||||||
Request(reqwest::Error),
|
Request(reqwest::Error),
|
||||||
#[error("Failed to parse response: {0}")]
|
#[error("Failed to parse response: {0}")]
|
||||||
JsonParse(reqwest::Error),
|
JsonParse(reqwest::Error),
|
||||||
|
#[error("Failed to parse html: {0}")]
|
||||||
|
HtmlParse(#[from] scoringdance::ScoringParseError),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Deserialize, Debug)]
|
#[derive(serde::Deserialize, Debug)]
|
||||||
|
|||||||
@@ -1,24 +1,36 @@
|
|||||||
use std::{collections, str::FromStr};
|
use std::str::FromStr;
|
||||||
|
|
||||||
use reqwest::ClientBuilder;
|
use reqwest::ClientBuilder;
|
||||||
use scraper::{ElementRef, Html, Selector};
|
use scraper::{ElementRef, Html, Selector};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
app_signature,
|
app_signature,
|
||||||
dance_info::{DanceInfo, DanceRank},
|
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||||
worldsdc::{DanceInfoError, DanceInfoParser},
|
worldsdc::DanceInfoError,
|
||||||
};
|
};
|
||||||
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
pub enum ScoringParseError {
|
||||||
|
#[error("Could not parse: {0}")]
|
||||||
|
ParseMismatch(String),
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
|
fn parse_card(t: ElementRef) -> Result<(String, Vec<Vec<String>>), ScoringParseError> {
|
||||||
|
#[allow(clippy::unwrap_used)]
|
||||||
let title_selector = Selector::parse("div.card-header").unwrap();
|
let title_selector = Selector::parse("div.card-header").unwrap();
|
||||||
|
#[allow(clippy::unwrap_used)]
|
||||||
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
||||||
|
#[allow(clippy::unwrap_used)]
|
||||||
let row_selector = Selector::parse("tr").unwrap();
|
let row_selector = Selector::parse("tr").unwrap();
|
||||||
|
#[allow(clippy::unwrap_used)]
|
||||||
let cell_selector = Selector::parse("th,td").unwrap();
|
let cell_selector = Selector::parse("th,td").unwrap();
|
||||||
let table = t.select(&table_selector).next().unwrap();
|
let table = t
|
||||||
|
.select(&table_selector)
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| ScoringParseError::ParseMismatch("table".to_owned()))?;
|
||||||
let title = t
|
let title = t
|
||||||
.select(&title_selector)
|
.select(&title_selector)
|
||||||
.next()
|
.next()
|
||||||
.unwrap()
|
.ok_or_else(|| ScoringParseError::ParseMismatch("title".to_owned()))?
|
||||||
.text()
|
.text()
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join("")
|
.join("")
|
||||||
@@ -33,10 +45,10 @@ fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
|
|||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
(title, parsed_table)
|
Ok((title, parsed_table))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_details(d: &[Vec<String>]) {
|
fn parse_details(d: &[Vec<String>]) -> Result<(String, String), ScoringParseError> {
|
||||||
let first_name_row = d.iter().find(|v| {
|
let first_name_row = d.iter().find(|v| {
|
||||||
v.first()
|
v.first()
|
||||||
.is_some_and(|v| v.to_lowercase().contains("first name"))
|
.is_some_and(|v| v.to_lowercase().contains("first name"))
|
||||||
@@ -45,12 +57,20 @@ fn parse_details(d: &[Vec<String>]) {
|
|||||||
v.first()
|
v.first()
|
||||||
.is_some_and(|v| v.to_lowercase().contains("last name"))
|
.is_some_and(|v| v.to_lowercase().contains("last name"))
|
||||||
});
|
});
|
||||||
let first_name = first_name_row.unwrap().last().unwrap();
|
let first_name = first_name_row
|
||||||
let last_name = last_name_row.unwrap().last().unwrap();
|
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name".to_owned()))?
|
||||||
dbg!(first_name, last_name);
|
.last()
|
||||||
|
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
|
||||||
|
let last_name = last_name_row
|
||||||
|
.ok_or_else(|| ScoringParseError::ParseMismatch("last_name".to_owned()))?
|
||||||
|
.last()
|
||||||
|
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
|
||||||
|
Ok((first_name.clone(), last_name.clone()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_stats(d: &[Vec<String>]) {
|
fn parse_stats(
|
||||||
|
d: &[Vec<String>],
|
||||||
|
) -> Result<(DanceRole, CompState, Option<CompState>), ScoringParseError> {
|
||||||
let chapters = d.chunk_by(|_, b| b.len() != 1).map(|v| {
|
let chapters = d.chunk_by(|_, b| b.len() != 1).map(|v| {
|
||||||
let (a, b) = v.split_first().unwrap();
|
let (a, b) = v.split_first().unwrap();
|
||||||
let a = a.first().unwrap();
|
let a = a.first().unwrap();
|
||||||
@@ -66,7 +86,7 @@ fn parse_stats(d: &[Vec<String>]) {
|
|||||||
let mut sorted_chapters = rest
|
let mut sorted_chapters = rest
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(chapter, items)| {
|
.map(|(chapter, items)| {
|
||||||
let rank: DanceRank = serde_json::from_str(chapter).map_err(|_| chapter.to_owned())?;
|
let rank: DanceRank = serde_plain::from_str(chapter).map_err(|_| chapter.to_owned())?;
|
||||||
Ok::<(DanceRank, Vec<[&String; 2]>), String>((rank, items))
|
Ok::<(DanceRank, Vec<[&String; 2]>), String>((rank, items))
|
||||||
})
|
})
|
||||||
.filter_map(|v| match v {
|
.filter_map(|v| match v {
|
||||||
@@ -91,43 +111,69 @@ fn parse_stats(d: &[Vec<String>]) {
|
|||||||
.find(|[a, _]| a.to_lowercase().contains("points follower"))
|
.find(|[a, _]| a.to_lowercase().contains("points follower"))
|
||||||
.map(|[_, points]| (rank, points))
|
.map(|[_, points]| (rank, points))
|
||||||
});
|
});
|
||||||
let primary_role = all_time.iter().find_map(|[key, value]| {
|
let primary_role: DanceRole = all_time
|
||||||
|
.iter()
|
||||||
|
.find_map(|[key, value]| {
|
||||||
if !key.to_lowercase().contains("primary role") {
|
if !key.to_lowercase().contains("primary role") {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
Some(value)
|
Some(value)
|
||||||
|
})
|
||||||
|
.map(|arg0: &&std::string::String| DanceRole::from_str(arg0.as_str()).unwrap())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let ((rank, points), non_d) = match primary_role {
|
||||||
|
DanceRole::Leader => (leader_rank.unwrap(), follower_rank),
|
||||||
|
DanceRole::Follower => (follower_rank.unwrap(), leader_rank),
|
||||||
|
};
|
||||||
|
let dominant_comp = CompState {
|
||||||
|
points: points.parse().unwrap(),
|
||||||
|
rank: *rank,
|
||||||
|
};
|
||||||
|
let non_dominant_comp = non_d.map(|(rank, points)| CompState {
|
||||||
|
points: points.parse().unwrap(),
|
||||||
|
rank: *rank,
|
||||||
});
|
});
|
||||||
dbg!(leader_rank, follower_rank, primary_role);
|
Ok((primary_role, dominant_comp, non_dominant_comp))
|
||||||
|
|
||||||
// dbg!(chapters.collect::<Vec<_>>());
|
// dbg!(chapters.collect::<Vec<_>>());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_tables(html: &str) -> Vec<(String, Vec<Vec<String>>)> {
|
fn extract_tables(html: &str) -> Result<Vec<(String, Vec<Vec<String>>)>, ScoringParseError> {
|
||||||
let document = Html::parse_document(html);
|
let document = Html::parse_document(html);
|
||||||
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
||||||
|
|
||||||
document.select(&card_selector).map(parse_card).collect()
|
document.select(&card_selector).map(parse_card).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_info(html: &str) {
|
fn parse_info(html: &str) -> Result<DanceInfo, ScoringParseError> {
|
||||||
let tables = extract_tables(html);
|
let tables = extract_tables(html)?;
|
||||||
let details = &tables
|
let details = &tables
|
||||||
.iter()
|
.iter()
|
||||||
.find(|(v, _)| v.to_lowercase().contains("detail"))
|
.find(|(v, _)| v.to_lowercase().contains("detail"))
|
||||||
.unwrap()
|
.ok_or_else(|| ScoringParseError::ParseMismatch("detail card".to_owned()))?
|
||||||
.1;
|
.1;
|
||||||
let stats = &tables
|
let stats = &tables
|
||||||
.iter()
|
.iter()
|
||||||
.find(|(v, _)| v.to_lowercase().contains("stats"))
|
.find(|(v, _)| v.to_lowercase().contains("stats"))
|
||||||
.unwrap()
|
.ok_or_else(|| ScoringParseError::ParseMismatch("stats card".to_owned()))?
|
||||||
.1;
|
.1;
|
||||||
|
|
||||||
dbg!(parse_stats(&stats));
|
let (dominant_role, dominant_role_comp, non_dominant_role_comp) = parse_stats(stats)?;
|
||||||
dbg!(parse_details(&details));
|
let (firstname, lastname) = parse_details(details)?;
|
||||||
|
|
||||||
|
Ok(DanceInfo {
|
||||||
|
firstname,
|
||||||
|
lastname,
|
||||||
|
dominant_role,
|
||||||
|
dominant_role_comp,
|
||||||
|
non_dominant_role_comp,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_table() {
|
fn test_parse_table() {
|
||||||
parse_info(include_str!("../../polina.html"));
|
dbg!(parse_info(include_str!("../../polina.html")));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||||
@@ -146,6 +192,5 @@ pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceIn
|
|||||||
.await
|
.await
|
||||||
.map_err(DanceInfoError::Request)?;
|
.map_err(DanceInfoError::Request)?;
|
||||||
|
|
||||||
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
parse_info(response.text().await.unwrap().as_str()).map_err(DanceInfoError::HtmlParse)
|
||||||
Ok(x.into())
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user