Using html scoring dance to collect stats
This commit is contained in:
11
Cargo.lock
generated
11
Cargo.lock
generated
@@ -2098,6 +2098,15 @@ dependencies = [
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_plain"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "1.0.0"
|
||||
@@ -2320,7 +2329,7 @@ dependencies = [
|
||||
"reqwest",
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_plain",
|
||||
"thiserror 2.0.12",
|
||||
"tokio",
|
||||
"tracing",
|
||||
|
||||
@@ -19,7 +19,7 @@ rand = "0.9.2"
|
||||
reqwest = "0.12.22"
|
||||
scraper = "0.24.0"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = "1.0.145"
|
||||
serde_plain = "1.0.2"
|
||||
thiserror = "2.0.12"
|
||||
tokio = { version = "1.46.1", features = ["rt"] }
|
||||
tracing = { version = "0.1.41", default-features = false, features = ["std"] }
|
||||
|
||||
@@ -1,19 +1,10 @@
|
||||
use std::fmt;
|
||||
|
||||
#[derive(serde::Deserialize, Debug, PartialEq, Eq)]
|
||||
#[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, Eq)]
|
||||
pub enum DanceRole {
|
||||
Leader,
|
||||
Follower,
|
||||
}
|
||||
|
||||
impl DanceRole {
|
||||
pub const fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Self::Leader => "Leader",
|
||||
Self::Follower => "Follower",
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub const fn other(&self) -> Self {
|
||||
match self {
|
||||
@@ -23,29 +14,12 @@ impl DanceRole {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseDanceRoleError;
|
||||
serde_plain::derive_display_from_serialize!(DanceRole);
|
||||
serde_plain::derive_fromstr_from_deserialize!(DanceRole);
|
||||
|
||||
impl std::fmt::Display for ParseDanceRoleError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "failed to parse DanceRole")
|
||||
}
|
||||
}
|
||||
impl std::error::Error for ParseDanceRoleError {}
|
||||
|
||||
impl TryFrom<&str> for DanceRole {
|
||||
type Error = ParseDanceRoleError;
|
||||
|
||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||
match value.to_lowercase().as_str() {
|
||||
"leader" => Ok(Self::Leader),
|
||||
"follower" => Ok(Self::Follower),
|
||||
_ => Err(ParseDanceRoleError),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
||||
#[derive(
|
||||
serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy,
|
||||
)]
|
||||
pub enum DanceRank {
|
||||
Newcomer,
|
||||
Novice,
|
||||
@@ -56,19 +30,8 @@ pub enum DanceRank {
|
||||
AllStars,
|
||||
Champions,
|
||||
}
|
||||
|
||||
impl DanceRank {
|
||||
pub const fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Self::Newcomer => "Newcomer",
|
||||
Self::Novice => "Novice",
|
||||
Self::Intermediate => "Intermediate",
|
||||
Self::Advanced => "Advanced",
|
||||
Self::AllStars => "All-Stars",
|
||||
Self::Champions => "Champions",
|
||||
}
|
||||
}
|
||||
}
|
||||
serde_plain::derive_display_from_serialize!(DanceRank);
|
||||
serde_plain::derive_fromstr_from_deserialize!(DanceRank);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CompState {
|
||||
|
||||
@@ -5,6 +5,10 @@
|
||||
clippy::multiple_crate_versions,
|
||||
reason = "Don't know how to fix this, should be fine"
|
||||
)]
|
||||
#![allow(
|
||||
clippy::future_not_send,
|
||||
reason = "Probably makes sense to fix this, I'll wait for mwbot to fix it"
|
||||
)]
|
||||
#![allow(
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_precision_loss,
|
||||
|
||||
@@ -11,24 +11,21 @@ pub enum InfoCompileError {
|
||||
pub fn page_from_info(info: DanceInfo) -> Result<Wikicode, InfoCompileError> {
|
||||
let mut params = IndexMap::new();
|
||||
params.insert("name".to_string(), info.name());
|
||||
params.insert(
|
||||
"dominant_role".to_string(),
|
||||
info.dominant_role.as_str().to_string(),
|
||||
);
|
||||
params.insert("dominant_role".to_string(), info.dominant_role.to_string());
|
||||
params.insert(
|
||||
"allowed_rank".to_string(),
|
||||
info.dominant_role_comp.rank.as_str().to_string(),
|
||||
info.dominant_role_comp.rank.to_string(),
|
||||
);
|
||||
params.insert(
|
||||
"dominant_rank".to_string(),
|
||||
info.dominant_role_comp.rank.as_str().to_string(),
|
||||
info.dominant_role_comp.rank.to_string(),
|
||||
);
|
||||
params.insert(
|
||||
"dominant_points".to_string(),
|
||||
info.dominant_role_comp.points.to_string(),
|
||||
);
|
||||
if let Some(u) = info.non_dominant_role_comp {
|
||||
params.insert("non_dominant_rank".to_string(), u.rank.as_str().to_string());
|
||||
params.insert("non_dominant_rank".to_string(), u.rank.to_string());
|
||||
params.insert("non_dominant_points".to_string(), u.points.to_string());
|
||||
}
|
||||
let t = Template::new("Template:WSDCBox", ¶ms)?;
|
||||
|
||||
@@ -3,11 +3,12 @@ use std::collections::HashMap;
|
||||
use crate::{
|
||||
app_signature,
|
||||
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||
worldsdc::scoringdance::fetch_wsdc_info_scoring_dance,
|
||||
};
|
||||
use reqwest::ClientBuilder;
|
||||
mod scoringdance;
|
||||
|
||||
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
pub async fn fetch_wsdc_info_wsdc(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
let client = ClientBuilder::new()
|
||||
.user_agent(app_signature())
|
||||
.build()
|
||||
@@ -36,6 +37,10 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
Ok(x.into())
|
||||
}
|
||||
|
||||
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
fetch_wsdc_info_scoring_dance(id).await
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
||||
@@ -70,6 +75,8 @@ pub enum DanceInfoError {
|
||||
Request(reqwest::Error),
|
||||
#[error("Failed to parse response: {0}")]
|
||||
JsonParse(reqwest::Error),
|
||||
#[error("Failed to parse html: {0}")]
|
||||
HtmlParse(#[from] scoringdance::ScoringParseError),
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize, Debug)]
|
||||
|
||||
@@ -1,24 +1,36 @@
|
||||
use std::{collections, str::FromStr};
|
||||
use std::str::FromStr;
|
||||
|
||||
use reqwest::ClientBuilder;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
|
||||
use crate::{
|
||||
app_signature,
|
||||
dance_info::{DanceInfo, DanceRank},
|
||||
worldsdc::{DanceInfoError, DanceInfoParser},
|
||||
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||
worldsdc::DanceInfoError,
|
||||
};
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum ScoringParseError {
|
||||
#[error("Could not parse: {0}")]
|
||||
ParseMismatch(String),
|
||||
}
|
||||
|
||||
fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
|
||||
fn parse_card(t: ElementRef) -> Result<(String, Vec<Vec<String>>), ScoringParseError> {
|
||||
#[allow(clippy::unwrap_used)]
|
||||
let title_selector = Selector::parse("div.card-header").unwrap();
|
||||
#[allow(clippy::unwrap_used)]
|
||||
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
||||
#[allow(clippy::unwrap_used)]
|
||||
let row_selector = Selector::parse("tr").unwrap();
|
||||
#[allow(clippy::unwrap_used)]
|
||||
let cell_selector = Selector::parse("th,td").unwrap();
|
||||
let table = t.select(&table_selector).next().unwrap();
|
||||
let table = t
|
||||
.select(&table_selector)
|
||||
.next()
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("table".to_owned()))?;
|
||||
let title = t
|
||||
.select(&title_selector)
|
||||
.next()
|
||||
.unwrap()
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("title".to_owned()))?
|
||||
.text()
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
@@ -33,10 +45,10 @@ fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
(title, parsed_table)
|
||||
Ok((title, parsed_table))
|
||||
}
|
||||
|
||||
fn parse_details(d: &[Vec<String>]) {
|
||||
fn parse_details(d: &[Vec<String>]) -> Result<(String, String), ScoringParseError> {
|
||||
let first_name_row = d.iter().find(|v| {
|
||||
v.first()
|
||||
.is_some_and(|v| v.to_lowercase().contains("first name"))
|
||||
@@ -45,12 +57,20 @@ fn parse_details(d: &[Vec<String>]) {
|
||||
v.first()
|
||||
.is_some_and(|v| v.to_lowercase().contains("last name"))
|
||||
});
|
||||
let first_name = first_name_row.unwrap().last().unwrap();
|
||||
let last_name = last_name_row.unwrap().last().unwrap();
|
||||
dbg!(first_name, last_name);
|
||||
let first_name = first_name_row
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name".to_owned()))?
|
||||
.last()
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
|
||||
let last_name = last_name_row
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("last_name".to_owned()))?
|
||||
.last()
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("first_name value".to_owned()))?;
|
||||
Ok((first_name.clone(), last_name.clone()))
|
||||
}
|
||||
|
||||
fn parse_stats(d: &[Vec<String>]) {
|
||||
fn parse_stats(
|
||||
d: &[Vec<String>],
|
||||
) -> Result<(DanceRole, CompState, Option<CompState>), ScoringParseError> {
|
||||
let chapters = d.chunk_by(|_, b| b.len() != 1).map(|v| {
|
||||
let (a, b) = v.split_first().unwrap();
|
||||
let a = a.first().unwrap();
|
||||
@@ -66,7 +86,7 @@ fn parse_stats(d: &[Vec<String>]) {
|
||||
let mut sorted_chapters = rest
|
||||
.into_iter()
|
||||
.map(|(chapter, items)| {
|
||||
let rank: DanceRank = serde_json::from_str(chapter).map_err(|_| chapter.to_owned())?;
|
||||
let rank: DanceRank = serde_plain::from_str(chapter).map_err(|_| chapter.to_owned())?;
|
||||
Ok::<(DanceRank, Vec<[&String; 2]>), String>((rank, items))
|
||||
})
|
||||
.filter_map(|v| match v {
|
||||
@@ -91,43 +111,69 @@ fn parse_stats(d: &[Vec<String>]) {
|
||||
.find(|[a, _]| a.to_lowercase().contains("points follower"))
|
||||
.map(|[_, points]| (rank, points))
|
||||
});
|
||||
let primary_role = all_time.iter().find_map(|[key, value]| {
|
||||
let primary_role: DanceRole = all_time
|
||||
.iter()
|
||||
.find_map(|[key, value]| {
|
||||
if !key.to_lowercase().contains("primary role") {
|
||||
return None;
|
||||
}
|
||||
Some(value)
|
||||
})
|
||||
.map(|arg0: &&std::string::String| DanceRole::from_str(arg0.as_str()).unwrap())
|
||||
.unwrap();
|
||||
|
||||
let ((rank, points), non_d) = match primary_role {
|
||||
DanceRole::Leader => (leader_rank.unwrap(), follower_rank),
|
||||
DanceRole::Follower => (follower_rank.unwrap(), leader_rank),
|
||||
};
|
||||
let dominant_comp = CompState {
|
||||
points: points.parse().unwrap(),
|
||||
rank: *rank,
|
||||
};
|
||||
let non_dominant_comp = non_d.map(|(rank, points)| CompState {
|
||||
points: points.parse().unwrap(),
|
||||
rank: *rank,
|
||||
});
|
||||
dbg!(leader_rank, follower_rank, primary_role);
|
||||
Ok((primary_role, dominant_comp, non_dominant_comp))
|
||||
|
||||
// dbg!(chapters.collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
fn extract_tables(html: &str) -> Vec<(String, Vec<Vec<String>>)> {
|
||||
fn extract_tables(html: &str) -> Result<Vec<(String, Vec<Vec<String>>)>, ScoringParseError> {
|
||||
let document = Html::parse_document(html);
|
||||
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
||||
|
||||
document.select(&card_selector).map(parse_card).collect()
|
||||
}
|
||||
|
||||
fn parse_info(html: &str) {
|
||||
let tables = extract_tables(html);
|
||||
fn parse_info(html: &str) -> Result<DanceInfo, ScoringParseError> {
|
||||
let tables = extract_tables(html)?;
|
||||
let details = &tables
|
||||
.iter()
|
||||
.find(|(v, _)| v.to_lowercase().contains("detail"))
|
||||
.unwrap()
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("detail card".to_owned()))?
|
||||
.1;
|
||||
let stats = &tables
|
||||
.iter()
|
||||
.find(|(v, _)| v.to_lowercase().contains("stats"))
|
||||
.unwrap()
|
||||
.ok_or_else(|| ScoringParseError::ParseMismatch("stats card".to_owned()))?
|
||||
.1;
|
||||
|
||||
dbg!(parse_stats(&stats));
|
||||
dbg!(parse_details(&details));
|
||||
let (dominant_role, dominant_role_comp, non_dominant_role_comp) = parse_stats(stats)?;
|
||||
let (firstname, lastname) = parse_details(details)?;
|
||||
|
||||
Ok(DanceInfo {
|
||||
firstname,
|
||||
lastname,
|
||||
dominant_role,
|
||||
dominant_role_comp,
|
||||
non_dominant_role_comp,
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_table() {
|
||||
parse_info(include_str!("../../polina.html"));
|
||||
dbg!(parse_info(include_str!("../../polina.html")));
|
||||
}
|
||||
|
||||
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
@@ -146,6 +192,5 @@ pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceIn
|
||||
.await
|
||||
.map_err(DanceInfoError::Request)?;
|
||||
|
||||
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
||||
Ok(x.into())
|
||||
parse_info(response.text().await.unwrap().as_str()).map_err(DanceInfoError::HtmlParse)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user