Worked on table parsing
This commit is contained in:
@@ -36,8 +36,8 @@ impl TryFrom<&str> for DanceRole {
|
|||||||
|
|
||||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
match value.to_lowercase().as_str() {
|
match value.to_lowercase().as_str() {
|
||||||
"leader" => Ok(DanceRole::Leader),
|
"leader" => Ok(Self::Leader),
|
||||||
"follower" => Ok(DanceRole::Follower),
|
"follower" => Ok(Self::Follower),
|
||||||
_ => Err(ParseDanceRoleError),
|
_ => Err(ParseDanceRoleError),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use reqwest::ClientBuilder;
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
app_signature,
|
app_signature,
|
||||||
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||||
};
|
};
|
||||||
|
use reqwest::ClientBuilder;
|
||||||
|
mod scoringdance;
|
||||||
|
|
||||||
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||||
let client = ClientBuilder::new()
|
let client = ClientBuilder::new()
|
||||||
@@ -36,75 +36,6 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
|||||||
Ok(x.into())
|
Ok(x.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
use scraper::{Html, Selector};
|
|
||||||
|
|
||||||
fn extract_tables(html: &str) -> Vec<(String, Vec<(String, String)>)> {
|
|
||||||
let document = Html::parse_document(html);
|
|
||||||
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
|
||||||
let title_selector = Selector::parse("div.card-header").unwrap();
|
|
||||||
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
|
||||||
let row_selector = Selector::parse("tr").unwrap();
|
|
||||||
let header_selector = Selector::parse("th").unwrap();
|
|
||||||
let cell_selector = Selector::parse("td").unwrap();
|
|
||||||
|
|
||||||
let mut pairs = Vec::new();
|
|
||||||
|
|
||||||
for card in document.select(&card_selector) {
|
|
||||||
let table = card.select(&table_selector).next().unwrap();
|
|
||||||
let title = card
|
|
||||||
.select(&title_selector)
|
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.text()
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join("sep")
|
|
||||||
.trim()
|
|
||||||
.to_owned();
|
|
||||||
|
|
||||||
for row in table.select(&row_selector) {
|
|
||||||
let header = row.select(&header_selector).next();
|
|
||||||
let cell = row.select(&cell_selector).next();
|
|
||||||
|
|
||||||
if let (Some(h), Some(c)) = (header, cell) {
|
|
||||||
let key = h.text().collect::<String>().trim().to_string();
|
|
||||||
let value = c.text().collect::<String>().trim().to_string();
|
|
||||||
pairs.push((key, value));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_table(html: &str) {
|
|
||||||
let tables = extract_tables(html);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Smoke test: `parse_table` must get through the bundled `polina.html` page
/// without panicking.
#[test]
fn test_parse_table() {
    let page = include_str!("../../polina.html");
    parse_table(page);
}
|
|
||||||
|
|
||||||
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
|
||||||
let client = ClientBuilder::new()
|
|
||||||
.user_agent(app_signature())
|
|
||||||
.build()
|
|
||||||
.map_err(DanceInfoError::ClientBuild)?;
|
|
||||||
|
|
||||||
let url = format!("https://scoring.dance/enUS/wsdc/registry/{id}.html");
|
|
||||||
let request = client
|
|
||||||
.request(reqwest::Method::GET, url)
|
|
||||||
.build()
|
|
||||||
.map_err(DanceInfoError::RequestBuild)?;
|
|
||||||
let response = client
|
|
||||||
.execute(request)
|
|
||||||
.await
|
|
||||||
.map_err(DanceInfoError::Request)?;
|
|
||||||
|
|
||||||
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
|
||||||
Ok(x.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
||||||
|
|||||||
103
src/worldsdc/scoringdance.rs
Normal file
103
src/worldsdc/scoringdance.rs
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
use reqwest::ClientBuilder;
|
||||||
|
use scraper::{ElementRef, Html, Selector};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
app_signature,
|
||||||
|
dance_info::DanceInfo,
|
||||||
|
worldsdc::{DanceInfoError, DanceInfoParser},
|
||||||
|
};
|
||||||
|
|
||||||
|
fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
|
||||||
|
let title_selector = Selector::parse("div.card-header").unwrap();
|
||||||
|
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
||||||
|
let row_selector = Selector::parse("tr").unwrap();
|
||||||
|
let cell_selector = Selector::parse("th,td").unwrap();
|
||||||
|
let table = t.select(&table_selector).next().unwrap();
|
||||||
|
let title = t
|
||||||
|
.select(&title_selector)
|
||||||
|
.next()
|
||||||
|
.unwrap()
|
||||||
|
.text()
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("")
|
||||||
|
.trim()
|
||||||
|
.to_owned();
|
||||||
|
|
||||||
|
let parsed_table = table
|
||||||
|
.select(&row_selector)
|
||||||
|
.map(|row| {
|
||||||
|
row.select(&cell_selector)
|
||||||
|
.map(|v| v.text().collect::<String>().trim().to_string())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
(title, parsed_table)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Looks up the first-name and last-name rows of a details table and prints them.
///
/// A row matches when its first cell contains the label (`"first name"` /
/// `"last name"`) case-insensitively; the value is taken from the row's last
/// cell. Both values are written out with `dbg!`.
///
/// # Panics
/// Panics if either row is missing from `d`.
fn parse_details(d: &[Vec<String>]) {
    // Finds the first row whose leading cell mentions `label`.
    let row_with_label = |label: &str| {
        d.iter().find(|row| {
            row.first()
                .is_some_and(|cell| cell.to_lowercase().contains(label))
        })
    };

    let first_name_row = row_with_label("first name");
    let last_name_row = row_with_label("last name");

    let first_name = first_name_row.unwrap().last().unwrap();
    let last_name = last_name_row.unwrap().last().unwrap();
    dbg!(first_name, last_name);
}
|
||||||
|
|
||||||
|
/// Groups the rows of a stats table into runs and prints the grouping with `dbg!`.
///
/// Every row that consists of exactly one cell opens a new run; all following
/// rows with a different cell count stay in that run.
fn parse_stats(d: &[Vec<String>]) {
    let starts_group = |row: &Vec<String>| row.len() == 1;
    // `chunk_by` keeps two neighbours together while the predicate holds, so the
    // slice is cut right before each single-cell row.
    let chapters = d.chunk_by(|_, next| !starts_group(next));
    dbg!(chapters.collect::<Vec<_>>());
}
|
||||||
|
|
||||||
|
fn extract_tables(html: &str) -> Vec<(String, Vec<Vec<String>>)> {
|
||||||
|
let document = Html::parse_document(html);
|
||||||
|
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
||||||
|
|
||||||
|
document.select(&card_selector).map(parse_card).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_info(html: &str) {
|
||||||
|
let tables = extract_tables(html);
|
||||||
|
let details = &tables
|
||||||
|
.iter()
|
||||||
|
.find(|(v, _)| v.to_lowercase().contains("detail"))
|
||||||
|
.unwrap()
|
||||||
|
.1;
|
||||||
|
let stats = &tables
|
||||||
|
.iter()
|
||||||
|
.find(|(v, _)| v.to_lowercase().contains("stats"))
|
||||||
|
.unwrap()
|
||||||
|
.1;
|
||||||
|
|
||||||
|
dbg!(parse_stats(&stats));
|
||||||
|
dbg!(parse_details(&details));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Smoke test: `parse_info` must get through the bundled `polina.html` page
/// without panicking.
#[test]
fn test_parse_table() {
    let page = include_str!("../../polina.html");
    parse_info(page);
}
|
||||||
|
|
||||||
|
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||||
|
let client = ClientBuilder::new()
|
||||||
|
.user_agent(app_signature())
|
||||||
|
.build()
|
||||||
|
.map_err(DanceInfoError::ClientBuild)?;
|
||||||
|
|
||||||
|
let url = format!("https://scoring.dance/enUS/wsdc/registry/{id}.html");
|
||||||
|
let request = client
|
||||||
|
.request(reqwest::Method::GET, url)
|
||||||
|
.build()
|
||||||
|
.map_err(DanceInfoError::RequestBuild)?;
|
||||||
|
let response = client
|
||||||
|
.execute(request)
|
||||||
|
.await
|
||||||
|
.map_err(DanceInfoError::Request)?;
|
||||||
|
|
||||||
|
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
||||||
|
Ok(x.into())
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user