Worked on table parsing
This commit is contained in:
@@ -36,8 +36,8 @@ impl TryFrom<&str> for DanceRole {
|
||||
|
||||
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||
match value.to_lowercase().as_str() {
|
||||
"leader" => Ok(DanceRole::Leader),
|
||||
"follower" => Ok(DanceRole::Follower),
|
||||
"leader" => Ok(Self::Leader),
|
||||
"follower" => Ok(Self::Follower),
|
||||
_ => Err(ParseDanceRoleError),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::ClientBuilder;
|
||||
|
||||
use crate::{
|
||||
app_signature,
|
||||
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||
};
|
||||
use reqwest::ClientBuilder;
|
||||
mod scoringdance;
|
||||
|
||||
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
let client = ClientBuilder::new()
|
||||
@@ -36,75 +36,6 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
Ok(x.into())
|
||||
}
|
||||
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
fn extract_tables(html: &str) -> Vec<(String, Vec<(String, String)>)> {
|
||||
let document = Html::parse_document(html);
|
||||
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
||||
let title_selector = Selector::parse("div.card-header").unwrap();
|
||||
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
||||
let row_selector = Selector::parse("tr").unwrap();
|
||||
let header_selector = Selector::parse("th").unwrap();
|
||||
let cell_selector = Selector::parse("td").unwrap();
|
||||
|
||||
let mut pairs = Vec::new();
|
||||
|
||||
for card in document.select(&card_selector) {
|
||||
let table = card.select(&table_selector).next().unwrap();
|
||||
let title = card
|
||||
.select(&title_selector)
|
||||
.next()
|
||||
.unwrap()
|
||||
.text()
|
||||
.collect::<Vec<_>>()
|
||||
.join("sep")
|
||||
.trim()
|
||||
.to_owned();
|
||||
|
||||
for row in table.select(&row_selector) {
|
||||
let header = row.select(&header_selector).next();
|
||||
let cell = row.select(&cell_selector).next();
|
||||
|
||||
if let (Some(h), Some(c)) = (header, cell) {
|
||||
let key = h.text().collect::<String>().trim().to_string();
|
||||
let value = c.text().collect::<String>().trim().to_string();
|
||||
pairs.push((key, value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn parse_table(html: &str) {
|
||||
let tables = extract_tables(html);
|
||||
}
|
||||
|
||||
/// Feeds the bundled `polina.html` fixture through `parse_table`.
#[test]
fn test_parse_table() {
    let page: &str = include_str!("../../polina.html");
    parse_table(page);
}
|
||||
|
||||
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
let client = ClientBuilder::new()
|
||||
.user_agent(app_signature())
|
||||
.build()
|
||||
.map_err(DanceInfoError::ClientBuild)?;
|
||||
|
||||
let url = format!("https://scoring.dance/enUS/wsdc/registry/{id}.html");
|
||||
let request = client
|
||||
.request(reqwest::Method::GET, url)
|
||||
.build()
|
||||
.map_err(DanceInfoError::RequestBuild)?;
|
||||
let response = client
|
||||
.execute(request)
|
||||
.await
|
||||
.map_err(DanceInfoError::Request)?;
|
||||
|
||||
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
||||
Ok(x.into())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
||||
|
||||
103
src/worldsdc/scoringdance.rs
Normal file
103
src/worldsdc/scoringdance.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use reqwest::ClientBuilder;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
|
||||
use crate::{
|
||||
app_signature,
|
||||
dance_info::DanceInfo,
|
||||
worldsdc::{DanceInfoError, DanceInfoParser},
|
||||
};
|
||||
|
||||
fn parse_card(t: ElementRef) -> (String, Vec<Vec<String>>) {
|
||||
let title_selector = Selector::parse("div.card-header").unwrap();
|
||||
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
||||
let row_selector = Selector::parse("tr").unwrap();
|
||||
let cell_selector = Selector::parse("th,td").unwrap();
|
||||
let table = t.select(&table_selector).next().unwrap();
|
||||
let title = t
|
||||
.select(&title_selector)
|
||||
.next()
|
||||
.unwrap()
|
||||
.text()
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
.trim()
|
||||
.to_owned();
|
||||
|
||||
let parsed_table = table
|
||||
.select(&row_selector)
|
||||
.map(|row| {
|
||||
row.select(&cell_selector)
|
||||
.map(|v| v.text().collect::<String>().trim().to_string())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
(title, parsed_table)
|
||||
}
|
||||
|
||||
/// Looks up the first-name and last-name rows in the details table `d` and
/// prints both values with `dbg!`.
///
/// A row matches when its first cell contains the label, compared
/// case-insensitively; the value is the row's last cell.
///
/// # Panics
///
/// Panics if no row matches "first name" or no row matches "last name".
fn parse_details(d: &[Vec<String>]) {
    /// Last cell of the first row whose leading cell mentions `label`.
    fn labelled_value<'a>(rows: &'a [Vec<String>], label: &str) -> &'a String {
        rows.iter()
            .find(|row| matches!(row.first(), Some(head) if head.to_lowercase().contains(label)))
            .unwrap()
            .last()
            .unwrap()
    }

    let first_name = labelled_value(d, "first name");
    let last_name = labelled_value(d, "last name");
    dbg!(first_name, last_name);
}
|
||||
|
||||
/// Groups the rows of the stats table `d` into chapters and prints the
/// groups with `dbg!`.
///
/// A row with exactly one cell starts a new chapter; every other row is
/// attached to the chapter that is currently open.
fn parse_stats(d: &[Vec<String>]) {
    // `chunk_by` keeps two neighbouring rows together while the predicate
    // holds, so returning `false` for a one-cell row opens a new group there.
    let stays_in_chapter = |_: &Vec<String>, next: &Vec<String>| next.len() != 1;
    let chapters = d.chunk_by(stays_in_chapter);
    dbg!(chapters.collect::<Vec<_>>());
}
|
||||
|
||||
fn extract_tables(html: &str) -> Vec<(String, Vec<Vec<String>>)> {
|
||||
let document = Html::parse_document(html);
|
||||
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
||||
|
||||
document.select(&card_selector).map(parse_card).collect()
|
||||
}
|
||||
|
||||
fn parse_info(html: &str) {
|
||||
let tables = extract_tables(html);
|
||||
let details = &tables
|
||||
.iter()
|
||||
.find(|(v, _)| v.to_lowercase().contains("detail"))
|
||||
.unwrap()
|
||||
.1;
|
||||
let stats = &tables
|
||||
.iter()
|
||||
.find(|(v, _)| v.to_lowercase().contains("stats"))
|
||||
.unwrap()
|
||||
.1;
|
||||
|
||||
dbg!(parse_stats(&stats));
|
||||
dbg!(parse_details(&details));
|
||||
}
|
||||
|
||||
/// Feeds the bundled `polina.html` fixture through `parse_info`.
#[test]
fn test_parse_table() {
    let page: &str = include_str!("../../polina.html");
    parse_info(page);
}
|
||||
|
||||
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
let client = ClientBuilder::new()
|
||||
.user_agent(app_signature())
|
||||
.build()
|
||||
.map_err(DanceInfoError::ClientBuild)?;
|
||||
|
||||
let url = format!("https://scoring.dance/enUS/wsdc/registry/{id}.html");
|
||||
let request = client
|
||||
.request(reqwest::Method::GET, url)
|
||||
.build()
|
||||
.map_err(DanceInfoError::RequestBuild)?;
|
||||
let response = client
|
||||
.execute(request)
|
||||
.await
|
||||
.map_err(DanceInfoError::Request)?;
|
||||
|
||||
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
||||
Ok(x.into())
|
||||
}
|
||||
Reference in New Issue
Block a user