Worked on HTML scraping
This commit is contained in:
@@ -7,7 +7,6 @@ use crate::{
|
||||
dance_info::{CompState, DanceInfo, DanceRank, DanceRole},
|
||||
};
|
||||
|
||||
// mod caching;
|
||||
pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
let client = ClientBuilder::new()
|
||||
.user_agent(app_signature())
|
||||
@@ -37,6 +36,75 @@ pub async fn fetch_wsdc_info(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
Ok(x.into())
|
||||
}
|
||||
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
fn extract_tables(html: &str) -> Vec<(String, Vec<(String, String)>)> {
|
||||
let document = Html::parse_document(html);
|
||||
let card_selector = Selector::parse("div:has( > div.card-header)").unwrap();
|
||||
let title_selector = Selector::parse("div.card-header").unwrap();
|
||||
let table_selector = Selector::parse("div.card-body > table").unwrap();
|
||||
let row_selector = Selector::parse("tr").unwrap();
|
||||
let header_selector = Selector::parse("th").unwrap();
|
||||
let cell_selector = Selector::parse("td").unwrap();
|
||||
|
||||
let mut pairs = Vec::new();
|
||||
|
||||
for card in document.select(&card_selector) {
|
||||
let table = card.select(&table_selector).next().unwrap();
|
||||
let title = card
|
||||
.select(&title_selector)
|
||||
.next()
|
||||
.unwrap()
|
||||
.text()
|
||||
.collect::<Vec<_>>()
|
||||
.join("sep")
|
||||
.trim()
|
||||
.to_owned();
|
||||
|
||||
for row in table.select(&row_selector) {
|
||||
let header = row.select(&header_selector).next();
|
||||
let cell = row.select(&cell_selector).next();
|
||||
|
||||
if let (Some(h), Some(c)) = (header, cell) {
|
||||
let key = h.text().collect::<String>().trim().to_string();
|
||||
let value = c.text().collect::<String>().trim().to_string();
|
||||
pairs.push((key, value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn parse_table(html: &str) {
|
||||
let tables = extract_tables(html);
|
||||
}
|
||||
|
||||
/// Smoke test: feeds the `polina.html` fixture, embedded at compile time, to `parse_table`.
#[test]
fn test_parse_table() {
    let fixture = include_str!("../../polina.html");
    parse_table(fixture);
}
|
||||
|
||||
pub async fn fetch_wsdc_info_scoring_dance(id: u32) -> Result<DanceInfo, DanceInfoError> {
|
||||
let client = ClientBuilder::new()
|
||||
.user_agent(app_signature())
|
||||
.build()
|
||||
.map_err(DanceInfoError::ClientBuild)?;
|
||||
|
||||
let url = format!("https://scoring.dance/enUS/wsdc/registry/{id}.html");
|
||||
let request = client
|
||||
.request(reqwest::Method::GET, url)
|
||||
.build()
|
||||
.map_err(DanceInfoError::RequestBuild)?;
|
||||
let response = client
|
||||
.execute(request)
|
||||
.await
|
||||
.map_err(DanceInfoError::Request)?;
|
||||
|
||||
let x: DanceInfoParser = response.json().await.map_err(DanceInfoError::JsonParse)?;
|
||||
Ok(x.into())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used, reason = "Allow unwrap in tests")]
|
||||
|
||||
Reference in New Issue
Block a user