Made parsoid work
This commit is contained in:
856
Cargo.lock
generated
856
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -4,5 +4,5 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
mwbot = "0.6.1"
|
||||
mwbot = { git = "https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git", rev = "05cbb12188f18e2da710de158d89a9a4f1b42689" }
|
||||
tokio = { version = "1.46.1", features = ["rt", "rt-multi-thread", "macros"] }
|
||||
|
||||
31
src/main.rs
31
src/main.rs
@@ -1,13 +1,16 @@
|
||||
use mwbot::{
|
||||
Bot,
|
||||
generators::{CategoryMemberSort, Generator, SortDirection},
|
||||
parsoid::{self, WikinodeIterator},
|
||||
generators::{Generator, SortDirection, categories::CategoryMemberSort},
|
||||
};
|
||||
use std::{error::Error, path::Path};
|
||||
|
||||
use crate::old_style::get_description;
|
||||
|
||||
mod old_style;
|
||||
|
||||
fn list_teacher_pages(bot: &Bot) -> tokio::sync::mpsc::Receiver<Result<mwbot::Page, mwbot::Error>> {
|
||||
let category_title = "Category:Teachers";
|
||||
let pages = mwbot::generators::CategoryMembers::new(category_title)
|
||||
let pages = mwbot::generators::categories::CategoryMembers::new(category_title)
|
||||
.dir(SortDirection::Descending)
|
||||
.sort(CategoryMemberSort::Timestamp);
|
||||
|
||||
@@ -17,18 +20,18 @@ fn list_teacher_pages(bot: &Bot) -> tokio::sync::mpsc::Receiver<Result<mwbot::Pa
|
||||
}
|
||||
|
||||
async fn print_teachers(bot: &Bot) {
|
||||
while let Some(page) = list_teacher_pages(bot).recv().await {
|
||||
let mut v = list_teacher_pages(bot);
|
||||
while let Some(page) = v.recv().await {
|
||||
let p = page.unwrap();
|
||||
println!("- {}", p.as_title().dbkey());
|
||||
print_wsdc_id(bot, &p).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn print_wsdc_id(bot: &Bot, page: &mwbot::Page) {
|
||||
println!("{}", page.wikitext().await.unwrap());
|
||||
let x = page.html().await.unwrap().into_mutable();
|
||||
for w in &x.filter_external_links() {
|
||||
println!("{}", w.text_contents());
|
||||
println!(
|
||||
"- {} [{}]",
|
||||
p.as_title().dbkey(),
|
||||
old_style::get_wsdc_id(bot, &p)
|
||||
.await
|
||||
.map(|x| x.to_string())
|
||||
.unwrap_or("Unknown".to_string())
|
||||
);
|
||||
get_description(bot, &p).await;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
47
src/old_style.rs
Normal file
47
src/old_style.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
use mwbot::{Bot, parsoid::WikinodeIterator as _};
|
||||
|
||||
/// Extracts a numeric ID from a URL containing a `<number>.html` path segment.
///
/// The URL is split on `/` and the segments are examined from left to right.
/// A segment qualifies when it ends with `.html` and what remains after
/// removing the trailing `.html` parses as a `u32`. The number from the first
/// qualifying segment is returned.
///
/// Returns `None` when no segment qualifies (no `.html` segment at all, or
/// none whose stem is a valid `u32`).
fn extract_number_from_url(url: &str) -> Option<u32> {
    url.split('/')
        .filter(|segment| segment.ends_with(".html"))
        // Keep scanning when a `.html` segment is not numeric (for example a
        // host name such as `example.html`) instead of giving up on the first
        // one; a later segment may still carry the ID.
        .find_map(|segment| segment.trim_end_matches(".html").parse::<u32>().ok())
}
|
||||
|
||||
pub async fn get_wsdc_id(bot: &Bot, page: &mwbot::Page) -> Option<u32> {
|
||||
let x = page.html().await.unwrap().into_mutable();
|
||||
for w in &x.filter_external_links() {
|
||||
if let Some(id) = extract_number_from_url(&w.target()) {
|
||||
return Some(id);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Exploratory helper that dumps the section structure of `page`.
///
/// Fetches the page HTML and, for every section, prints debugging output:
/// the section itself, then its heading (or `No heading`), then the heading's
/// text node (or `No text`). No description is extracted yet, so the function
/// currently always returns `None`.
///
/// A failure to fetch the HTML is reported on stderr and also results in
/// `None`, instead of panicking.
///
/// `bot` is not used by this function at present; it is kept so the
/// signature stays the same for existing callers.
pub async fn get_description(bot: &Bot, page: &mwbot::Page) -> Option<String> {
    let x = match page.html().await {
        Ok(html) => html.into_mutable(),
        Err(err) => {
            eprintln!("Could not fetch page HTML: {err}");
            return None;
        }
    };

    for w in &x.iter_sections() {
        dbg!(w);

        // Sections without a heading have nothing further to inspect.
        let Some(h) = w.heading() else {
            println!("No heading");
            continue;
        };
        dbg!(&h);

        // Only headings that are themselves text nodes are dumped.
        let Some(t) = h.as_text() else {
            println!("No text");
            continue;
        };

        dbg!(t.borrow());
    }

    // Placeholder: nothing is extracted from the sections yet.
    None
}
|
||||
Reference in New Issue
Block a user