Made parsoid work
This commit is contained in:
856
Cargo.lock
generated
856
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -4,5 +4,5 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
mwbot = "0.6.1"
|
mwbot = { git = "https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git", rev = "05cbb12188f18e2da710de158d89a9a4f1b42689" }
|
||||||
tokio = { version = "1.46.1", features = ["rt", "rt-multi-thread", "macros"] }
|
tokio = { version = "1.46.1", features = ["rt", "rt-multi-thread", "macros"] }
|
||||||
|
|||||||
31
src/main.rs
31
src/main.rs
@@ -1,13 +1,16 @@
|
|||||||
use mwbot::{
|
use mwbot::{
|
||||||
Bot,
|
Bot,
|
||||||
generators::{CategoryMemberSort, Generator, SortDirection},
|
generators::{Generator, SortDirection, categories::CategoryMemberSort},
|
||||||
parsoid::{self, WikinodeIterator},
|
|
||||||
};
|
};
|
||||||
use std::{error::Error, path::Path};
|
use std::{error::Error, path::Path};
|
||||||
|
|
||||||
|
use crate::old_style::get_description;
|
||||||
|
|
||||||
|
mod old_style;
|
||||||
|
|
||||||
fn list_teacher_pages(bot: &Bot) -> tokio::sync::mpsc::Receiver<Result<mwbot::Page, mwbot::Error>> {
|
fn list_teacher_pages(bot: &Bot) -> tokio::sync::mpsc::Receiver<Result<mwbot::Page, mwbot::Error>> {
|
||||||
let category_title = "Category:Teachers";
|
let category_title = "Category:Teachers";
|
||||||
let pages = mwbot::generators::CategoryMembers::new(category_title)
|
let pages = mwbot::generators::categories::CategoryMembers::new(category_title)
|
||||||
.dir(SortDirection::Descending)
|
.dir(SortDirection::Descending)
|
||||||
.sort(CategoryMemberSort::Timestamp);
|
.sort(CategoryMemberSort::Timestamp);
|
||||||
|
|
||||||
@@ -17,18 +20,18 @@ fn list_teacher_pages(bot: &Bot) -> tokio::sync::mpsc::Receiver<Result<mwbot::Pa
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn print_teachers(bot: &Bot) {
|
async fn print_teachers(bot: &Bot) {
|
||||||
while let Some(page) = list_teacher_pages(bot).recv().await {
|
let mut v = list_teacher_pages(bot);
|
||||||
|
while let Some(page) = v.recv().await {
|
||||||
let p = page.unwrap();
|
let p = page.unwrap();
|
||||||
println!("- {}", p.as_title().dbkey());
|
println!(
|
||||||
print_wsdc_id(bot, &p).await;
|
"- {} [{}]",
|
||||||
}
|
p.as_title().dbkey(),
|
||||||
}
|
old_style::get_wsdc_id(bot, &p)
|
||||||
|
.await
|
||||||
async fn print_wsdc_id(bot: &Bot, page: &mwbot::Page) {
|
.map(|x| x.to_string())
|
||||||
println!("{}", page.wikitext().await.unwrap());
|
.unwrap_or("Unknown".to_string())
|
||||||
let x = page.html().await.unwrap().into_mutable();
|
);
|
||||||
for w in &x.filter_external_links() {
|
get_description(bot, &p).await;
|
||||||
println!("{}", w.text_contents());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
47
src/old_style.rs
Normal file
47
src/old_style.rs
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
use mwbot::{Bot, parsoid::WikinodeIterator as _};
|
||||||
|
|
||||||
|
/// Extracts the numeric ID from a URL that contains a `<number>.html` path segment.
///
/// The URL is split on `/` and the segments are examined left to right. The
/// first segment that is a valid `u32` followed by exactly one `.html` suffix
/// provides the result.
///
/// Segments that end in `.html` but are not numeric (for example
/// `index.html`) are skipped rather than ending the search, so a later
/// numeric segment is still found.
///
/// Returns `None` when no segment matches, including for an empty string.
fn extract_number_from_url(url: &str) -> Option<u32> {
    url.split('/')
        // `strip_suffix` removes the suffix once; `trim_end_matches` would
        // also accept oddities such as `12.html.html`.
        .filter_map(|segment| segment.strip_suffix(".html"))
        .find_map(|stem| stem.parse().ok())
}
|
||||||
|
|
||||||
|
pub async fn get_wsdc_id(bot: &Bot, page: &mwbot::Page) -> Option<u32> {
|
||||||
|
let x = page.html().await.unwrap().into_mutable();
|
||||||
|
for w in &x.filter_external_links() {
|
||||||
|
if let Some(id) = extract_number_from_url(&w.target()) {
|
||||||
|
return Some(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Debugging aid that dumps the section structure of a page.
///
/// For every section of the page's HTML this prints the section, then its
/// heading, then the heading's text content via `dbg!`. It prints
/// "No heading" or "No text" when the respective piece is missing.
///
/// No description is extracted yet: the function always returns `None`.
/// `bot` is currently unused.
///
/// # Panics
///
/// Panics if fetching the page HTML fails.
pub async fn get_description(bot: &Bot, page: &mwbot::Page) -> Option<String> {
    let x = page.html().await.unwrap().into_mutable();
    for w in &x.iter_sections() {
        dbg!(w);

        // A section without a heading has nothing further to inspect.
        let h = match w.heading() {
            Some(h) => h,
            None => {
                println!("No heading");
                continue;
            }
        };
        dbg!(&h);

        match h.as_text() {
            Some(t) => {
                dbg!(t.borrow());
            }
            None => println!("No text"),
        }
    }
    None
}
|
||||||
Reference in New Issue
Block a user