diff --git a/Cargo.lock b/Cargo.lock index 6a4fdfd..2ed79e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,6 +54,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" @@ -195,6 +201,16 @@ dependencies = [ "url", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -261,6 +277,8 @@ name = "dancing-bot-teachers" version = "0.1.0" dependencies = [ "mwbot", + "reqwest", + "serde", "tokio", ] @@ -451,12 +469,37 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "flate2" version = "1.1.2" @@ -473,6 +516,21 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -631,6 +689,25 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "h2" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.10.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -712,6 +789,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2", "http", "http-body", "httparse", @@ -739,6 +817,22 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.15" @@ -758,9 +852,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -1023,6 +1119,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litemap" version = "0.8.0" @@ -1228,6 +1330,23 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -1280,6 +1399,50 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "openssl" +version = "0.10.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -1473,6 +1636,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "potential_utf" version = "0.1.2" @@ -1811,17 +1980,22 @@ dependencies = [ "bytes", "cookie", "cookie_store", + "encoding_rs", "futures-core", "futures-util", + "h2", "http", "http-body", "http-body-util", "hyper", "hyper-rustls", + "hyper-tls", "hyper-util", "js-sys", "log", + "mime", "mime_guess", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -1832,6 +2006,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", + "tokio-native-tls", "tokio-rustls", "tokio-util", "tower", @@ -1880,6 +2055,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +dependencies = [ + "bitflags 2.9.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "rustls" version = "0.23.29" @@ -1936,12 +2124,44 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.9.1", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "selectors" version = "0.22.0" @@ -2197,6 +2417,40 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.9.1", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "tendril" version = "0.4.3" @@ -2348,6 +2602,16 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.2" @@ -2594,6 +2858,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -2831,6 +3101,17 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-registry" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1da3e436dc7653dfdf3da67332e22bff09bb0e28b0239e1624499c7830842e" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.3.4" diff --git a/Cargo.toml b/Cargo.toml index 254d137..89ea4a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,6 @@ edition = "2024" [dependencies] mwbot = { git = "https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git", rev = "05cbb12188f18e2da710de158d89a9a4f1b42689" } +reqwest = "0.12.22" +serde = { version = "1.0.219", features = ["derive"] } tokio = { version = "1.46.1", features = ["rt", "rt-multi-thread", "macros"] } diff --git a/src/main.rs b/src/main.rs index 7e6236e..9e0c68e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,13 @@ use mwbot::{ Bot, - generators::{Generator, SortDirection, categories::CategoryMemberSort}, + generators::{ + Generator, SortDirection, categories::CategoryMemberSort, querypage::QueryPage, + search::Search, + }, }; -use std::{error::Error, path::Path}; - -use crate::old_style::get_description; +use reqwest::RequestBuilder; +use serde::{Deserialize, Deserializer}; +use std::{collections::HashMap, error::Error, path::Path}; mod old_style; @@ -19,20 +22,115 @@ fn list_teacher_pages(bot: &Bot) -> tokio::sync::mpsc::Receiver Vec { + let mut gene = QueryPage::new("Wantedpages").generate(bot); + let mut result = vec![]; + while let Some(x) = gene.recv().await { + let p = match x { + Ok(p) => p, + Err(e) => { + eprintln!("Could not get search result: {e}"); + continue; + } + }; + if let Ok(n) = parse_wsdc_page_name(p.title()) { + result.push(n); + } } + result +} + +fn parse_wsdc_page_name(name: &str) -> Result { + if !name.starts_with("WSDC/") { + eprintln!("{name} is a wrong match"); + return Err(()); + } + match name.trim_start_matches("WSDC/").parse::() { + Ok(n) => Ok(n), + Err(e) => { + eprintln!("Page {name} does not fit: {e}"); + Err(()) + } + } +} + +async fn index_wsdc_ids(bot: &Bot) -> Vec { + let mut gene = Search::new("WSDC/").generate(bot); + let mut result = vec![]; + while let Some(x) = gene.recv().await { + let p = match x { + Ok(p) => p, + Err(e) => { + eprintln!("Could not get search result: {e}"); + continue; + } + }; + if let Ok(n) = parse_wsdc_page_name(&p.title()) { + result.push(n); + } + } + result } #[tokio::main] @@ -44,8 +142,11 @@ async fn main() -> Result<(), Box> { return Ok(()); } }; - print_teachers(&bot).await; - + // dbg!(index_wsdc_ids(&bot).await); + // dbg!(wanted_ids(&bot).await); + // fetch_wsdc_info(1234).await; + // fetch_wsdc_info(1010).await; + fetch_wsdc_info(18080).await; Ok(()) // // Monitor changes on these pages diff --git a/src/old_style.rs b/src/old_style.rs index e83c430..2c9f21a 100644 --- a/src/old_style.rs +++ b/src/old_style.rs @@ -1,4 +1,8 @@ -use mwbot::{Bot, parsoid::WikinodeIterator as _}; +use mwbot::{ + parsoid::{self, map::IndexMap, Wikicode, WikinodeIterator}, Bot, SaveOptions +}; + +use crate::list_teacher_pages; fn extract_number_from_url(url: &str) -> Option { // Split the URL into parts using '/' @@ -17,7 +21,60 @@ fn extract_number_from_url(url: &str) -> Option { None } -pub async fn get_wsdc_id(bot: &Bot, page: &mwbot::Page) -> Option { +async fn convert_teachers(bot: &Bot) { + let mut v = list_teacher_pages(bot); + while let Some(page) = v.recv().await { + let p = page.unwrap(); + println!( + "- {} [{}]", + p.as_title().dbkey(), + crate::old_style::get_wsdc_id(&p) + .await + .map(|x| x.to_string()) + .unwrap_or("Unknown".to_string()) + ); + let Some(os) = convert_old_style(bot, &p).await else { + continue; + }; + + p.save( + os, + &SaveOptions::summary("Converted teacher profile to template usage") + .mark_as_bot(true) + .mark_as_minor(false), + ) + .await + .unwrap(); + } +} + +pub async fn convert_old_style(bot: &Bot, page: &mwbot::Page) -> Option { + if !is_old_style(page).await { + eprintln!("Not old style: {}", page.title()); + return None; + } + let mut params = IndexMap::new(); + if let Some(id) = get_wsdc_id(page).await { + params.insert("wsdc_id".to_owned(), id.to_string()); + } + if let Some(description) = get_description(page).await { + params.insert("description".to_owned(), description); + } + let t = parsoid::Template::new("Template:Teacher", ¶ms).unwrap(); + let result = Wikicode::new(""); + result.append(&t); + Some(result) +} + +pub async fn is_old_style(page: &mwbot::Page) -> bool { + let x = page.html().await.unwrap().into_mutable(); + !x.filter_templates() + .unwrap() + .iter() + .any(|x| x.name() == "Template:Teacher") +} + +pub async fn get_wsdc_id(page: &mwbot::Page) -> Option { let x = page.html().await.unwrap().into_mutable(); for w in &x.filter_external_links() { if let Some(id) = extract_number_from_url(&w.target()) { @@ -27,21 +84,7 @@ pub async fn get_wsdc_id(bot: &Bot, page: &mwbot::Page) -> Option { None } -pub async fn get_description(bot: &Bot, page: &mwbot::Page) -> Option { +pub async fn get_description(page: &mwbot::Page) -> Option { let x = page.html().await.unwrap().into_mutable(); - for w in &x.iter_sections() { - dbg!(w); - let Some(h) = w.heading() else { - println!("No heading"); - continue; - }; - dbg!(&h); - let Some(t) = h.as_text() else { - println!("No text"); - continue; - }; - - dbg!(t.borrow()); - } - None + Some(x.iter_sections().first()?.first_child()?.text_contents()) }