Worked on parsing

This commit is contained in:
Lukas Wölfer
2025-05-06 01:46:50 +02:00
parent 92629d4bf9
commit 6fc14bb595
6 changed files with 134 additions and 64 deletions

67
main.ts
View File

@@ -1,5 +1,6 @@
import { Mwn } from 'npm:mwn'
import process from "node:process";
import { parseSummary } from "./parse.ts";
async function getWorkshopVideos(bot: Mwn): Promise<string[]> {
const response = await bot.request({
@@ -14,10 +15,10 @@ async function getWorkshopVideos(bot: Mwn): Promise<string[]> {
}
// Extract and print the file titles
return response.query.allimages.map(image => image.title);
return response.query.allimages.map(function (image: { title: string }) { return image.title });
}
interface VideoDescription {
export interface VideoDescription {
teachers: string[]
location: string
event: string
@@ -27,62 +28,6 @@ interface VideoDescription {
notes: string
}
/**
 * Extracts the `| key = value` parameters of the `DanceWorkshopDescription`
 * template call found in a page's wikitext.
 *
 * @param description Raw wikitext of the page.
 * @returns A record mapping each parameter name to its value, both with
 *          surrounding whitespace removed.
 * @throws Error if no `{{...DanceWorkshopDescription...}}` call is present.
 */
function parseTemplate(description: string): Record<string, string> {
    const templateRegex = /{{(.*DanceWorkshopDescription.*)}}/s;
    const match = description.match(templateRegex);
    if (!match) {
        throw new Error("Could not find template")
    }
    // One parameter per line: "|", the key, "=", then the value up to end of line.
    const propertyRegex = /\|\s*(?<key>[^|]*?)\s*=(?<value>[^|]*?)\s*$/gm
    // Collect into a single plain record. Mapping the matchAll() iterator would
    // yield an iterator of one-key objects, which is not the declared return
    // type and makes every later key lookup fail.
    const properties: Record<string, string> = {}
    for (const property of match[1].matchAll(propertyRegex)) {
        const groups = property.groups
        if (groups === undefined) {
            continue
        }
        // The regex strips whitespace around the key and after the value, but
        // not between "=" and the value, so trim the value here.
        properties[groups['key']] = groups['value'].trim()
    }
    return properties
}
/**
 * Returns the named capture `group` of `regex` within `description`, or throws
 * an error naming the missing `section`.
 */
function extractLegacySection(description: string, regex: RegExp, group: string, section: string): string {
    // `regex` must NOT carry the `g` flag: with it, String.prototype.match
    // returns only the matched strings and no `.groups`.
    const captured = description.match(regex)?.groups?.[group]
    if (captured === undefined) {
        throw new Error(`Could not find "${section}"`)
    }
    return captured
}

/**
 * Builds a video description from the wikitext of a page.
 *
 * Values are read from the parameters returned by `parseTemplate`. Pages using
 * the old template layout are still accepted, with a warning logged for each
 * deviation:
 *  - a single `teachers` parameter (names separated by `&`) instead of
 *    `leader` / `follower`;
 *  - a `=== Shown Patterns ===` section in the page text instead of a
 *    `patterns` parameter;
 *  - a `=== Notes ===` section in the page text instead of a `notes` parameter.
 *
 * @param description Raw wikitext of the page.
 * @param name Page name, used only in warning messages.
 * @returns The collected description; `event` falls back to `location` when
 *          the template gives no (or an empty) `event`.
 * @throws Error if the template is missing, or if `patterns` / `notes` is
 *         absent from the template and the matching section is not found.
 */
function parseSummary(description: string, name: string): VideoDescription {
    const properties = parseTemplate(description)

    let teachers: string[]
    if (Object.hasOwn(properties, 'teachers')) {
        console.warn(`Page ${name} has old template usage [contains 'teachers' key]`)
        teachers = properties['teachers'].split('&').map(item => item.trim())
    } else {
        teachers = [properties['leader'], properties['follower']].filter(v => v !== undefined)
    }

    const location = properties['location']
    const date = properties['date']
    const level = properties['level']
    const event = properties['event'] || location

    let patterns: string
    if (Object.hasOwn(properties, 'patterns')) {
        patterns = properties['patterns']
    } else {
        console.warn(`Page ${name} has old template usage [no 'patterns' key]`)
        patterns = extractLegacySection(
            description,
            /===\s*Shown Patterns\s*===\s*(?<patterns>(^\s*\*+.*$\s+)+)/m,
            'patterns',
            'Shown Patterns',
        )
    }

    let notes: string
    if (Object.hasOwn(properties, 'notes')) {
        notes = properties['notes']
    } else {
        console.warn(`Page ${name} has old template usage [no 'notes' key]`)
        notes = extractLegacySection(
            description,
            /===\s*Notes\s*===\s*(?<notes>(^\s*:+.*$\s+)+)/m,
            'notes',
            'Notes',
        )
    }

    // NOTE(review): assumes VideoDescription consists of exactly these seven
    // fields (part of the interface is not visible here) — confirm.
    return { teachers, location, event, date, level, patterns, notes }
}
async function main() {
const bot = new Mwn({
@@ -99,7 +44,13 @@ async function main() {
await bot.login();
const relevantFiles = await getWorkshopVideos(bot)
const file_content = await bot.read(relevantFiles[0])
if (file_content.revisions === undefined) {
throw new Error("Page has no revisions")
}
const content = file_content.revisions[0].content
if (content === undefined) {
throw new Error("Latest revision has no content")
}
console.log(content)
parseSummary(content, relevantFiles[0])
} catch (error) {