From 7e2e39ef6cc4ca15d49afe9bbc48745710b64d6e Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Sat, 10 Jan 2026 01:26:58 -0500 Subject: Correctly decompose other incorrect images --- src/main.py | 6 +++--- src/scrape_lexicanum.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main.py b/src/main.py index c773f4f..b16794d 100644 --- a/src/main.py +++ b/src/main.py @@ -22,13 +22,13 @@ async def ping(ctx): @bot.command(name="Explain", aliases=["explain", "What's", "what's"]) async def explain(ctx, *args): args = list(args) - if args[0].lower() == "whfb": + if args[0].lower() == "!fantasy": args.pop(0) config["site"] = "https://whfb.lexicanum.com" - elif args[0].lower() == "wh40k": + elif args[0].lower() == "!40k": args.pop(0) config["site"] = "https://wh40k.lexicanum.com" - elif args[0].lower() == "aos": + elif args[0].lower() == "!aos": args.pop(0) config["site"] = "https://ageofsigmar.lexicanum.com" else: diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py index 2252ba7..d327811 100644 --- a/src/scrape_lexicanum.py +++ b/src/scrape_lexicanum.py @@ -3,6 +3,7 @@ from markdownify import markdownify from requests.exceptions import HTTPError import requests +import textwrap class WikiError(Exception): pass @@ -60,15 +61,19 @@ def get_page_content(config, page_name): decompose(parser_output, "div.portal") decompose(parser_output, "table.nottemplate") decompose(parser_output, "table.Anpassen") + decompose(parser_output, "div.quotebox") - image = parser_output.find("a.image") + image = parser_output.find("a", {"class": "image"}) image_link = None + if image: + if image["href"] == "/wiki/File:Targetdrone.gif": + image = parser_output.find_all("a", {"class": "image"})[1] + if image: if image.contents[0]: fix_link(config, image.contents[0], "src") image_link = image.contents[0]["src"] - decompose(parser_output, "div.quotebox") decompose(parser_output, "br") decompose(parser_output, "table") @@ -81,7 +86,7 @@ def get_page_content(config, page_name): fix_link(config, a, "href") header_md = markdownify(str(soup.find(id="firstHeading").contents[0])) - content_md = markdownify(str(content)) + content_md = textwrap.shorten(markdownify(str(content)), width=1000, placeholder="...") return header_md, content_md, image_link -- cgit v1.2.3