From a41f159731b580852661bd8f222e730dc737e664 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Fri, 9 Jan 2026 13:52:51 -0500 Subject: Remove annoying initial tables --- src/main.py | 5 +++-- src/scrape_lexicanum.py | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/main.py b/src/main.py index d3950fc..b6a7e1c 100644 --- a/src/main.py +++ b/src/main.py @@ -21,10 +21,11 @@ async def ping(ctx): @bot.command(name="Explain", aliases=["explain", "What's", "what's"]) async def explain(ctx, *args): - if args[0].lower == "whfb": + args = list(args) + if args[0].lower() == "whfb": args.pop(0) config["site"] = "https://whfb.lexicanum.com" - if args[0].lower == "aos": + if args[0].lower() == "aos": args.pop(0) config["site"] = "https://ageofsigmar.lexicanum.com" diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py index 6853455..20bc0da 100644 --- a/src/scrape_lexicanum.py +++ b/src/scrape_lexicanum.py @@ -51,6 +51,10 @@ def get_page_content(config, page_name): soup = BeautifulSoup(response.content, "html.parser") parser_output = soup.find(id="mw-content-text").contents[0] + + for unwanted in parser_output.select("table"): + unwanted.decompose() + content = parser_output.find("p") for a in content.findAll("a"): fix_link(config, a, "href") -- cgit v1.2.3