diff options
| -rw-r--r-- | src/main.py | 6 | ||||
| -rw-r--r-- | src/scrape_lexicanum.py | 11 |
2 files changed, 11 insertions, 6 deletions
diff --git a/src/main.py b/src/main.py index c773f4f..b16794d 100644 --- a/src/main.py +++ b/src/main.py @@ -22,13 +22,13 @@ async def ping(ctx): @bot.command(name="Explain", aliases=["explain", "What's", "what's"]) async def explain(ctx, *args): args = list(args) - if args[0].lower() == "whfb": + if args[0].lower() == "!fantasy": args.pop(0) config["site"] = "https://whfb.lexicanum.com" - elif args[0].lower() == "wh40k": + elif args[0].lower() == "!40k": args.pop(0) config["site"] = "https://wh40k.lexicanum.com" - elif args[0].lower() == "aos": + elif args[0].lower() == "!aos": args.pop(0) config["site"] = "https://ageofsigmar.lexicanum.com" else: diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py index 2252ba7..d327811 100644 --- a/src/scrape_lexicanum.py +++ b/src/scrape_lexicanum.py @@ -3,6 +3,7 @@ from markdownify import markdownify from requests.exceptions import HTTPError import requests +import textwrap class WikiError(Exception): pass @@ -60,15 +61,19 @@ def get_page_content(config, page_name): decompose(parser_output, "div.portal") decompose(parser_output, "table.nottemplate") decompose(parser_output, "table.Anpassen") + decompose(parser_output, "div.quotebox") - image = parser_output.find("a.image") + image = parser_output.find("a", {"class": "image"}) image_link = None if image: + if image["href"] == "/wiki/File:Targetdrone.gif": + image = parser_output.find_all("a", {"class": "image"})[1] + + if image: if image.contents[0]: fix_link(config, image.contents[0], "src") image_link = image.contents[0]["src"] - decompose(parser_output, "div.quotebox") decompose(parser_output, "br") decompose(parser_output, "table") @@ -81,7 +86,7 @@ def get_page_content(config, page_name): fix_link(config, a, "href") header_md = markdownify(str(soup.find(id="firstHeading").contents[0])) - content_md = markdownify(str(content)) + content_md = textwrap.shorten(markdownify(str(content)), width=1000, placeholder="...") return header_md, content_md, image_link |
