diff options
| author | Samuel Johnson <[email protected]> | 2026-01-09 00:29:45 -0500 |
|---|---|---|
| committer | Samuel Johnson <[email protected]> | 2026-01-09 00:29:45 -0500 |
| commit | f041cdfb2e86f742ab0f1b470d43de8659995cfe (patch) | |
| tree | 90ce33c38cf510d602bd996db375143e04cd1e5c | |
| parent | 3d2380cb6102cd9ddc153527d5dda05ed294d3fa (diff) | |
Add basic scraping and markdown
| -rw-r--r-- | src/main.py | 26 | ||||
| -rw-r--r-- | src/scrape_lexicanum.py | 58 |
2 files changed, 84 insertions, 0 deletions
diff --git a/src/main.py b/src/main.py
index 33affa2..08cdd05 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,6 +4,8 @@ from discord.ext import commands
 import discord
 import os
 
+import scrape_lexicanum
+
 config = {
     **dotenv_values(".env"),
     **os.environ,
@@ -17,4 +19,28 @@ bot = commands.Bot(command_prefix="Sigmar! ", intents=intents)
 async def ping(ctx):
     await ctx.send("pong")
 
+@bot.command(name="Explain", aliases=["explain", "What's", "what's"])
+async def explain(ctx, query):
+    try:
+        search_content = scrape_lexicanum.get_search_response(config, query)
+        page_header, page_content = scrape_lexicanum.get_page_content(config, search_content[0])
+
+        embed = discord.Embed(
+            title=page_header,
+            description=search_content.pop(0),
+            color=discord.Colour.blurple(),
+        )
+
+        string_results = " ".join(str(x) for x in search_content)
+
+        embed.add_field(name="Overview", value=page_content)
+        embed.add_field(name="You May Have Meant", value=string_results)
+
+        await ctx.send(embed=embed)
+    except scrape_lexicanum.WikiError as e:
+        await ctx.send(f"{e}")
+    except Exception as e:
+        print(f"Could not complete explanation: {e}")
+        await ctx.send("Something has gone most terribly wrong...")
+
 bot.run(config['token'])
diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py
new file mode 100644
index 0000000..a719fb1
--- /dev/null
+++ b/src/scrape_lexicanum.py
@@ -0,0 +1,58 @@
+from bs4 import BeautifulSoup
+from markdownify import markdownify
+from requests.exceptions import HTTPError
+
+import requests
+
+class WikiError(Exception):
+    pass
+
+def get_search_response(config, query):
+    try:
+        response = requests.get(config['site'] + '/mediawiki/api.php?action=opensearch&format=json&formatversion=2&search=' + query + '&namespace=0&limit=6&suggest=true', timeout=7)
+    except requests.exceptions.Timeout:
+        raise WikiError("Request timed out.")
+    except requests.exceptions.RequestException as e:
+        print(f"Failed to complete request: {e}")
+        raise WikiError("Error with wiki request.")
+
+    try:
+        response.raise_for_status()
+    except HTTPError as e:
+        print(f"HTTP error occured: {e}")
+        raise WikiError("Bad HTTP response.")
+
+    data = response.json()
+
+    if not data[3]:
+        raise WikiError("No such wiki entry found.")
+
+    return data[3]
+
+def get_page_content(config, page_name):
+    try:
+        response = requests.get(page_name, timeout=7)
+    except requests.exceptions.Timeout:
+        raise WikiError("Request timed out.")
+    except requests.exceptions.RequestException as e:
+        print(f"Failed to complete request: {e}")
+        raise WikiError("Error with wiki request.")
+
+    try:
+        response.raise_for_status()
+    except HTTPError as e:
+        print(f"HTTP error occured: {e}")
+        raise WikiError("Bad HTTP response.")
+
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    parser_output = soup.find(id="mw-content-text").contents[0]
+    content = parser_output.find("p")
+    for a in content.findAll("a"):
+        a["href"] = a["href"].replace("/wiki", config["site"] + "/wiki")
+
+    header_md = markdownify(str(soup.find(id="firstHeading").contents[0]))
+    content_md = markdownify(str(content))
+
+    return header_md, content_md
+
