summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Samuel Johnson <[email protected]> 2026-01-09 00:29:45 -0500
committer: Samuel Johnson <[email protected]> 2026-01-09 00:29:45 -0500
commit: f041cdfb2e86f742ab0f1b470d43de8659995cfe (patch)
tree: 90ce33c38cf510d602bd996db375143e04cd1e5c
parent: 3d2380cb6102cd9ddc153527d5dda05ed294d3fa (diff)
Add basic scraping and markdown
-rw-r--r-- src/main.py 26
-rw-r--r-- src/scrape_lexicanum.py 58
2 files changed, 84 insertions, 0 deletions
diff --git a/src/main.py b/src/main.py
index 33affa2..08cdd05 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,6 +4,8 @@ from discord.ext import commands
import discord
import os
+import scrape_lexicanum
+
config = {
**dotenv_values(".env"),
**os.environ,
@@ -17,4 +19,28 @@ bot = commands.Bot(command_prefix="Sigmar! ", intents=intents)
async def ping(ctx):
await ctx.send("pong")
# Discord rejects embeds whose title or field values exceed these lengths.
_EMBED_TITLE_LIMIT = 256
_EMBED_FIELD_LIMIT = 1024


def _clip(text, limit):
    """Return ``text`` stripped and cut to at most ``limit`` characters."""
    text = str(text).strip()
    if len(text) <= limit:
        return text
    # Reserve one character for the ellipsis that marks the cut.
    return text[: limit - 1].rstrip() + "…"


@bot.command(name="Explain", aliases=["explain", "What's", "what's"])
async def explain(ctx, query):
    """Look up ``query`` on the wiki and reply with an embed for the best match.

    The embed's title is the page heading, its description is the URL of the
    best match, "Overview" holds the page's opening paragraph and "You May
    Have Meant" lists the remaining search hits (omitted when there are none).

    Args:
        ctx: Invocation context; the reply is sent with ``ctx.send``.
        query: Search term forwarded to ``scrape_lexicanum.get_search_response``.
            NOTE(review): as a plain positional parameter this presumably
            receives only the first word of an unquoted multi-word query --
            confirm against the discord.py commands documentation.

    Errors are reported to the channel rather than raised: a ``WikiError``
    message is relayed verbatim, anything else is logged and replaced by a
    generic apology.
    """
    # Local import so this block does not depend on the file's import header.
    import asyncio

    try:
        # The scraping helpers use blocking `requests` calls; run them in the
        # default executor so the bot's event loop keeps servicing Discord.
        loop = asyncio.get_running_loop()
        search_content = await loop.run_in_executor(
            None, scrape_lexicanum.get_search_response, config, query
        )

        # Split without mutating the list returned by the scraper.
        best_match, *alternatives = search_content

        page_header, page_content = await loop.run_in_executor(
            None, scrape_lexicanum.get_page_content, config, best_match
        )

        embed = discord.Embed(
            title=_clip(page_header, _EMBED_TITLE_LIMIT),
            description=best_match,
            color=discord.Colour.blurple(),
        )

        # A field value must be non-empty, so fall back to a placeholder.
        embed.add_field(
            name="Overview",
            value=_clip(page_content, _EMBED_FIELD_LIMIT) or "No overview available.",
        )

        # With a single search hit there is nothing to suggest; an empty
        # field value would make Discord reject the whole embed.
        if alternatives:
            string_results = " ".join(str(x) for x in alternatives)
            embed.add_field(
                name="You May Have Meant",
                value=_clip(string_results, _EMBED_FIELD_LIMIT),
            )

        await ctx.send(embed=embed)
    except scrape_lexicanum.WikiError as e:
        await ctx.send(f"{e}")
    except Exception as e:
        # Top-level boundary for the command: log and tell the user.
        print(f"Could not complete explanation: {e}")
        await ctx.send("Something has gone most terribly wrong...")
+
bot.run(config['token'])
diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py
new file mode 100644
index 0000000..a719fb1
--- /dev/null
+++ b/src/scrape_lexicanum.py
@@ -0,0 +1,58 @@
+from bs4 import BeautifulSoup
+from markdownify import markdownify
+from requests.exceptions import HTTPError
+
+import requests
+
class WikiError(Exception):
    """Raised by the scraping helpers when a wiki lookup cannot be completed.

    The message is written for end users: the bot's command handler sends
    ``str(error)`` straight to the Discord channel.
    """
+
def get_search_response(config, query):
    """Search the wiki for ``query`` and return the URLs of the matching pages.

    Uses the MediaWiki ``opensearch`` action; element 3 of its JSON reply is
    the list of result URLs, which is what this function returns.

    Args:
        config: Mapping whose ``"site"`` entry is the wiki's base URL
            (no trailing slash; ``/mediawiki/api.php`` is appended to it).
        query: Free-text search term. It is URL-encoded by ``requests``, so
            spaces, ``&`` and ``#`` are safe.

    Returns:
        A non-empty list of page URLs, best match first.

    Raises:
        WikiError: On timeout, connection failure, a non-2xx status, a reply
            that is not the expected JSON shape, or an empty result list.
    """
    # Let requests build the query string so user input is percent-encoded
    # instead of being spliced into the URL verbatim.
    params = {
        "action": "opensearch",
        "format": "json",
        "formatversion": 2,
        "search": query,
        "namespace": 0,
        "limit": 6,
        "suggest": "true",
    }

    try:
        response = requests.get(
            config['site'] + '/mediawiki/api.php', params=params, timeout=7
        )
    except requests.exceptions.Timeout as e:
        raise WikiError("Request timed out.") from e
    except requests.exceptions.RequestException as e:
        print(f"Failed to complete request: {e}")
        raise WikiError("Error with wiki request.") from e

    try:
        response.raise_for_status()
    except HTTPError as e:
        print(f"HTTP error occured: {e}")
        raise WikiError("Bad HTTP response.") from e

    # A non-JSON body raises ValueError; an API error object (a dict) or a
    # short list raises KeyError/IndexError; a scalar raises TypeError.
    try:
        results = response.json()[3]
    except (ValueError, LookupError, TypeError) as e:
        print(f"Unexpected search response: {e}")
        raise WikiError("Unexpected wiki response.") from e

    if not results:
        raise WikiError("No such wiki entry found.")

    return results
+
def get_page_content(config, page_name):
    """Fetch a wiki page and return its heading and opening paragraph as Markdown.

    Args:
        config: Mapping whose ``"site"`` entry is the wiki's base URL; it is
            prefixed to root-relative links so they work outside the wiki.
        page_name: Despite the name, the full URL of the page to fetch (it is
            passed unchanged to ``requests.get``).

    Returns:
        A ``(header_md, content_md)`` tuple: the first child of the
        ``#firstHeading`` element and the first non-empty ``<p>`` inside
        ``#mw-content-text``, each converted with ``markdownify``.

    Raises:
        WikiError: On timeout, connection failure, a non-2xx status, or when
            the page lacks the expected heading or paragraph.
    """
    try:
        response = requests.get(page_name, timeout=7)
    except requests.exceptions.Timeout as e:
        raise WikiError("Request timed out.") from e
    except requests.exceptions.RequestException as e:
        print(f"Failed to complete request: {e}")
        raise WikiError("Error with wiki request.") from e

    try:
        response.raise_for_status()
    except HTTPError as e:
        print(f"HTTP error occured: {e}")
        raise WikiError("Bad HTTP response.") from e

    soup = BeautifulSoup(response.content, "html.parser")

    heading = soup.find(id="firstHeading")
    body = soup.find(id="mw-content-text")
    if heading is None or not heading.contents or body is None:
        raise WikiError("Could not read wiki page.")

    # Search from the container itself: its first child may be a bare
    # whitespace text node, and the first <p> may be an empty placeholder.
    content = next(
        (p for p in body.find_all("p") if p.get_text(strip=True)),
        None,
    )
    if content is None:
        raise WikiError("Could not read wiki page.")

    # Make root-relative links absolute. Anchors without an href are skipped,
    # and absolute or protocol-relative URLs are left untouched so a "/wiki"
    # inside them is never rewritten.
    for a in content.find_all("a", href=True):
        href = a["href"]
        if href.startswith("/") and not href.startswith("//"):
            a["href"] = config["site"] + href

    header_md = markdownify(str(heading.contents[0]))
    content_md = markdownify(str(content))

    return header_md, content_md
+