From 7e2e39ef6cc4ca15d49afe9bbc48745710b64d6e Mon Sep 17 00:00:00 2001
From: Samuel Johnson <sjohnson1149@proton.me>
Date: Sat, 10 Jan 2026 01:26:58 -0500
Subject: Correctly decompose other incorrect images

---
 src/scrape_lexicanum.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/scrape_lexicanum.py')

diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py
index 2252ba7..d327811 100644
--- a/src/scrape_lexicanum.py
+++ b/src/scrape_lexicanum.py
@@ -3,6 +3,7 @@ from markdownify import markdownify
 from requests.exceptions import HTTPError
 
 import requests
+import textwrap
 
 class WikiError(Exception):
     pass
@@ -60,15 +61,19 @@ def get_page_content(config, page_name):
     decompose(parser_output, "div.portal")
     decompose(parser_output, "table.nottemplate")
     decompose(parser_output, "table.Anpassen")
+    decompose(parser_output, "div.quotebox")
 
-    image = parser_output.find("a.image")
+    image = parser_output.find("a", {"class": "image"})
     image_link = None
+    if image:
+        if image["href"] == "/wiki/File:Targetdrone.gif":
+            image = parser_output.find_all("a", {"class": "image"})[1]
+
     if image:
         if image.contents[0]:
             fix_link(config, image.contents[0], "src")
             image_link = image.contents[0]["src"]
 
-    decompose(parser_output, "div.quotebox")
     decompose(parser_output, "br")
     decompose(parser_output, "table")
 
@@ -81,7 +86,7 @@ def get_page_content(config, page_name):
         fix_link(config, a, "href")
 
     header_md = markdownify(str(soup.find(id="firstHeading").contents[0]))
-    content_md = markdownify(str(content))
+    content_md = textwrap.shorten(markdownify(str(content)), width=1000, placeholder="...")
 
     return header_md, content_md, image_link
 
-- 
cgit v1.2.3