"""Helpers for searching a MediaWiki site and scraping a page summary."""

from bs4 import BeautifulSoup
from markdownify import markdownify
import requests
from requests.exceptions import HTTPError

# Seconds to wait for the wiki before giving up on a request.
_REQUEST_TIMEOUT = 7

# Site-relative path prefixes that must be made absolute.
_RELATIVE_PREFIXES = ("/wiki", "/mediawiki")


class WikiError(Exception):
    """Raised for any failure while querying or parsing the wiki."""


def fix_link(config, node, field):
    """Turn a site-relative link stored in ``node[field]`` into an absolute one.

    Values that begin with ``/wiki`` or ``/mediawiki`` are prefixed with
    ``config["site"]``. Anything else (absolute URLs, fragments, ...) is left
    untouched, as is a node that has no string value for ``field``.

    Args:
        config: Mapping providing the wiki base URL under the ``"site"`` key.
        node: Mapping-like object (e.g. a BeautifulSoup tag) exposing
            ``get`` and item assignment for its attributes.
        field: Name of the attribute to rewrite, e.g. ``"href"`` or ``"src"``.
    """
    value = node.get(field)
    if not isinstance(value, str):
        # Attribute missing (e.g. a named anchor without href): nothing to fix.
        return
    # Only rewrite a leading path. A blanket str.replace would also mangle
    # absolute URLs and file names that merely contain "/wiki".
    if value.startswith(_RELATIVE_PREFIXES):
        node[field] = config["site"] + value


def _fetch(url, params=None):
    """GET ``url`` and return the response, mapping failures to WikiError."""
    try:
        response = requests.get(url, params=params, timeout=_REQUEST_TIMEOUT)
    except requests.exceptions.Timeout as e:
        raise WikiError("Request timed out.") from e
    except requests.exceptions.RequestException as e:
        print(f"Failed to complete request: {e}")
        raise WikiError("Error with wiki request.") from e

    try:
        response.raise_for_status()
    except HTTPError as e:
        print(f"HTTP error occured: {e}")
        raise WikiError("Bad HTTP response.") from e

    return response


def get_search_response(config, query):
    """Search the wiki with the ``opensearch`` API action.

    Args:
        config: Mapping providing the wiki base URL under the ``"site"`` key.
        query: Raw (not URL-encoded) search text; it is encoded here.

    Returns:
        The non-empty fourth element of the opensearch reply, which in the
        MediaWiki opensearch format is the list of matching page URLs.

    Raises:
        WikiError: On timeout, connection failure, HTTP error status, a reply
            that is not a valid opensearch payload, or when nothing matched.
    """
    # Passing the query through ``params`` lets requests percent-encode it, so
    # characters such as "&", "#" or "+" cannot alter the request.
    params = {
        "action": "opensearch",
        "format": "json",
        "formatversion": 2,
        "search": query,
        "namespace": 0,
        "limit": 6,
        "suggest": "true",
    }
    response = _fetch(config["site"] + "/mediawiki/api.php", params=params)

    try:
        data = response.json()
    except ValueError as e:
        raise WikiError("Invalid wiki response.") from e

    # API errors come back as a JSON object rather than the 4-item list.
    if not isinstance(data, list) or len(data) < 4:
        raise WikiError("Invalid wiki response.")
    if not data[3]:
        raise WikiError("No such wiki entry found.")

    return data[3]


def get_page_content(config, page_name):
    """Fetch a wiki page and extract its title, first paragraph and image.

    Args:
        config: Mapping providing the wiki base URL under the ``"site"`` key;
            used to make relative links absolute.
        page_name: Address of the page. It is passed to ``requests.get``
            unchanged, so it must be a full URL.

    Returns:
        A ``(header_md, content_md, image_link)`` tuple: the page heading and
        the first paragraph converted to Markdown, and the absolute URL of
        the first ``a.image`` picture, or ``None`` if there is none.

    Raises:
        WikiError: On timeout, connection failure, HTTP error status, or when
            the page lacks the expected heading or content elements.
    """
    response = _fetch(page_name)
    soup = BeautifulSoup(response.content, "html.parser")

    heading = soup.find(id="firstHeading")
    content_root = soup.find(id="mw-content-text")
    if heading is None or not heading.contents or content_root is None:
        raise WikiError("Unexpected wiki page layout.")

    # First element child; skips any whitespace text node before it.
    parser_output = content_root.find(True, recursive=False)
    if parser_output is None:
        raise WikiError("Unexpected wiki page layout.")

    # Drop tables (infoboxes etc.) so they cannot supply the summary paragraph
    # or the image. Re-query each time so nested tables that were already
    # destroyed with their parent are never touched again.
    table = parser_output.find("table")
    while table is not None:
        table.decompose()
        table = parser_output.find("table")

    content = parser_output.find("p")
    if content is None:
        raise WikiError("Wiki page has no text content.")
    for anchor in content.find_all("a"):
        fix_link(config, anchor, "href")

    image_link = None
    image = parser_output.find("a", {"class": "image"})
    if image is not None:
        picture = image.find("img")
        if picture is not None:
            fix_link(config, picture, "src")
            image_link = picture.get("src")

    header_md = markdownify(str(heading.contents[0]))
    content_md = markdownify(str(content))
    return header_md, content_md, image_link