diff options
| author | Samuel Johnson <[email protected]> | 2026-04-06 22:17:58 -0400 |
|---|---|---|
| committer | Samuel Johnson <[email protected]> | 2026-04-06 22:17:58 -0400 |
| commit | cff155f7cf091f5bfa30812f8af51363d3b93bf4 (patch) | |
| tree | 3db3f0f427a36954736b36fb2faf9be3849b9b76 | |
| parent | 8d91f47d3903a2145816cb24dd9950fdfec0030e (diff) | |
| -rw-r--r-- | src/scrape_lexicanum.py | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py
index d55f58b..7920a0d 100644
--- a/src/scrape_lexicanum.py
+++ b/src/scrape_lexicanum.py
@@ -6,6 +6,10 @@ from urllib.parse import urlparse
 import requests
 import textwrap
 
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0"
+    }
+
 class WikiError(Exception):
     pass
 
@@ -19,7 +23,7 @@ def decompose(node, field):
 
 def get_search_response(config, api_root, query):
     try:
-        response = requests.get(config['site'] + api_root + '/api.php?action=opensearch&format=json&formatversion=2&search=' + query + '&namespace=0&limit=6&suggest=true', timeout=7)
+        response = requests.get(config['site'] + api_root + '/api.php?action=opensearch&format=json&formatversion=2&search=' + query + '&namespace=0&limit=6&suggest=true', headers=headers, timeout=7)
     except requests.exceptions.Timeout:
         raise WikiError("Request timed out.")
     except requests.exceptions.RequestException as e:
@@ -41,7 +45,7 @@ def get_search_response(config, api_root, query):
 
 def get_page_content(config, page_name):
     try:
-        response = requests.get(page_name, timeout=7)
+        response = requests.get(page_name, headers=headers, timeout=7)
     except requests.exceptions.Timeout:
         raise WikiError("Request timed out.")
     except requests.exceptions.RequestException as e:
```
