diff options
Diffstat (limited to 'src/scrape_lexicanum.py')
| -rw-r--r-- | src/scrape_lexicanum.py | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/src/scrape_lexicanum.py b/src/scrape_lexicanum.py index d55f58b..7920a0d 100644 --- a/src/scrape_lexicanum.py +++ b/src/scrape_lexicanum.py @@ -6,6 +6,10 @@ from urllib.parse import urlparse import requests import textwrap +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0" + } + class WikiError(Exception): pass @@ -19,7 +23,7 @@ def decompose(node, field): def get_search_response(config, api_root, query): try: - response = requests.get(config['site'] + api_root + '/api.php?action=opensearch&format=json&formatversion=2&search=' + query + '&namespace=0&limit=6&suggest=true', timeout=7) + response = requests.get(config['site'] + api_root + '/api.php?action=opensearch&format=json&formatversion=2&search=' + query + '&namespace=0&limit=6&suggest=true', headers=headers, timeout=7) except requests.exceptions.Timeout: raise WikiError("Request timed out.") except requests.exceptions.RequestException as e: @@ -41,7 +45,7 @@ def get_search_response(config, api_root, query): def get_page_content(config, page_name): try: - response = requests.get(page_name, timeout=7) + response = requests.get(page_name, headers=headers, timeout=7) except requests.exceptions.Timeout: raise WikiError("Request timed out.") except requests.exceptions.RequestException as e: |
