diff --git a/owl/post.py b/owl/post.py index 9b5385188eb529e2c3551d3181b8a02cd5bea966..03b65a583a5bd90af9c81881b172a5345ee5f840 100644 --- a/owl/post.py +++ b/owl/post.py @@ -96,12 +96,17 @@ def is_utf8(f) -> bool: f.seek(0) x = io.TextIOWrapper(f, encoding='utf-8', errors='strict') try: - x.read(4096) + head = x.read(4096) + if head.startswith('%PDF-'): + # PDF files are expected to contain non-ASCII bytes at their beginning, + # but surprisingly enough, these bytes sometimes decode as UTF-8. + verdict = False + else: + verdict = True except UnicodeDecodeError: - x.detach() - return False + verdict = False x.detach() - return True + return verdict # Presentation of score