From 16d5e569fcec9a395110f897a7bf61e2ce5b3765 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Pracha=C5=99?= <jan.prachar@gmail.com>
Date: Thu, 18 Mar 2021 12:13:59 +0100
Subject: [PATCH] Our version of img2pdf
---
img2pdf/__init__.py | 0
img2pdf/img2pdf.py | 3681 +++++++++++++++++++++++++++++++++++++++++++
img2pdf/jp2.py | 124 ++
mo/submit.py | 1 +
4 files changed, 3806 insertions(+)
create mode 100644 img2pdf/__init__.py
create mode 100644 img2pdf/img2pdf.py
create mode 100644 img2pdf/jp2.py
diff --git a/img2pdf/__init__.py b/img2pdf/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/img2pdf/img2pdf.py b/img2pdf/img2pdf.py
new file mode 100644
index 00000000..0599ee1d
--- /dev/null
+++ b/img2pdf/img2pdf.py
@@ -0,0 +1,3681 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation, either
+# version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+
+import sys
+import os
+import zlib
+import argparse
+from PIL import Image, TiffImagePlugin
+
+# TiffImagePlugin.DEBUG = True
+from PIL.ExifTags import TAGS
+from datetime import datetime
+from img2pdf import jp2
+from enum import Enum
+from io import BytesIO
+import logging
+import struct
+import platform
+import hashlib
+from itertools import chain
+
+have_pdfrw = True
+try:
+ import pdfrw
+except ImportError:
+ have_pdfrw = False
+
+have_pikepdf = True
+try:
+ import pikepdf
+except ImportError:
+ have_pikepdf = False
+
+__version__ = "0.4.0"
+default_dpi = 96.0
+papersizes = {
+ "letter": "8.5inx11in",
+ "a0": "841mmx1189mm",
+ "a1": "594mmx841mm",
+ "a2": "420mmx594mm",
+ "a3": "297mmx420mm",
+ "a4": "210mmx297mm",
+ "a5": "148mmx210mm",
+ "a6": "105mmx148mm",
+ "legal": "8.5inx14in",
+ "tabloid": "11inx17in",
+}
+papernames = {
+ "letter": "Letter",
+ "a0": "A0",
+ "a1": "A1",
+ "a2": "A2",
+ "a3": "A3",
+ "a4": "A4",
+ "a5": "A5",
+ "a6": "A6",
+ "legal": "Legal",
+ "tabloid": "Tabloid",
+}
+
+Engine = Enum("Engine", "internal pdfrw pikepdf")
+
+FitMode = Enum("FitMode", "into fill exact shrink enlarge")
+
+PageOrientation = Enum("PageOrientation", "portrait landscape")
+
+Colorspace = Enum("Colorspace", "RGB L 1 CMYK CMYK;I RGBA P other")
+
+ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF other")
+
+PageMode = Enum("PageMode", "none outlines thumbs")
+
+PageLayout = Enum("PageLayout", "single onecolumn twocolumnright twocolumnleft")
+
+Magnification = Enum("Magnification", "fit fith fitbh")
+
+ImgSize = Enum("ImgSize", "abs perc dpi")
+
+Unit = Enum("Unit", "pt cm mm inch")
+
+ImgUnit = Enum("ImgUnit", "pt cm mm inch perc dpi")
+
+TIFFBitRevTable = [
+ 0x00,
+ 0x80,
+ 0x40,
+ 0xC0,
+ 0x20,
+ 0xA0,
+ 0x60,
+ 0xE0,
+ 0x10,
+ 0x90,
+ 0x50,
+ 0xD0,
+ 0x30,
+ 0xB0,
+ 0x70,
+ 0xF0,
+ 0x08,
+ 0x88,
+ 0x48,
+ 0xC8,
+ 0x28,
+ 0xA8,
+ 0x68,
+ 0xE8,
+ 0x18,
+ 0x98,
+ 0x58,
+ 0xD8,
+ 0x38,
+ 0xB8,
+ 0x78,
+ 0xF8,
+ 0x04,
+ 0x84,
+ 0x44,
+ 0xC4,
+ 0x24,
+ 0xA4,
+ 0x64,
+ 0xE4,
+ 0x14,
+ 0x94,
+ 0x54,
+ 0xD4,
+ 0x34,
+ 0xB4,
+ 0x74,
+ 0xF4,
+ 0x0C,
+ 0x8C,
+ 0x4C,
+ 0xCC,
+ 0x2C,
+ 0xAC,
+ 0x6C,
+ 0xEC,
+ 0x1C,
+ 0x9C,
+ 0x5C,
+ 0xDC,
+ 0x3C,
+ 0xBC,
+ 0x7C,
+ 0xFC,
+ 0x02,
+ 0x82,
+ 0x42,
+ 0xC2,
+ 0x22,
+ 0xA2,
+ 0x62,
+ 0xE2,
+ 0x12,
+ 0x92,
+ 0x52,
+ 0xD2,
+ 0x32,
+ 0xB2,
+ 0x72,
+ 0xF2,
+ 0x0A,
+ 0x8A,
+ 0x4A,
+ 0xCA,
+ 0x2A,
+ 0xAA,
+ 0x6A,
+ 0xEA,
+ 0x1A,
+ 0x9A,
+ 0x5A,
+ 0xDA,
+ 0x3A,
+ 0xBA,
+ 0x7A,
+ 0xFA,
+ 0x06,
+ 0x86,
+ 0x46,
+ 0xC6,
+ 0x26,
+ 0xA6,
+ 0x66,
+ 0xE6,
+ 0x16,
+ 0x96,
+ 0x56,
+ 0xD6,
+ 0x36,
+ 0xB6,
+ 0x76,
+ 0xF6,
+ 0x0E,
+ 0x8E,
+ 0x4E,
+ 0xCE,
+ 0x2E,
+ 0xAE,
+ 0x6E,
+ 0xEE,
+ 0x1E,
+ 0x9E,
+ 0x5E,
+ 0xDE,
+ 0x3E,
+ 0xBE,
+ 0x7E,
+ 0xFE,
+ 0x01,
+ 0x81,
+ 0x41,
+ 0xC1,
+ 0x21,
+ 0xA1,
+ 0x61,
+ 0xE1,
+ 0x11,
+ 0x91,
+ 0x51,
+ 0xD1,
+ 0x31,
+ 0xB1,
+ 0x71,
+ 0xF1,
+ 0x09,
+ 0x89,
+ 0x49,
+ 0xC9,
+ 0x29,
+ 0xA9,
+ 0x69,
+ 0xE9,
+ 0x19,
+ 0x99,
+ 0x59,
+ 0xD9,
+ 0x39,
+ 0xB9,
+ 0x79,
+ 0xF9,
+ 0x05,
+ 0x85,
+ 0x45,
+ 0xC5,
+ 0x25,
+ 0xA5,
+ 0x65,
+ 0xE5,
+ 0x15,
+ 0x95,
+ 0x55,
+ 0xD5,
+ 0x35,
+ 0xB5,
+ 0x75,
+ 0xF5,
+ 0x0D,
+ 0x8D,
+ 0x4D,
+ 0xCD,
+ 0x2D,
+ 0xAD,
+ 0x6D,
+ 0xED,
+ 0x1D,
+ 0x9D,
+ 0x5D,
+ 0xDD,
+ 0x3D,
+ 0xBD,
+ 0x7D,
+ 0xFD,
+ 0x03,
+ 0x83,
+ 0x43,
+ 0xC3,
+ 0x23,
+ 0xA3,
+ 0x63,
+ 0xE3,
+ 0x13,
+ 0x93,
+ 0x53,
+ 0xD3,
+ 0x33,
+ 0xB3,
+ 0x73,
+ 0xF3,
+ 0x0B,
+ 0x8B,
+ 0x4B,
+ 0xCB,
+ 0x2B,
+ 0xAB,
+ 0x6B,
+ 0xEB,
+ 0x1B,
+ 0x9B,
+ 0x5B,
+ 0xDB,
+ 0x3B,
+ 0xBB,
+ 0x7B,
+ 0xFB,
+ 0x07,
+ 0x87,
+ 0x47,
+ 0xC7,
+ 0x27,
+ 0xA7,
+ 0x67,
+ 0xE7,
+ 0x17,
+ 0x97,
+ 0x57,
+ 0xD7,
+ 0x37,
+ 0xB7,
+ 0x77,
+ 0xF7,
+ 0x0F,
+ 0x8F,
+ 0x4F,
+ 0xCF,
+ 0x2F,
+ 0xAF,
+ 0x6F,
+ 0xEF,
+ 0x1F,
+ 0x9F,
+ 0x5F,
+ 0xDF,
+ 0x3F,
+ 0xBF,
+ 0x7F,
+ 0xFF,
+]
+
+
+class NegativeDimensionError(Exception):
+ pass
+
+
+class UnsupportedColorspaceError(Exception):
+ pass
+
+
+class ImageOpenError(Exception):
+ pass
+
+
+class JpegColorspaceError(Exception):
+ pass
+
+
+class PdfTooLargeError(Exception):
+ pass
+
+
+class AlphaChannelError(Exception):
+ pass
+
+
+class ExifOrientationError(Exception):
+ pass
+
+
+# without pdfrw this function is a no-op
+def my_convert_load(string):
+ return string
+
+
+def parse(cont, indent=1):
+ if type(cont) is dict:
+ return (
+ b"<<\n"
+ + b"\n".join(
+ [
+ 4 * indent * b" " + k + b" " + parse(v, indent + 1)
+ for k, v in sorted(cont.items())
+ ]
+ )
+ + b"\n"
+ + 4 * (indent - 1) * b" "
+ + b">>"
+ )
+ elif type(cont) is int:
+ return str(cont).encode()
+ elif type(cont) is float:
+ if int(cont) == cont:
+ return parse(int(cont))
+ else:
+ return ("%0.4f" % cont).rstrip("0").encode()
+ elif isinstance(cont, MyPdfDict):
+ # if cont got an identifier, then addobj() has been called with it
+ # and a link to it will be added, otherwise add it inline
+ if hasattr(cont, "identifier"):
+ return ("%d 0 R" % cont.identifier).encode()
+ else:
+ return parse(cont.content, indent)
+ elif type(cont) is str or isinstance(cont, bytes):
+ if type(cont) is str and type(cont) is not bytes:
+ raise TypeError(
+ "parse must be passed a bytes object in py3. Got: %s" % cont
+ )
+ return cont
+ elif isinstance(cont, list):
+ return b"[ " + b" ".join([parse(c, indent) for c in cont]) + b" ]"
+ else:
+ raise TypeError("cannot handle type %s with content %s" % (type(cont), cont))
+
+
+class MyPdfDict(object):
+ def __init__(self, *args, **kw):
+ self.content = dict()
+ if args:
+ if len(args) == 1:
+ args = args[0]
+ self.content.update(args)
+ self.stream = None
+ for key, value in kw.items():
+ if key == "stream":
+ self.stream = value
+ self.content[MyPdfName.Length] = len(value)
+ elif key == "indirect":
+ pass
+ else:
+ self.content[getattr(MyPdfName, key)] = value
+
+ def tostring(self):
+ if self.stream is not None:
+ return (
+ ("%d 0 obj\n" % self.identifier).encode()
+ + parse(self.content)
+ + b"\nstream\n"
+ + self.stream
+ + b"\nendstream\nendobj\n"
+ )
+ else:
+ return (
+ ("%d 0 obj\n" % self.identifier).encode()
+ + parse(self.content)
+ + b"\nendobj\n"
+ )
+
+ def __setitem__(self, key, value):
+ self.content[key] = value
+
+ def __getitem__(self, key):
+ return self.content[key]
+
+ def __contains__(self, key):
+ return key in self.content
+
+
+class MyPdfName:
+ def __getattr__(self, name):
+ return b"/" + name.encode("ascii")
+
+
+MyPdfName = MyPdfName()
+
+
+class MyPdfObject(bytes):
+ def __new__(cls, string):
+ return bytes.__new__(cls, string.encode("ascii"))
+
+
+class MyPdfArray(list):
+ pass
+
+
+class MyPdfWriter:
+ def __init__(self):
+ self.objects = []
+ # create an incomplete pages object so that a /Parent entry can be
+ # added to each page
+ self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
+ self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
+ self.pagearray = []
+
+ def addobj(self, obj):
+ newid = len(self.objects) + 1
+ obj.identifier = newid
+ self.objects.append(obj)
+
+ def tostream(self, info, stream, version="1.3", ident=None):
+ xreftable = list()
+
+ # justification of the random binary garbage in the header from
+ # adobe:
+ #
+ # > Note: If a PDF file contains binary data, as most do (see Section
+ # > 3.1, “Lexical Conventions”), it is recommended that the header
+ # > line be immediately followed by a comment line containing at
+ # > least four binary characters—that is, characters whose codes are
+ # > 128 or greater. This ensures proper behavior of file transfer
+ # > applications that inspect data near the beginning of a file to
+ # > determine whether to treat the file’s contents as text or as
+ # > binary.
+ #
+ # the choice of binary characters is arbitrary but those four seem to
+ # be used elsewhere.
+ pdfheader = ("%%PDF-%s\n" % version).encode("ascii")
+ pdfheader += b"%\xe2\xe3\xcf\xd3\n"
+ stream.write(pdfheader)
+
+ # From section 3.4.3 of the PDF Reference (version 1.7):
+ #
+ # > Each entry is exactly 20 bytes long, including the end-of-line
+ # > marker.
+ # >
+ # > [...]
+ # >
+ # > The format of an in-use entry is
+ # > nnnnnnnnnn ggggg n eol
+ # > where
+ # > nnnnnnnnnn is a 10-digit byte offset
+ # > ggggg is a 5-digit generation number
+ # > n is a literal keyword identifying this as an in-use entry
+ # > eol is a 2-character end-of-line sequence
+ # >
+ # > [...]
+ # >
+ # > If the file’s end-of-line marker is a single character (either a
+ # > carriage return or a line feed), it is preceded by a single space;
+ #
+ # Since we chose to use a single character eol marker, we precede it by
+ # a space
+ pos = len(pdfheader)
+ xreftable.append(b"0000000000 65535 f \n")
+ for o in self.objects:
+ xreftable.append(("%010d 00000 n \n" % pos).encode())
+ content = o.tostring()
+ stream.write(content)
+ pos += len(content)
+
+ xrefoffset = pos
+ stream.write(b"xref\n")
+ stream.write(("0 %d\n" % len(xreftable)).encode())
+ for x in xreftable:
+ stream.write(x)
+ stream.write(b"trailer\n")
+ trailer = {b"/Size": len(xreftable), b"/Info": info, b"/Root": self.catalog}
+ if ident is not None:
+ md5 = hashlib.md5(ident).hexdigest().encode("ascii")
+ trailer[b"/ID"] = b"[<%s><%s>]" % (md5, md5)
+ stream.write(parse(trailer) + b"\n")
+ stream.write(b"startxref\n")
+ stream.write(("%d\n" % xrefoffset).encode())
+ stream.write(b"%%EOF\n")
+ return
+
+ def addpage(self, page):
+ page[b"/Parent"] = self.pages
+ self.pagearray.append(page)
+ self.pages.content[b"/Kids"].append(page)
+ self.pages.content[b"/Count"] += 1
+ self.addobj(page)
+
+
+class MyPdfString:
+ @classmethod
+ def encode(cls, string, hextype=False):
+ if hextype:
+ return (
+ b"< " + b" ".join(("%06x" % c).encode("ascii") for c in string) + b" >"
+ )
+ else:
+ try:
+ string = string.encode("ascii")
+ except UnicodeEncodeError:
+ string = b"\xfe\xff" + string.encode("utf-16-be")
+ # We should probably encode more here because at least
+ # ghostscript interpretes a carriage return byte (0x0D) as a
+ # new line byte (0x0A)
+ # PDF supports: \n, \r, \t, \b and \f
+ string = string.replace(b"\\", b"\\\\")
+ string = string.replace(b"(", b"\\(")
+ string = string.replace(b")", b"\\)")
+ return b"(" + string + b")"
+
+
+class pdfdoc(object):
+ def __init__(
+ self,
+ engine=Engine.internal,
+ version="1.3",
+ title=None,
+ author=None,
+ creator=None,
+ producer=None,
+ creationdate=None,
+ moddate=None,
+ subject=None,
+ keywords=None,
+ nodate=False,
+ panes=None,
+ initial_page=None,
+ magnification=None,
+ page_layout=None,
+ fit_window=False,
+ center_window=False,
+ fullscreen=False,
+ pdfa=None,
+ ):
+ if engine is None:
+ if have_pikepdf:
+ engine = Engine.pikepdf
+ elif have_pdfrw:
+ engine = Engine.pdfrw
+ else:
+ engine = Engine.internal
+
+ if engine == Engine.pikepdf:
+ PdfWriter = pikepdf.new
+ PdfDict = pikepdf.Dictionary
+ PdfName = pikepdf.Name
+ elif engine == Engine.pdfrw:
+ from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
+ elif engine == Engine.internal:
+ PdfWriter = MyPdfWriter
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfString = MyPdfString
+ else:
+ raise ValueError("unknown engine: %s" % engine)
+
+ self.writer = PdfWriter()
+ if engine != Engine.pikepdf:
+ self.writer.docinfo = PdfDict(indirect=True)
+
+ def datetime_to_pdfdate(dt):
+ return dt.strftime("%Y%m%d%H%M%SZ")
+
+ for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
+ v = locals()[k.lower()]
+ if v is None or v == "":
+ continue
+ if engine != Engine.pikepdf:
+ v = PdfString.encode(v)
+ self.writer.docinfo[getattr(PdfName, k)] = v
+
+ now = datetime.now()
+ for k in ["CreationDate", "ModDate"]:
+ v = locals()[k.lower()]
+ if v is None and nodate:
+ continue
+ if v is None:
+ v = now
+ v = ("D:" + datetime_to_pdfdate(v)).encode("ascii")
+ if engine == Engine.internal:
+ v = b"(" + v + b")"
+ self.writer.docinfo[getattr(PdfName, k)] = v
+ if keywords is not None:
+ if engine == Engine.pikepdf:
+ self.writer.docinfo[PdfName.Keywords] = ",".join(keywords)
+ else:
+ self.writer.docinfo[PdfName.Keywords] = PdfString.encode(
+ ",".join(keywords)
+ )
+
+ def datetime_to_xmpdate(dt):
+ return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
+<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
+<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:iX='http://ns.adobe.com/iX/1.0/'>
+ <rdf:Description rdf:about='' xmlns:pdf='http://ns.adobe.com/pdf/1.3/'%s/>
+ <rdf:Description rdf:about='' xmlns:xmp='http://ns.adobe.com/xap/1.0/'>
+ %s
+ %s
+ </rdf:Description>
+ <rdf:Description rdf:about='' xmlns:pdfaid='http://www.aiim.org/pdfa/ns/id/' pdfaid:part='1' pdfaid:conformance='B'/>
+</rdf:RDF>
+</x:xmpmeta>
+
+<?xpacket end='w'?>
+""" % (
+ b" pdf:Producer='%s'" % producer.encode("ascii")
+ if producer is not None
+ else b"",
+ b""
+ if creationdate is None and nodate
+ else b"<xmp:ModifyDate>%s</xmp:ModifyDate>"
+ % datetime_to_xmpdate(now if creationdate is None else creationdate).encode(
+ "ascii"
+ ),
+ b""
+ if moddate is None and nodate
+ else b"<xmp:CreateDate>%s</xmp:CreateDate>"
+ % datetime_to_xmpdate(now if moddate is None else moddate).encode("ascii"),
+ )
+
+ if engine != Engine.pikepdf:
+ # this is done because pdfrw adds info, catalog and pages as the first
+ # three objects in this order
+ if engine == Engine.internal:
+ self.writer.addobj(self.writer.docinfo)
+ self.writer.addobj(self.writer.catalog)
+ self.writer.addobj(self.writer.pages)
+
+ self.panes = panes
+ self.initial_page = initial_page
+ self.magnification = magnification
+ self.page_layout = page_layout
+ self.fit_window = fit_window
+ self.center_window = center_window
+ self.fullscreen = fullscreen
+ self.engine = engine
+ self.output_version = version
+ self.pdfa = pdfa
+
+ def add_imagepage(
+ self,
+ color,
+ imgwidthpx,
+ imgheightpx,
+ imgformat,
+ imgdata,
+ imgwidthpdf,
+ imgheightpdf,
+ imgxpdf,
+ imgypdf,
+ pagewidth,
+ pageheight,
+ userunit=None,
+ palette=None,
+ inverted=False,
+ depth=0,
+ rotate=0,
+ cropborder=None,
+ bleedborder=None,
+ trimborder=None,
+ artborder=None,
+ iccp=None,
+ ):
+ if self.engine == Engine.pikepdf:
+ PdfArray = pikepdf.Array
+ PdfDict = pikepdf.Dictionary
+ PdfName = pikepdf.Name
+ elif self.engine == Engine.pdfrw:
+ from pdfrw import PdfDict, PdfName, PdfObject, PdfString
+ from pdfrw.py23_diffs import convert_load
+ elif self.engine == Engine.internal:
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfObject = MyPdfObject
+ PdfString = MyPdfString
+ convert_load = my_convert_load
+ else:
+ raise ValueError("unknown engine: %s" % self.engine)
+ TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true")
+ FalseObject = False if self.engine == Engine.pikepdf else PdfObject("false")
+
+ if color == Colorspace["1"] or color == Colorspace.L:
+ colorspace = PdfName.DeviceGray
+ elif color == Colorspace.RGB:
+ colorspace = PdfName.DeviceRGB
+ elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
+ colorspace = PdfName.DeviceCMYK
+ elif color == Colorspace.P:
+ if self.engine == Engine.pdfrw:
+ # https://github.com/pmaupin/pdfrw/issues/128
+ # https://github.com/pmaupin/pdfrw/issues/147
+ raise Exception(
+ "pdfrw does not support hex strings for "
+ "palette image input, re-run with "
+ "--engine=internal or --engine=pikepdf"
+ )
+ assert len(palette) % 3 == 0
+ colorspace = [
+ PdfName.Indexed,
+ PdfName.DeviceRGB,
+ (len(palette) // 3) - 1,
+ bytes(palette)
+ if self.engine == Engine.pikepdf
+ else PdfString.encode(
+ [
+ int.from_bytes(palette[i : i + 3], "big")
+ for i in range(0, len(palette), 3)
+ ],
+ hextype=True,
+ ),
+ ]
+ else:
+ raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)
+
+ if iccp is not None:
+ if self.engine == Engine.pikepdf:
+ iccpdict = self.writer.make_stream(iccp)
+ else:
+ iccpdict = PdfDict(stream=convert_load(iccp))
+ iccpdict[PdfName.Alternate] = colorspace
+ if color == Colorspace["1"] or color == Colorspace.L:
+ iccpdict[PdfName.N] = 1
+ elif color == Colorspace.RGB:
+ iccpdict[PdfName.N] = 3
+ elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
+ iccpdict[PdfName.N] = 4
+ elif color == Colorspace.P:
+ raise Exception("Cannot have Palette images with ICC profile")
+ colorspace = [PdfName.ICCBased, iccpdict]
+
+ # either embed the whole jpeg or deflate the bitmap representation
+ if imgformat is ImageFormat.JPEG:
+ ofilter = PdfName.DCTDecode
+ elif imgformat is ImageFormat.JPEG2000:
+ ofilter = PdfName.JPXDecode
+ self.output_version = "1.5" # jpeg2000 needs pdf 1.5
+ elif imgformat is ImageFormat.CCITTGroup4:
+ ofilter = [PdfName.CCITTFaxDecode]
+ else:
+ ofilter = PdfName.FlateDecode
+
+ if self.engine == Engine.pikepdf:
+ image = self.writer.make_stream(imgdata)
+ else:
+ image = PdfDict(stream=convert_load(imgdata))
+
+ image[PdfName.Type] = PdfName.XObject
+ image[PdfName.Subtype] = PdfName.Image
+ image[PdfName.Filter] = ofilter
+ image[PdfName.Width] = imgwidthpx
+ image[PdfName.Height] = imgheightpx
+ image[PdfName.ColorSpace] = colorspace
+ image[PdfName.BitsPerComponent] = depth
+
+ if color == Colorspace["CMYK;I"]:
+ # Inverts all four channels
+ image[PdfName.Decode] = [1, 0, 1, 0, 1, 0, 1, 0]
+
+ if imgformat is ImageFormat.CCITTGroup4:
+ decodeparms = PdfDict()
+ # The default for the K parameter is 0 which indicates Group 3 1-D
+ # encoding. We set it to -1 because we want Group 4 encoding.
+ decodeparms[PdfName.K] = -1
+ if inverted:
+ decodeparms[PdfName.BlackIs1] = FalseObject
+ else:
+ decodeparms[PdfName.BlackIs1] = TrueObject
+ decodeparms[PdfName.Columns] = imgwidthpx
+ decodeparms[PdfName.Rows] = imgheightpx
+ image[PdfName.DecodeParms] = [decodeparms]
+ elif imgformat is ImageFormat.PNG:
+ decodeparms = PdfDict()
+ decodeparms[PdfName.Predictor] = 15
+ if color in [Colorspace.P, Colorspace["1"], Colorspace.L]:
+ decodeparms[PdfName.Colors] = 1
+ else:
+ decodeparms[PdfName.Colors] = 3
+ decodeparms[PdfName.Columns] = imgwidthpx
+ decodeparms[PdfName.BitsPerComponent] = depth
+ image[PdfName.DecodeParms] = decodeparms
+
+ text = (
+ "q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ"
+ % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)
+ ).encode("ascii")
+
+ if self.engine == Engine.pikepdf:
+ content = self.writer.make_stream(text)
+ else:
+ content = PdfDict(stream=convert_load(text))
+ resources = PdfDict(XObject=PdfDict(Im0=image))
+
+ if self.engine == Engine.pikepdf:
+ page = self.writer.add_blank_page(page_size=(pagewidth, pageheight))
+ else:
+ page = PdfDict(indirect=True)
+ page[PdfName.Type] = PdfName.Page
+ page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
+ # 14.11.2 Page Boundaries
+ # ...
+ # The crop, bleed, trim, and art boxes shall not ordinarily extend
+ # beyond the boundaries of the media box. If they do, they are
+ # effectively reduced to their intersection with the media box.
+ if cropborder is not None:
+ page[PdfName.CropBox] = [
+ cropborder[1],
+ cropborder[0],
+ pagewidth - 2 * cropborder[1],
+ pageheight - 2 * cropborder[0],
+ ]
+ if bleedborder is None:
+ if PdfName.CropBox in page:
+ page[PdfName.BleedBox] = page[PdfName.CropBox]
+ else:
+ page[PdfName.BleedBox] = [
+ bleedborder[1],
+ bleedborder[0],
+ pagewidth - 2 * bleedborder[1],
+ pageheight - 2 * bleedborder[0],
+ ]
+ if trimborder is None:
+ if PdfName.CropBox in page:
+ page[PdfName.TrimBox] = page[PdfName.CropBox]
+ else:
+ page[PdfName.TrimBox] = [
+ trimborder[1],
+ trimborder[0],
+ pagewidth - 2 * trimborder[1],
+ pageheight - 2 * trimborder[0],
+ ]
+ if artborder is None:
+ if PdfName.CropBox in page:
+ page[PdfName.ArtBox] = page[PdfName.CropBox]
+ else:
+ page[PdfName.ArtBox] = [
+ artborder[1],
+ artborder[0],
+ pagewidth - 2 * artborder[1],
+ pageheight - 2 * artborder[0],
+ ]
+ page[PdfName.Resources] = resources
+ page[PdfName.Contents] = content
+ if rotate != 0:
+ page[PdfName.Rotate] = rotate
+ if userunit is not None:
+ # /UserUnit requires PDF 1.6
+ if self.output_version < "1.6":
+ self.output_version = "1.6"
+ page[PdfName.UserUnit] = userunit
+
+ if self.engine != Engine.pikepdf:
+ self.writer.addpage(page)
+
+ if self.engine == Engine.internal:
+ self.writer.addobj(content)
+ self.writer.addobj(image)
+ if iccp is not None:
+ self.writer.addobj(iccpdict)
+
+ def tostring(self):
+ stream = BytesIO()
+ self.tostream(stream)
+ return stream.getvalue()
+
+ def tostream(self, outputstream):
+ if self.engine == Engine.pikepdf:
+ PdfArray = pikepdf.Array
+ PdfDict = pikepdf.Dictionary
+ PdfName = pikepdf.Name
+ elif self.engine == Engine.pdfrw:
+ from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
+ from pdfrw.py23_diffs import convert_load
+ elif self.engine == Engine.internal:
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfObject = MyPdfObject
+ PdfArray = MyPdfArray
+ convert_load = my_convert_load
+ else:
+ raise ValueError("unknown engine: %s" % self.engine)
+ NullObject = None if self.engine == Engine.pikepdf else PdfObject("null")
+ TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true")
+
+ # We fill the catalog with more information like /ViewerPreferences,
+ # /PageMode, /PageLayout or /OpenAction because the latter refers to a
+ # page object which has to be present so that we can get its id.
+ #
+ # Furthermore, if using pdfrw, the trailer is cleared every time a page
+ # is added, so we can only start using it after all pages have been
+ # written.
+
+ if self.engine == Engine.pikepdf:
+ catalog = self.writer.Root
+ elif self.engine == Engine.pdfrw:
+ catalog = self.writer.trailer.Root
+ elif self.engine == Engine.internal:
+ catalog = self.writer.catalog
+ else:
+ raise ValueError("unknown engine: %s" % self.engine)
+
+ if (
+ self.fullscreen
+ or self.fit_window
+ or self.center_window
+ or self.panes is not None
+ ):
+ catalog[PdfName.ViewerPreferences] = PdfDict()
+
+ if self.fullscreen:
+ # this setting might be overwritten later by the page mode
+ catalog[PdfName.ViewerPreferences][
+ PdfName.NonFullScreenPageMode
+ ] = PdfName.UseNone
+
+ if self.panes == PageMode.thumbs:
+ catalog[PdfName.ViewerPreferences][
+ PdfName.NonFullScreenPageMode
+ ] = PdfName.UseThumbs
+ # this setting might be overwritten later if fullscreen
+ catalog[PdfName.PageMode] = PdfName.UseThumbs
+ elif self.panes == PageMode.outlines:
+ catalog[PdfName.ViewerPreferences][
+ PdfName.NonFullScreenPageMode
+ ] = PdfName.UseOutlines
+ # this setting might be overwritten later if fullscreen
+ catalog[PdfName.PageMode] = PdfName.UseOutlines
+ elif self.panes in [PageMode.none, None]:
+ pass
+ else:
+ raise ValueError("unknown page mode: %s" % self.panes)
+
+ if self.fit_window:
+ catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject
+
+ if self.center_window:
+ catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = TrueObject
+
+ if self.fullscreen:
+ catalog[PdfName.PageMode] = PdfName.FullScreen
+
+ # see table 8.2 in section 8.2.1 in
+ # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf
+ # Fit - Fits the page to the window.
+ # FitH - Fits the width of the page to the window.
+ # FitV - Fits the height of the page to the window.
+ # FitR - Fits the rectangle specified by the four coordinates to the
+ # window.
+ # FitB - Fits the page bounding box to the window. This basically
+ # reduces the amount of whitespace (margins) that is displayed
+ # and thus focussing more on the text content.
+ # FitBH - Fits the width of the page bounding box to the window.
+ # FitBV - Fits the height of the page bounding box to the window.
+
+ # by default the initial page is the first one
+ if self.engine == Engine.pikepdf:
+ initial_page = self.writer.pages[0]
+ else:
+ initial_page = self.writer.pagearray[0]
+ # we set the open action here to make sure we open on the requested
+ # initial page but this value might be overwritten by a custom open
+ # action later while still taking the requested initial page into
+ # account
+ if self.initial_page is not None:
+ if self.engine == Engine.pikepdf:
+ initial_page = self.writer.pages[self.initial_page - 1]
+ else:
+ initial_page = self.writer.pagearray[self.initial_page - 1]
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.XYZ, NullObject, NullObject, 0]
+ )
+
+ if self.magnification == Magnification.fit:
+ catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
+ elif self.magnification == Magnification.fith:
+ pagewidth = initial_page[PdfName.MediaBox][2]
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.FitH, pagewidth]
+ )
+ elif self.magnification == Magnification.fitbh:
+ # quick hack to determine the image width on the page
+ imgwidth = float(initial_page[PdfName.Contents].stream.split()[4])
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.FitBH, imgwidth]
+ )
+ elif isinstance(self.magnification, float):
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.XYZ, NullObject, NullObject, self.magnification]
+ )
+ elif self.magnification is None:
+ pass
+ else:
+ raise ValueError("unknown magnification: %s" % self.magnification)
+
+ if self.page_layout == PageLayout.single:
+ catalog[PdfName.PageLayout] = PdfName.SinglePage
+ elif self.page_layout == PageLayout.onecolumn:
+ catalog[PdfName.PageLayout] = PdfName.OneColumn
+ elif self.page_layout == PageLayout.twocolumnright:
+ catalog[PdfName.PageLayout] = PdfName.TwoColumnRight
+ elif self.page_layout == PageLayout.twocolumnleft:
+ catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft
+ elif self.page_layout is None:
+ pass
+ else:
+ raise ValueError("unknown page layout: %s" % self.page_layout)
+
+ if self.pdfa is not None:
+ if self.engine == Engine.pikepdf:
+ metadata = self.writer.make_stream(self.xmp)
+ else:
+ metadata = PdfDict(stream=convert_load(self.xmp))
+ metadata[PdfName.Subtype] = PdfName.XML
+ metadata[PdfName.Type] = PdfName.Metadata
+ with open(self.pdfa, "rb") as f:
+ icc = f.read()
+ intents = PdfDict()
+ if self.engine == Engine.pikepdf:
+ iccstream = self.writer.make_stream(icc)
+ iccstream.stream_dict.N = 3
+ else:
+ iccstream = PdfDict(stream=convert_load(zlib.compress(icc)))
+ iccstream[PdfName.N] = 3
+ iccstream[PdfName.Filter] = PdfName.FlateDecode
+ intents[PdfName.S] = PdfName.GTS_PDFA1
+ intents[PdfName.Type] = PdfName.OutputIntent
+ intents[PdfName.OutputConditionIdentifier] = (
+ b"sRGB" if self.engine == Engine.pikepdf else b"(sRGB)"
+ )
+ intents[PdfName.DestOutputProfile] = iccstream
+ catalog[PdfName.OutputIntents] = PdfArray([intents])
+ catalog[PdfName.Metadata] = metadata
+
+ if self.engine == Engine.internal:
+ self.writer.addobj(metadata)
+ self.writer.addobj(iccstream)
+
+ # now write out the PDF
+ if self.engine == Engine.pikepdf:
+ self.writer.save(
+ outputstream, min_version=self.output_version, linearize=True
+ )
+ elif self.engine == Engine.pdfrw:
+ self.writer.trailer.Info = self.writer.docinfo
+ # setting the version attribute of the pdfrw PdfWriter object will
+ # influence the behaviour of the write() function
+ self.writer.version = self.output_version
+ if self.pdfa:
+ md5 = hashlib.md5(b"").hexdigest().encode("ascii")
+ self.writer.trailer[PdfName.ID] = PdfArray([md5, md5])
+ self.writer.write(outputstream)
+ elif self.engine == Engine.internal:
+ self.writer.tostream(
+ self.writer.docinfo,
+ outputstream,
+ self.output_version,
+ None if self.pdfa is None else b"",
+ )
+ else:
+ raise ValueError("unknown engine: %s" % self.engine)
+
+
+def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
+ if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
+ # this codepath gets called if the PIL installation is not able to
+ # handle JPEG2000 files
+ imgwidthpx, imgheightpx, ics, hdpi, vdpi = jp2.parsejp2(rawdata)
+
+ if hdpi is None:
+ hdpi = default_dpi
+ if vdpi is None:
+ vdpi = default_dpi
+ ndpi = (hdpi, vdpi)
+ else:
+ imgwidthpx, imgheightpx = imgdata.size
+
+ ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
+ # In python3, the returned dpi value for some tiff images will
+ # not be an integer but a float. To make the behaviour of
+ # img2pdf the same between python2 and python3, we convert that
+ # float into an integer by rounding.
+ # Search online for the 72.009 dpi problem for more info.
+ ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
+ ics = imgdata.mode
+
+ if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
+ logging.warning("Image contains transparency which cannot be retained in PDF.")
+ logging.warning("img2pdf will not perform a lossy operation.")
+ logging.warning("You can remove the alpha channel using imagemagick:")
+ logging.warning(
+ " $ convert input.png -background white -alpha "
+ "remove -alpha off output.png"
+ )
+ raise AlphaChannelError("Refusing to work on images with alpha channel")
+
+ # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
+ # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
+ # header. In that case it can happen that the horizontal and vertical DPI
+ # are set to zero.
+ if ndpi == (0, 0):
+ ndpi = (default_dpi, default_dpi)
+
+ # PIL defaults to a dpi of 1 if a TIFF image does not specify the dpi.
+ # In that case, we want to use a different default.
+ if ndpi == (1, 1) and imgformat == ImageFormat.TIFF:
+ ndpi = (
+ imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi),
+ imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi),
+ )
+
+ logging.debug("input dpi = %d x %d", *ndpi)
+
+ rotation = 0
+ if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
+ for tag, value in imgdata._getexif().items():
+ if TAGS.get(tag, tag) == "Orientation":
+ # Detailed information on EXIF rotation tags:
+ # http://impulseadventure.com/photo/exif-orientation.html
+ if value in (0, 1): # some mobile phones produce invalid rotation
+ rotation = 0
+ elif value == 6:
+ rotation = 90
+ elif value == 3:
+ rotation = 180
+ elif value == 8:
+ rotation = 270
+ elif value in (2, 4, 5, 7):
+ raise ExifOrientationError(
+ "Unsupported flipped rotation mode (%d)" % value
+ )
+ else:
+ raise ExifOrientationError("Invalid rotation (%d)" % value)
+
+ logging.debug("rotation = %d°", rotation)
+
+ if colorspace:
+ color = colorspace
+ logging.debug("input colorspace (forced) = %s", color)
+ else:
+ color = None
+ for c in Colorspace:
+ if c.name == ics:
+ color = c
+ if color is None:
+ # PIL does not provide the information about the original
+ # colorspace for 16bit grayscale PNG images. Thus, we retrieve
+ # that info manually by looking at byte 10 in the IHDR chunk. We
+ # know where to find that in the file because the IHDR chunk must
+ # be the first chunk
+ if (
+ rawdata is not None
+ and imgformat == ImageFormat.PNG
+ and rawdata[25] == 0
+ ):
+ color = Colorspace.L
+ else:
+ raise ValueError("unknown colorspace")
+ if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG:
+ # Adobe inverts CMYK JPEGs for some reason, and others
+ # have followed suit as well. Some software assumes the
+ # JPEG is inverted if the Adobe tag (APP14), while other
+ # software assumes all CMYK JPEGs are inverted. I don't
+ # have enough experience with these to know which is
+ # better for images currently in the wild, so I'm going
+ # with the first approach for now.
+ if "adobe" in imgdata.info:
+ color = Colorspace["CMYK;I"]
+ logging.debug("input colorspace = %s", color.name)
+
+ iccp = None
+ if "icc_profile" in imgdata.info:
+ iccp = imgdata.info.get("icc_profile")
+
+ logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
+
+ return (color, ndpi, imgwidthpx, imgheightpx, rotation, iccp)
+
+
+def ccitt_payload_location_from_pil(img):
+ # If Pillow is passed an invalid compression argument it will ignore it;
+ # make sure the image actually got compressed.
+ if img.info["compression"] != "group4":
+ raise ValueError(
+ "Image not compressed with CCITT Group 4 but with: %s"
+ % img.info["compression"]
+ )
+
+ # Read the TIFF tags to find the offset(s) of the compressed data strips.
+ strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS]
+ strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
+ rows_per_strip = img.tag_v2.get(TiffImagePlugin.ROWSPERSTRIP, 2 ** 32 - 1)
+
+ # PIL always seems to create a single strip even for very large TIFFs when
+ # it saves images, so assume we only have to read a single strip.
+ # A test ~10 GPixel image was still encoded as a single strip. Just to be
+ # safe check throw an error if there is more than one offset.
+ if len(strip_offsets) != 1 or len(strip_bytes) != 1:
+ raise NotImplementedError("Transcoding multiple strips not supported")
+
+ (offset,), (length,) = strip_offsets, strip_bytes
+
+ logging.debug("TIFF strip_offsets: %d" % offset)
+ logging.debug("TIFF strip_bytes: %d" % length)
+
+ return offset, length
+
+
+def transcode_monochrome(imgdata):
+ """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""
+
+ logging.debug("Converting monochrome to CCITT Group4")
+
+ # Convert the image to Group 4 in memory. If libtiff is not installed and
+ # Pillow is not compiled against it, .save() will raise an exception.
+ newimgio = BytesIO()
+
+ # we create a whole new PIL image or otherwise it might happen with some
+ # input images, that libtiff fails an assert and the whole process is
+ # killed by a SIGABRT:
+ # https://gitlab.mister-muffin.de/josch/img2pdf/issues/46
+ im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes())
+ im.save(newimgio, format="TIFF", compression="group4")
+
+ # Open new image in memory
+ newimgio.seek(0)
+ newimg = Image.open(newimgio)
+
+ offset, length = ccitt_payload_location_from_pil(newimg)
+
+ newimgio.seek(offset)
+ return newimgio.read(length)
+
+
+def parse_png(rawdata):
+ pngidat = b""
+ palette = b""
+ i = 16
+ while i < len(rawdata):
+ # once we can require Python >= 3.2 we can use int.from_bytes() instead
+ (n,) = struct.unpack(">I", rawdata[i - 8 : i - 4])
+ if i + n > len(rawdata):
+ raise Exception("invalid png: %d %d %d" % (i, n, len(rawdata)))
+ if rawdata[i - 4 : i] == b"IDAT":
+ pngidat += rawdata[i : i + n]
+ elif rawdata[i - 4 : i] == b"PLTE":
+ palette += rawdata[i : i + n]
+ i += n
+ i += 12
+ return pngidat, palette
+
+
+def read_images(rawdata, colorspace, first_frame_only=False):
+ im = BytesIO(rawdata)
+ im.seek(0)
+ imgdata = None
+ try:
+ imgdata = Image.open(im)
+ except IOError as e:
+ # test if it is a jpeg2000 image
+ if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+ raise ImageOpenError(
+ "cannot read input image (not jpeg2000). "
+ "PIL: error reading image: %s" % e
+ )
+ # image is jpeg2000
+ imgformat = ImageFormat.JPEG2000
+ else:
+ imgformat = None
+ for f in ImageFormat:
+ if f.name == imgdata.format:
+ imgformat = f
+ if imgformat is None:
+ imgformat = ImageFormat.other
+
+ logging.debug("imgformat = %s", imgformat.name)
+
+ # depending on the input format, determine whether to pass the raw
+ # image or the zlib compressed color information
+
+ # JPEG and JPEG2000 can be embedded into the PDF as-is
+ if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
+ color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata
+ )
+ if color == Colorspace["1"]:
+ raise JpegColorspaceError("jpeg can't be monochrome")
+ if color == Colorspace["P"]:
+ raise JpegColorspaceError("jpeg can't have a color palette")
+ if color == Colorspace["RGBA"]:
+ raise JpegColorspaceError("jpeg can't have an alpha channel")
+ im.close()
+ logging.debug("read_images() embeds a JPEG")
+ return [
+ (
+ color,
+ ndpi,
+ imgformat,
+ rawdata,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 8,
+ rotation,
+ iccp,
+ )
+ ]
+
+ # We can directly embed the IDAT chunk of PNG images if the PNG is not
+ # interlaced
+ #
+ # PIL does not provide the information whether a PNG was stored interlaced
+ # or not. Thus, we retrieve that info manually by looking at byte 13 in the
+ # IHDR chunk. We know where to find that in the file because the IHDR chunk
+ # must be the first chunk.
+ if imgformat == ImageFormat.PNG and rawdata[28] == 0:
+ color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata
+ )
+ pngidat, palette = parse_png(rawdata)
+ im.close()
+ # PIL does not provide the information about the original bits per
+ # sample. Thus, we retrieve that info manually by looking at byte 9 in
+ # the IHDR chunk. We know where to find that in the file because the
+ # IHDR chunk must be the first chunk
+ depth = rawdata[24]
+ if depth not in [1, 2, 4, 8, 16]:
+ raise ValueError("invalid bit depth: %d" % depth)
+ logging.debug("read_images() embeds a PNG")
+ return [
+ (
+ color,
+ ndpi,
+ imgformat,
+ pngidat,
+ imgwidthpx,
+ imgheightpx,
+ palette,
+ False,
+ depth,
+ rotation,
+ iccp,
+ )
+ ]
+
+ # If our input is not JPEG or PNG, then we might have a format that
+ # supports multiple frames (like TIFF or GIF), so we need a loop to
+ # iterate through all frames of the image.
+ #
+ # Each frame gets compressed using PNG compression *except* if:
+ #
+ # * The image is monochrome => encode using CCITT group 4
+ #
+ # * The image is CMYK => zip plain RGB data
+ #
+ # * We are handling a CCITT encoded TIFF frame => embed data
+
+ result = []
+ img_page_count = 0
+ # loop through all frames of the image (example: multipage TIFF)
+ while True:
+ try:
+ imgdata.seek(img_page_count)
+ except EOFError:
+ break
+
+ if first_frame_only and img_page_count > 0:
+ break
+
+ # PIL is unable to preserve the data of 16-bit RGB TIFF files and will
+ # convert it to 8-bit without the possibility to retrieve the original
+ # data
+ # https://github.com/python-pillow/Pillow/issues/1888
+ #
+ # Some tiff images do not have BITSPERSAMPLE set. Use this to create
+ # such a tiff: tiffset -u 258 test.tif
+ if (
+ imgformat == ImageFormat.TIFF
+ and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8
+ ):
+ raise ValueError("PIL is unable to preserve more than 8 bits per sample")
+
+ # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it
+ # only contains a single strip
+ if (
+ imgformat == ImageFormat.TIFF
+ and imgdata.info["compression"] == "group4"
+ and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1
+ ):
+ photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION]
+ inverted = False
+ if photo == 0:
+ inverted = True
+ elif photo != 1:
+ raise ValueError(
+ "unsupported photometric interpretation for "
+ "group4 tiff: %d" % photo
+ )
+ color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata
+ )
+ offset, length = ccitt_payload_location_from_pil(imgdata)
+ im.seek(offset)
+ rawdata = im.read(length)
+ fillorder = imgdata.tag_v2.get(TiffImagePlugin.FILLORDER)
+ if fillorder is None:
+ # no FillOrder: nothing to do
+ pass
+ elif fillorder == 1:
+ # msb-to-lsb: nothing to do
+ pass
+ elif fillorder == 2:
+ logging.debug("fillorder is lsb-to-msb => reverse bits")
+ # lsb-to-msb: reverse bits of each byte
+ rawdata = bytearray(rawdata)
+ for i in range(len(rawdata)):
+ rawdata[i] = TIFFBitRevTable[rawdata[i]]
+ rawdata = bytes(rawdata)
+ else:
+ raise ValueError("unsupported FillOrder: %d" % fillorder)
+ logging.debug("read_images() embeds Group4 from TIFF")
+ result.append(
+ (
+ color,
+ ndpi,
+ ImageFormat.CCITTGroup4,
+ rawdata,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ inverted,
+ 1,
+ rotation,
+ iccp,
+ )
+ )
+ img_page_count += 1
+ continue
+
+ logging.debug("Converting frame: %d" % img_page_count)
+
+ color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace
+ )
+
+ newimg = None
+ if color == Colorspace["1"]:
+ try:
+ ccittdata = transcode_monochrome(imgdata)
+ logging.debug("read_images() encoded a B/W image as CCITT group 4")
+ result.append(
+ (
+ color,
+ ndpi,
+ ImageFormat.CCITTGroup4,
+ ccittdata,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 1,
+ rotation,
+ iccp,
+ )
+ )
+ img_page_count += 1
+ continue
+ except Exception as e:
+ logging.debug(e)
+ logging.debug("Converting colorspace 1 to L")
+ newimg = imgdata.convert("L")
+ color = Colorspace.L
+ elif color in [
+ Colorspace.RGB,
+ Colorspace.L,
+ Colorspace.CMYK,
+ Colorspace["CMYK;I"],
+ Colorspace.P,
+ ]:
+ logging.debug("Colorspace is OK: %s", color)
+ newimg = imgdata
+ else:
+ raise ValueError("unknown or unsupported colorspace: %s" % color.name)
+ # the PNG format does not support CMYK, so we fall back to normal
+ # compression
+ if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]:
+ imggz = zlib.compress(newimg.tobytes())
+ logging.debug("read_images() encoded CMYK with flate compression")
+ result.append(
+ (
+ color,
+ ndpi,
+ imgformat,
+ imggz,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 8,
+ rotation,
+ iccp,
+ )
+ )
+ else:
+ # cheapo version to retrieve a PNG encoding of the payload is to
+ # just save it with PIL. In the future this could be replaced by
+ # dedicated function applying the Paeth PNG filter to the raw pixel
+ pngbuffer = BytesIO()
+ newimg.save(pngbuffer, format="png")
+ pngidat, palette = parse_png(pngbuffer.getvalue())
+ # PIL does not provide the information about the original bits per
+ # sample. Thus, we retrieve that info manually by looking at byte 9 in
+ # the IHDR chunk. We know where to find that in the file because the
+ # IHDR chunk must be the first chunk
+ pngbuffer.seek(24)
+ depth = ord(pngbuffer.read(1))
+ if depth not in [1, 2, 4, 8, 16]:
+ raise ValueError("invalid bit depth: %d" % depth)
+ logging.debug("read_images() encoded an image as PNG")
+ result.append(
+ (
+ color,
+ ndpi,
+ ImageFormat.PNG,
+ pngidat,
+ imgwidthpx,
+ imgheightpx,
+ palette,
+ False,
+ depth,
+ rotation,
+ iccp,
+ )
+ )
+ img_page_count += 1
+ # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
+ # close() method
+ try:
+ imgdata.close()
+ except AttributeError:
+ pass
+ im.close()
+ return result
+
+
+# converts a length in pixels to a length in PDF units (1/72 of an inch)
+def px_to_pt(length, dpi):
+ return 72.0 * length / dpi
+
+
+def cm_to_pt(length):
+ return (72.0 * length) / 2.54
+
+
+def mm_to_pt(length):
+ return (72.0 * length) / 25.4
+
+
+def in_to_pt(length):
+ return 72.0 * length
+
+
+def get_layout_fun(
+ pagesize=None, imgsize=None, border=None, fit=None, auto_orient=False
+):
+ def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight):
+ if fitwidth is None and fitheight is None:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ # if fit is fill or enlarge then it is okay if one of the dimensions
+ # are negative but one of them must still be positive
+ # if fit is not fill or enlarge then both dimensions must be positive
+ if (
+ fit in [FitMode.fill, FitMode.enlarge]
+ and fitwidth is not None
+ and fitwidth < 0
+ and fitheight is not None
+ and fitheight < 0
+ ):
+ raise ValueError(
+ "cannot fit into a rectangle where both dimensions are negative"
+ )
+ elif fit not in [FitMode.fill, FitMode.enlarge] and (
+ (fitwidth is not None and fitwidth < 0)
+ or (fitheight is not None and fitheight < 0)
+ ):
+ raise Exception(
+ "cannot fit into a rectangle where either dimensions are negative"
+ )
+
+ def default():
+ if fitwidth is not None and fitheight is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ if newimgheight > fitheight:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitwidth is None and fitheight is not None:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitheight is None and fitwidth is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return newimgwidth, newimgheight
+
+ if fit is None or fit == FitMode.into:
+ return default()
+ elif fit == FitMode.fill:
+ if fitwidth is not None and fitheight is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ if newimgheight < fitheight:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitwidth is None and fitheight is not None:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitheight is None and fitwidth is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return newimgwidth, newimgheight
+ elif fit == FitMode.exact:
+ if fitwidth is not None and fitheight is not None:
+ return fitwidth, fitheight
+ elif fitwidth is None and fitheight is not None:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitheight is None and fitwidth is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return newimgwidth, newimgheight
+ elif fit == FitMode.shrink:
+ if fitwidth is not None and fitheight is not None:
+ if imgwidth <= fitwidth and imgheight <= fitheight:
+ return imgwidth, imgheight
+ elif fitwidth is None and fitheight is not None:
+ if imgheight <= fitheight:
+ return imgwidth, imgheight
+ elif fitheight is None and fitwidth is not None:
+ if imgwidth <= fitwidth:
+ return imgwidth, imgheight
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return default()
+ elif fit == FitMode.enlarge:
+ if fitwidth is not None and fitheight is not None:
+ if imgwidth > fitwidth or imgheight > fitheight:
+ return imgwidth, imgheight
+ elif fitwidth is None and fitheight is not None:
+ if imgheight > fitheight:
+ return imgwidth, imgheight
+ elif fitheight is None and fitwidth is not None:
+ if imgwidth > fitwidth:
+ return imgwidth, imgheight
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return default()
+ else:
+ raise NotImplementedError
+
+ # if no layout arguments are given, then the image size is equal to the
+ # page size and will be drawn with the default dpi
+ if pagesize is None and imgsize is None and border is None:
+ return default_layout_fun
+ if pagesize is None and imgsize is None and border is not None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0])
+ imgheightpdf = px_to_pt(imgheightpx, ndpi[1])
+ pagewidth = imgwidthpdf + 2 * border[1]
+ pageheight = imgheightpdf + 2 * border[0]
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+ if border is None:
+ border = (0, 0)
+ # if the pagesize is given but the imagesize is not, then the imagesize
+ # will be calculated from the pagesize, taking into account the border
+ # and the fitting
+ if pagesize is not None and imgsize is None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ if (
+ pagesize[0] is not None
+ and pagesize[1] is not None
+ and auto_orient
+ and (
+ (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1])
+ or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])
+ )
+ ):
+ pagewidth, pageheight = pagesize[1], pagesize[0]
+ newborder = border[1], border[0]
+ else:
+ pagewidth, pageheight = pagesize[0], pagesize[1]
+ newborder = border
+ if pagewidth is not None:
+ fitwidth = pagewidth - 2 * newborder[1]
+ else:
+ fitwidth = None
+ if pageheight is not None:
+ fitheight = pageheight - 2 * newborder[0]
+ else:
+ fitheight = None
+ if (
+ fit in [FitMode.fill, FitMode.enlarge]
+ and fitwidth is not None
+ and fitwidth < 0
+ and fitheight is not None
+ and fitheight < 0
+ ):
+ raise NegativeDimensionError(
+ "at least one border dimension musts be smaller than half "
+ "the respective page dimension"
+ )
+ elif fit not in [FitMode.fill, FitMode.enlarge] and (
+ (fitwidth is not None and fitwidth < 0)
+ or (fitheight is not None and fitheight < 0)
+ ):
+ raise NegativeDimensionError(
+ "one border dimension is larger than half of the "
+ "respective page dimension"
+ )
+ imgwidthpdf, imgheightpdf = fitfun(
+ fit,
+ px_to_pt(imgwidthpx, ndpi[0]),
+ px_to_pt(imgheightpx, ndpi[1]),
+ fitwidth,
+ fitheight,
+ )
+ if pagewidth is None:
+ pagewidth = imgwidthpdf + border[1] * 2
+ if pageheight is None:
+ pageheight = imgheightpdf + border[0] * 2
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+
+ def scale_imgsize(s, px, dpi):
+ if s is None:
+ return None
+ mode, value = s
+ if mode == ImgSize.abs:
+ return value
+ if mode == ImgSize.perc:
+ return (px_to_pt(px, dpi) * value) / 100
+ if mode == ImgSize.dpi:
+ return px_to_pt(px, value)
+ raise NotImplementedError
+
+ if pagesize is None and imgsize is not None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ imgwidthpdf, imgheightpdf = fitfun(
+ fit,
+ px_to_pt(imgwidthpx, ndpi[0]),
+ px_to_pt(imgheightpx, ndpi[1]),
+ scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
+ scale_imgsize(imgsize[1], imgheightpx, ndpi[1]),
+ )
+ pagewidth = imgwidthpdf + 2 * border[1]
+ pageheight = imgheightpdf + 2 * border[0]
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+ if pagesize is not None and imgsize is not None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ if (
+ pagesize[0] is not None
+ and pagesize[1] is not None
+ and auto_orient
+ and (
+ (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1])
+ or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])
+ )
+ ):
+ pagewidth, pageheight = pagesize[1], pagesize[0]
+ else:
+ pagewidth, pageheight = pagesize[0], pagesize[1]
+ imgwidthpdf, imgheightpdf = fitfun(
+ fit,
+ px_to_pt(imgwidthpx, ndpi[0]),
+ px_to_pt(imgheightpx, ndpi[1]),
+ scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
+ scale_imgsize(imgsize[1], imgheightpx, ndpi[1]),
+ )
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+ raise NotImplementedError
+
+
+def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
+ imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0])
+ imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1])
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+
+def get_fixed_dpi_layout_fun(fixed_dpi):
+ """Layout function that overrides whatever DPI is claimed in input images.
+
+ >>> layout_fun = get_fixed_dpi_layout_fun((300, 300))
+ >>> convert(image1, layout_fun=layout_fun, ... outputstream=...)
+ """
+
+ def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi):
+ return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi)
+
+ return fixed_dpi_layout_fun
+
+
+def find_scale(pagewidth, pageheight):
+ """Find the power of 10 (10, 100, 1000...) that will reduce the scale
+ below the PDF specification limit of 14400 PDF units (=200 inches)"""
+ from math import log10, ceil
+
+ major = max(pagewidth, pageheight)
+ oversized = major / 14400.0
+
+ return 10 ** ceil(log10(oversized))
+
+
+# given one or more input image, depending on outputstream, either return a
+# string containing the whole PDF if outputstream is None or write the PDF
+# data to the given file-like object and return None
+#
+# Input images can be given as file like objects (they must implement read()),
+# as a binary string representing the image content or as filenames to the
+# images.
+def convert(*images, **kwargs):
+
+ _default_kwargs = dict(
+ engine=None,
+ title=None,
+ author=None,
+ creator=None,
+ producer=None,
+ creationdate=None,
+ moddate=None,
+ subject=None,
+ keywords=None,
+ colorspace=None,
+ nodate=False,
+ layout_fun=default_layout_fun,
+ viewer_panes=None,
+ viewer_initial_page=None,
+ viewer_magnification=None,
+ viewer_page_layout=None,
+ viewer_fit_window=False,
+ viewer_center_window=False,
+ viewer_fullscreen=False,
+ outputstream=None,
+ first_frame_only=False,
+ allow_oversized=True,
+ cropborder=None,
+ bleedborder=None,
+ trimborder=None,
+ artborder=None,
+ pdfa=None,
+ )
+ for kwname, default in _default_kwargs.items():
+ if kwname not in kwargs:
+ kwargs[kwname] = default
+
+ pdf = pdfdoc(
+ kwargs["engine"],
+ "1.3",
+ kwargs["title"],
+ kwargs["author"],
+ kwargs["creator"],
+ kwargs["producer"],
+ kwargs["creationdate"],
+ kwargs["moddate"],
+ kwargs["subject"],
+ kwargs["keywords"],
+ kwargs["nodate"],
+ kwargs["viewer_panes"],
+ kwargs["viewer_initial_page"],
+ kwargs["viewer_magnification"],
+ kwargs["viewer_page_layout"],
+ kwargs["viewer_fit_window"],
+ kwargs["viewer_center_window"],
+ kwargs["viewer_fullscreen"],
+ kwargs["pdfa"],
+ )
+
+ # backwards compatibility with older img2pdf versions where the first
+ # argument to the function had to be given as a list
+ if len(images) == 1:
+ # if only one argument was given and it is a list, expand it
+ if isinstance(images[0], (list, tuple)):
+ images = images[0]
+
+ if not isinstance(images, (list, tuple)):
+ images = [images]
+ else:
+ if len(images) == 0:
+ raise ValueError("Unable to process empty list")
+
+ for img in images:
+ # img is allowed to be a path, a binary string representing image data
+ # or a file-like object (really anything that implements read())
+ try:
+ rawdata = img.read()
+ except AttributeError:
+ if not isinstance(img, (str, bytes)):
+ raise TypeError("Neither implements read() nor is str or bytes")
+ # the thing doesn't have a read() function, so try if we can treat
+ # it as a file name
+ try:
+ f = open(img, "rb")
+ except Exception:
+ # whatever the exception is (string could contain NUL
+ # characters or the path could just not exist) it's not a file
+ # name so we now try treating it as raw image content
+ rawdata = img
+ else:
+ # we are not using a "with" block here because we only want to
+ # catch exceptions thrown by open(). The read() may throw its
+ # own exceptions like MemoryError which should be handled
+ # differently.
+ rawdata = f.read()
+ f.close()
+
+ for (
+ color,
+ ndpi,
+ imgformat,
+ imgdata,
+ imgwidthpx,
+ imgheightpx,
+ palette,
+ inverted,
+ depth,
+ rotation,
+ iccp,
+ ) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]):
+ pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
+ imgwidthpx, imgheightpx, ndpi
+ )
+
+ userunit = None
+ if pagewidth < 3.00 or pageheight < 3.00:
+ logging.warning(
+ "pdf width or height is below 3.00 - too small for some viewers!"
+ )
+ elif pagewidth > 14400.0 or pageheight > 14400.0:
+ if kwargs["allow_oversized"]:
+ userunit = find_scale(pagewidth, pageheight)
+ pagewidth /= userunit
+ pageheight /= userunit
+ imgwidthpdf /= userunit
+ imgheightpdf /= userunit
+ else:
+ raise PdfTooLargeError(
+ "pdf width or height must not exceed 200 inches."
+ )
+ # the image is always centered on the page
+ imgxpdf = (pagewidth - imgwidthpdf) / 2.0
+ imgypdf = (pageheight - imgheightpdf) / 2.0
+ pdf.add_imagepage(
+ color,
+ imgwidthpx,
+ imgheightpx,
+ imgformat,
+ imgdata,
+ imgwidthpdf,
+ imgheightpdf,
+ imgxpdf,
+ imgypdf,
+ pagewidth,
+ pageheight,
+ userunit,
+ palette,
+ inverted,
+ depth,
+ rotation,
+ kwargs["cropborder"],
+ kwargs["bleedborder"],
+ kwargs["trimborder"],
+ kwargs["artborder"],
+ iccp,
+ )
+
+ if kwargs["outputstream"]:
+ pdf.tostream(kwargs["outputstream"])
+ return
+
+ return pdf.tostring()
+
+
+def parse_num(num, name):
+ if num == "":
+ return None
+ unit = None
+ if num.endswith("pt"):
+ unit = Unit.pt
+ elif num.endswith("cm"):
+ unit = Unit.cm
+ elif num.endswith("mm"):
+ unit = Unit.mm
+ elif num.endswith("in"):
+ unit = Unit.inch
+ else:
+ try:
+ num = float(num)
+ except ValueError:
+ msg = (
+ "%s is not a floating point number and doesn't have a "
+ "valid unit: %s" % (name, num)
+ )
+ raise argparse.ArgumentTypeError(msg)
+ if unit is None:
+ unit = Unit.pt
+ else:
+ num = num[:-2]
+ try:
+ num = float(num)
+ except ValueError:
+ msg = "%s is not a floating point number: %s" % (name, num)
+ raise argparse.ArgumentTypeError(msg)
+ if num < 0:
+ msg = "%s must not be negative: %s" % (name, num)
+ raise argparse.ArgumentTypeError(msg)
+ if unit == Unit.cm:
+ num = cm_to_pt(num)
+ elif unit == Unit.mm:
+ num = mm_to_pt(num)
+ elif unit == Unit.inch:
+ num = in_to_pt(num)
+ return num
+
+
+def parse_imgsize_num(num, name):
+ if num == "":
+ return None
+ unit = None
+ if num.endswith("pt"):
+ unit = ImgUnit.pt
+ elif num.endswith("cm"):
+ unit = ImgUnit.cm
+ elif num.endswith("mm"):
+ unit = ImgUnit.mm
+ elif num.endswith("in"):
+ unit = ImgUnit.inch
+ elif num.endswith("dpi"):
+ unit = ImgUnit.dpi
+ elif num.endswith("%"):
+ unit = ImgUnit.perc
+ else:
+ try:
+ num = float(num)
+ except ValueError:
+ msg = (
+ "%s is not a floating point number and doesn't have a "
+ "valid unit: %s" % (name, num)
+ )
+ raise argparse.ArgumentTypeError(msg)
+ if unit is None:
+ unit = ImgUnit.pt
+ else:
+ # strip off unit from string
+ if unit == ImgUnit.dpi:
+ num = num[:-3]
+ elif unit == ImgUnit.perc:
+ num = num[:-1]
+ else:
+ num = num[:-2]
+ try:
+ num = float(num)
+ except ValueError:
+ msg = "%s is not a floating point number: %s" % (name, num)
+ raise argparse.ArgumentTypeError(msg)
+ if unit == ImgUnit.cm:
+ num = (ImgSize.abs, cm_to_pt(num))
+ elif unit == ImgUnit.mm:
+ num = (ImgSize.abs, mm_to_pt(num))
+ elif unit == ImgUnit.inch:
+ num = (ImgSize.abs, in_to_pt(num))
+ elif unit == ImgUnit.pt:
+ num = (ImgSize.abs, num)
+ elif unit == ImgUnit.dpi:
+ num = (ImgSize.dpi, num)
+ elif unit == ImgUnit.perc:
+ num = (ImgSize.perc, num)
+ return num
+
+
+def parse_pagesize_rectarg(string):
+ transposed = string.endswith("^T")
+ if transposed:
+ string = string[:-2]
+ if papersizes.get(string.lower()):
+ string = papersizes[string.lower()]
+ if "x" not in string:
+ # if there is no separating "x" in the string, then the string is
+ # interpreted as the width
+ w = parse_num(string, "width")
+ h = None
+ else:
+ w, h = string.split("x", 1)
+ w = parse_num(w, "width")
+ h = parse_num(h, "height")
+ if transposed:
+ w, h = h, w
+ if w is None and h is None:
+ raise argparse.ArgumentTypeError("at least one dimension must be specified")
+ return w, h
+
+
+def parse_imgsize_rectarg(string):
+ transposed = string.endswith("^T")
+ if transposed:
+ string = string[:-2]
+ if papersizes.get(string.lower()):
+ string = papersizes[string.lower()]
+ if "x" not in string:
+ # if there is no separating "x" in the string, then the string is
+ # interpreted as the width
+ w = parse_imgsize_num(string, "width")
+ h = None
+ else:
+ w, h = string.split("x", 1)
+ w = parse_imgsize_num(w, "width")
+ h = parse_imgsize_num(h, "height")
+ if transposed:
+ w, h = h, w
+ if w is None and h is None:
+ raise argparse.ArgumentTypeError("at least one dimension must be specified")
+ return w, h
+
+
+def parse_colorspacearg(string):
+ for c in Colorspace:
+ if c.name == string:
+ return c
+ allowed = ", ".join([c.name for c in Colorspace])
+ raise argparse.ArgumentTypeError(
+ "Unsupported colorspace: %s. Must be one of: %s." % (string, allowed)
+ )
+
+
+def parse_enginearg(string):
+ for c in Engine:
+ if c.name == string:
+ return c
+ allowed = ", ".join([c.name for c in Engine])
+ raise argparse.ArgumentTypeError(
+ "Unsupported engine: %s. Must be one of: %s." % (string, allowed)
+ )
+
+
+def parse_borderarg(string):
+ if ":" in string:
+ h, v = string.split(":", 1)
+ if h == "":
+ raise argparse.ArgumentTypeError("missing value before colon")
+ if v == "":
+ raise argparse.ArgumentTypeError("missing value after colon")
+ else:
+ if string == "":
+ raise argparse.ArgumentTypeError("border option cannot be empty")
+ h, v = string, string
+ h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border")
+ if h is None and v is None:
+ raise argparse.ArgumentTypeError("missing value")
+ return h, v
+
+
+def input_images(path_expr):
+ if path_expr == "-":
+ # we slurp in all data from stdin because we need to seek in it later
+ result = sys.stdin.buffer.read()
+ if len(result) == 0:
+ raise argparse.ArgumentTypeError('"%s" is empty' % path_expr)
+ else:
+ result = []
+ paths = [path_expr]
+ if sys.platform == "win32" and ("*" in path_expr or "?" in path_expr):
+ # on windows, program is responsible for expanding wildcards such as *.jpg
+ # glob won't return files that don't exist so we only use it for wildcards
+ # paths without wildcards that do not exist will trigger "does not exist"
+ from glob import glob
+ paths = glob(path_expr)
+ for path in paths:
+ try:
+ if os.path.getsize(path) == 0:
+ raise argparse.ArgumentTypeError('"%s" is empty' % path)
+ # test-read a byte from it so that we can abort early in case
+ # we cannot read data from the file
+ with open(path, "rb") as im:
+ im.read(1)
+ except IsADirectoryError:
+ raise argparse.ArgumentTypeError('"%s" is a directory' % path)
+ except PermissionError:
+ raise argparse.ArgumentTypeError('"%s" permission denied' % path)
+ except FileNotFoundError:
+ raise argparse.ArgumentTypeError('"%s" does not exist' % path)
+ result.append(path)
+ return result
+
+
+def parse_fitarg(string):
+ for m in FitMode:
+ if m.name == string.lower():
+ return m
+ raise argparse.ArgumentTypeError("unknown fit mode: %s" % string)
+
+
+def parse_panes(string):
+ for m in PageMode:
+ if m.name == string.lower():
+ return m
+ allowed = ", ".join([m.name for m in PageMode])
+ raise argparse.ArgumentTypeError(
+ "Unsupported page mode: %s. Must be one of: %s." % (string, allowed)
+ )
+
+
+def parse_magnification(string):
+ for m in Magnification:
+ if m.name == string.lower():
+ return m
+ try:
+ return float(string)
+ except ValueError:
+ pass
+ allowed = ", ".join([m.name for m in Magnification])
+ raise argparse.ArgumentTypeError(
+ "Unsupported magnification: %s. Must be "
+ "a floating point number or one of: %s." % (string, allowed)
+ )
+
+
+def parse_layout(string):
+ for l in PageLayout:
+ if l.name == string.lower():
+ return l
+ allowed = ", ".join([l.name for l in PageLayout])
+ raise argparse.ArgumentTypeError(
+ "Unsupported page layout: %s. Must be one of: %s." % (string, allowed)
+ )
+
+
+def valid_date(string):
+ # first try parsing in ISO8601 format
+ try:
+ return datetime.strptime(string, "%Y-%m-%d")
+ except ValueError:
+ pass
+ try:
+ return datetime.strptime(string, "%Y-%m-%dT%H:%M")
+ except ValueError:
+ pass
+ try:
+ return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")
+ except ValueError:
+ pass
+ # then try dateutil
+ try:
+ from dateutil import parser
+ except ImportError:
+ pass
+ else:
+ try:
+ return parser.parse(string)
+ except TypeError:
+ pass
+ # as a last resort, try the local date utility
+ try:
+ import subprocess
+ except ImportError:
+ pass
+ else:
+ try:
+ utime = subprocess.check_output(["date", "--date", string, "+%s"])
+ except subprocess.CalledProcessError:
+ pass
+ else:
+ return datetime.utcfromtimestamp(int(utime))
+ raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
+
+
+def gui():
+ import tkinter
+ import tkinter.filedialog
+
+ have_fitz = True
+ try:
+ import fitz
+ except ImportError:
+ have_fitz = False
+
+ # from Python 3.7 Lib/idlelib/configdialog.py
+ # Copyright 2015-2017 Terry Jan Reedy
+ # Python License
+ class VerticalScrolledFrame(tkinter.Frame):
+ """A pure Tkinter vertically scrollable frame.
+
+ * Use the 'interior' attribute to place widgets inside the scrollable frame
+ * Construct and pack/place/grid normally
+ * This frame only allows vertical scrolling
+ """
+
+ def __init__(self, parent, *args, **kw):
+ tkinter.Frame.__init__(self, parent, *args, **kw)
+
+ # Create a canvas object and a vertical scrollbar for scrolling it.
+ vscrollbar = tkinter.Scrollbar(self, orient=tkinter.VERTICAL)
+ vscrollbar.pack(fill=tkinter.Y, side=tkinter.RIGHT, expand=tkinter.FALSE)
+ canvas = tkinter.Canvas(
+ self,
+ borderwidth=0,
+ highlightthickness=0,
+ yscrollcommand=vscrollbar.set,
+ width=240,
+ )
+ canvas.pack(side=tkinter.LEFT, fill=tkinter.BOTH, expand=tkinter.TRUE)
+ vscrollbar.config(command=canvas.yview)
+
+ # Reset the view.
+ canvas.xview_moveto(0)
+ canvas.yview_moveto(0)
+
+ # Create a frame inside the canvas which will be scrolled with it.
+ self.interior = interior = tkinter.Frame(canvas)
+ interior_id = canvas.create_window(0, 0, window=interior, anchor=tkinter.NW)
+
+ # Track changes to the canvas and frame width and sync them,
+ # also updating the scrollbar.
+ def _configure_interior(event):
+ # Update the scrollbars to match the size of the inner frame.
+ size = (interior.winfo_reqwidth(), interior.winfo_reqheight())
+ canvas.config(scrollregion="0 0 %s %s" % size)
+
+ interior.bind("<Configure>", _configure_interior)
+
+ def _configure_canvas(event):
+ if interior.winfo_reqwidth() != canvas.winfo_width():
+ # Update the inner frame's width to fill the canvas.
+ canvas.itemconfigure(interior_id, width=canvas.winfo_width())
+
+ canvas.bind("<Configure>", _configure_canvas)
+
+ return
+
+ # From Python 3.7 Lib/tkinter/__init__.py
+ # Copyright 2000 Fredrik Lundh
+ # Python License
+ #
+ # add support for 'state' and 'name' kwargs
+ # add support for updating list of options
+ class OptionMenu(tkinter.Menubutton):
+ """OptionMenu which allows the user to select a value from a menu."""
+
+ def __init__(self, master, variable, value, *values, **kwargs):
+ """Construct an optionmenu widget with the parent MASTER, with
+ the resource textvariable set to VARIABLE, the initially selected
+ value VALUE, the other menu values VALUES and an additional
+ keyword argument command."""
+ kw = {
+ "borderwidth": 2,
+ "textvariable": variable,
+ "indicatoron": 1,
+ "relief": tkinter.RAISED,
+ "anchor": "c",
+ "highlightthickness": 2,
+ }
+ if "state" in kwargs:
+ kw["state"] = kwargs["state"]
+ del kwargs["state"]
+ if "name" in kwargs:
+ kw["name"] = kwargs["name"]
+ del kwargs["name"]
+ tkinter.Widget.__init__(self, master, "menubutton", kw)
+ self.widgetName = "tk_optionMenu"
+ self.callback = kwargs.get("command")
+ self.variable = variable
+ if "command" in kwargs:
+ del kwargs["command"]
+ if kwargs:
+ raise tkinter.TclError("unknown option -" + list(kwargs.keys())[0])
+ self.set_values([value] + list(values))
+
+ def __getitem__(self, name):
+ if name == "menu":
+ return self.__menu
+ return tkinter.Widget.__getitem__(self, name)
+
+ def set_values(self, values):
+ menu = self.__menu = tkinter.Menu(self, name="menu", tearoff=0)
+ self.menuname = menu._w
+ for v in values:
+ menu.add_command(
+ label=v, command=tkinter._setit(self.variable, v, self.callback)
+ )
+ self["menu"] = menu
+
+ def destroy(self):
+ """Destroy this widget and the associated menu."""
+ tkinter.Menubutton.destroy(self)
+ self.__menu = None
+
+ root = tkinter.Tk()
+ app = tkinter.Frame(master=root)
+
+ infiles = []
+ maxpagewidth = 0
+ maxpageheight = 0
+ doc = None
+
+ args = {
+ "engine": tkinter.StringVar(),
+ "first_frame_only": tkinter.BooleanVar(),
+ "auto_orient": tkinter.BooleanVar(),
+ "fit": tkinter.StringVar(),
+ "title": tkinter.StringVar(),
+ "author": tkinter.StringVar(),
+ "creator": tkinter.StringVar(),
+ "producer": tkinter.StringVar(),
+ "subject": tkinter.StringVar(),
+ "keywords": tkinter.StringVar(),
+ "nodate": tkinter.BooleanVar(),
+ "creationdate": tkinter.StringVar(),
+ "moddate": tkinter.StringVar(),
+ "viewer_panes": tkinter.StringVar(),
+ "viewer_initial_page": tkinter.IntVar(),
+ "viewer_magnification": tkinter.StringVar(),
+ "viewer_page_layout": tkinter.StringVar(),
+ "viewer_fit_window": tkinter.BooleanVar(),
+ "viewer_center_window": tkinter.BooleanVar(),
+ "viewer_fullscreen": tkinter.BooleanVar(),
+ "pagesize_dropdown": tkinter.StringVar(),
+ "pagesize_width": tkinter.DoubleVar(),
+ "pagesize_height": tkinter.DoubleVar(),
+ "imgsize_dropdown": tkinter.StringVar(),
+ "imgsize_width": tkinter.DoubleVar(),
+ "imgsize_height": tkinter.DoubleVar(),
+ "colorspace": tkinter.StringVar(),
+ "first_frame_only": tkinter.BooleanVar(),
+ }
+ args["engine"].set("auto")
+ args["title"].set("")
+ args["auto_orient"].set(False)
+ args["fit"].set("into")
+ args["colorspace"].set("auto")
+ args["viewer_panes"].set("auto")
+ args["viewer_initial_page"].set(1)
+ args["viewer_magnification"].set("auto")
+ args["viewer_page_layout"].set("auto")
+ args["first_frame_only"].set(False)
+ args["pagesize_dropdown"].set("auto")
+ args["imgsize_dropdown"].set("auto")
+
+ def on_open_button():
+ nonlocal infiles
+ nonlocal doc
+ nonlocal maxpagewidth
+ nonlocal maxpageheight
+ infiles = tkinter.filedialog.askopenfilenames(
+ parent=root,
+ title="open image",
+ filetypes=[
+ (
+ "images",
+ "*.bmp *.eps *.gif *.ico *.jpeg *.jpg *.jp2 *.pcx *.png *.ppm *.tiff",
+ ),
+ ("all files", "*"),
+ ],
+ # initialdir="/home/josch/git/plakativ",
+ # initialfile="test.pdf",
+ )
+ if have_fitz:
+ with BytesIO() as f:
+ save_pdf(f)
+ f.seek(0)
+ doc = fitz.open(stream=f, filetype="pdf")
+ for page in doc:
+ if page.getDisplayList().rect.width > maxpagewidth:
+ maxpagewidth = page.getDisplayList().rect.width
+ if page.getDisplayList().rect.height > maxpageheight:
+ maxpageheight = page.getDisplayList().rect.height
+ draw()
+
+ def save_pdf(stream):
+ pagesizearg = None
+ if args["pagesize_dropdown"].get() == "auto":
+ # nothing to do
+ pass
+ elif args["pagesize_dropdown"].get() == "custom":
+ pagesizearg = args["pagesize_width"].get(), args["pagesize_height"].get()
+ elif args["pagesize_dropdown"].get() in papernames.values():
+ raise NotImplemented()
+ else:
+ raise Exception("no such pagesize: %s" % args["pagesize_dropdown"].get())
+ imgsizearg = None
+ if args["imgsize_dropdown"].get() == "auto":
+ # nothing to do
+ pass
+ elif args["imgsize_dropdown"].get() == "custom":
+ imgsizearg = args["imgsize_width"].get(), args["imgsize_height"].get()
+ elif args["imgsize_dropdown"].get() in papernames.values():
+ raise NotImplemented()
+ else:
+ raise Exception("no such imgsize: %s" % args["imgsize_dropdown"].get())
+ borderarg = None
+ layout_fun = get_layout_fun(
+ pagesizearg,
+ imgsizearg,
+ borderarg,
+ args["fit"].get(),
+ args["auto_orient"].get(),
+ )
+ viewer_panesarg = None
+ if args["viewer_panes"].get() == "auto":
+ # nothing to do
+ pass
+ elif args["viewer_panes"].get() in PageMode:
+ viewer_panesarg = args["viewer_panes"].get()
+ else:
+ raise Exception("no such viewer_panes: %s" % args["viewer_panes"].get())
+ viewer_magnificationarg = None
+ if args["viewer_magnification"].get() == "auto":
+ # nothing to do
+ pass
+ elif args["viewer_magnification"].get() in Magnification:
+ viewer_magnificationarg = args["viewer_magnification"].get()
+ else:
+ raise Exception(
+ "no such viewer_magnification: %s" % args["viewer_magnification"].get()
+ )
+ viewer_page_layoutarg = None
+ if args["viewer_page_layout"].get() == "auto":
+ # nothing to do
+ pass
+ elif args["viewer_page_layout"].get() in PageLayout:
+ viewer_page_layoutarg = args["viewer_page_layout"].get()
+ else:
+ raise Exception(
+ "no such viewer_page_layout: %s" % args["viewer_page_layout"].get()
+ )
+ colorspacearg = None
+ if args["colorspace"].get() != "auto":
+ colorspacearg = next(
+ v for v in Colorspace if v.name == args["colorspace"].get()
+ )
+ enginearg = None
+ if args["engine"].get() != "auto":
+ enginearg = next(v for v in Engine if v.name == args["engine"].get())
+
+ convert(
+ *infiles,
+ engine=enginearg,
+ title=args["title"].get() if args["title"].get() else None,
+ author=args["author"].get() if args["author"].get() else None,
+ creator=args["creator"].get() if args["creator"].get() else None,
+ producer=args["producer"].get() if args["producer"].get() else None,
+ creationdate=args["creationdate"].get()
+ if args["creationdate"].get()
+ else None,
+ moddate=args["moddate"].get() if args["moddate"].get() else None,
+ subject=args["subject"].get() if args["subject"].get() else None,
+ keywords=args["keywords"].get() if args["keywords"].get() else None,
+ colorspace=colorspacearg,
+ nodate=args["nodate"].get(),
+ layout_fun=layout_fun,
+ viewer_panes=viewer_panesarg,
+ viewer_initial_page=args["viewer_initial_page"].get()
+ if args["viewer_initial_page"].get() > 1
+ else None,
+ viewer_magnification=viewer_magnificationarg,
+ viewer_page_layout=viewer_page_layoutarg,
+ viewer_fit_window=(args["viewer_fit_window"].get() or None),
+ viewer_center_window=(args["viewer_center_window"].get() or None),
+ viewer_fullscreen=(args["viewer_fullscreen"].get() or None),
+ outputstream=stream,
+ first_frame_only=args["first_frame_only"].get(),
+ cropborder=None,
+ bleedborder=None,
+ trimborder=None,
+ artborder=None,
+ )
+
+ def on_save_button():
+ filename = tkinter.filedialog.asksaveasfilename(
+ parent=root,
+ title="save PDF",
+ defaultextension=".pdf",
+ filetypes=[("pdf documents", "*.pdf"), ("all files", "*")],
+ # initialdir="/home/josch/git/plakativ",
+ # initialfile=base + "_poster" + ext,
+ )
+ with open(filename, "wb") as f:
+ save_pdf(f)
+
+ root.title("img2pdf")
+ app.pack(fill=tkinter.BOTH, expand=tkinter.TRUE)
+
+ canvas = tkinter.Canvas(app, bg="black")
+
+ def draw():
+ canvas.delete(tkinter.ALL)
+ if not infiles:
+ canvas.create_text(
+ canvas.size[0] / 2,
+ canvas.size[1] / 2,
+ text='Click on the "Open Image(s)" button in the upper right.',
+ fill="white",
+ )
+ return
+
+ if not doc:
+ canvas.create_text(
+ canvas.size[0] / 2,
+ canvas.size[1] / 2,
+ text="PyMuPDF not available. Install the Python fitz module\n"
+ + "for preview functionality.",
+ fill="white",
+ )
+ return
+
+ canvas_padding = 10
+ # factor to convert from pdf dimensions (given in pt) into canvas
+ # dimensions (given in pixels)
+ zoom = min(
+ (canvas.size[0] - canvas_padding) / maxpagewidth,
+ (canvas.size[1] - canvas_padding) / maxpageheight,
+ )
+
+ pagenum = 0
+ mat_0 = fitz.Matrix(zoom, zoom)
+ canvas.image = tkinter.PhotoImage(
+ data=doc[pagenum]
+ .getDisplayList()
+ .getPixmap(matrix=mat_0, alpha=False)
+ .getImageData("ppm")
+ )
+ canvas.create_image(
+ (canvas.size[0] - maxpagewidth * zoom) / 2,
+ (canvas.size[1] - maxpageheight * zoom) / 2,
+ anchor=tkinter.NW,
+ image=canvas.image,
+ )
+
+ canvas.create_rectangle(
+ (canvas.size[0] - maxpagewidth * zoom) / 2,
+ (canvas.size[1] - maxpageheight * zoom) / 2,
+ (canvas.size[0] - maxpagewidth * zoom) / 2 + canvas.image.width(),
+ (canvas.size[1] - maxpageheight * zoom) / 2 + canvas.image.height(),
+ outline="red",
+ )
+
+ def on_resize(event):
+ canvas.size = (event.width, event.height)
+ draw()
+
+ canvas.pack(fill=tkinter.BOTH, side=tkinter.LEFT, expand=tkinter.TRUE)
+ canvas.bind("<Configure>", on_resize)
+
+ frame_right = tkinter.Frame(app)
+ frame_right.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.Y)
+
+ top_frame = tkinter.Frame(frame_right)
+ top_frame.pack(fill=tkinter.X)
+
+ tkinter.Button(top_frame, text="Open Image(s)", command=on_open_button).pack(
+ side=tkinter.LEFT, expand=tkinter.TRUE, fill=tkinter.X
+ )
+ tkinter.Button(top_frame, text="Help", state=tkinter.DISABLED).pack(
+ side=tkinter.RIGHT, expand=tkinter.TRUE, fill=tkinter.X
+ )
+
+ frame1 = VerticalScrolledFrame(frame_right)
+ frame1.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.Y)
+
+ output_options = tkinter.LabelFrame(frame1.interior, text="Output Options")
+ output_options.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+ tkinter.Label(output_options, text="colorspace").grid(
+ row=0, column=0, sticky=tkinter.W
+ )
+ OptionMenu(output_options, args["colorspace"], "auto", state=tkinter.DISABLED).grid(
+ row=0, column=1, sticky=tkinter.W
+ )
+ tkinter.Label(output_options, text="engine").grid(row=1, column=0, sticky=tkinter.W)
+ OptionMenu(output_options, args["engine"], "auto", state=tkinter.DISABLED).grid(
+ row=1, column=1, sticky=tkinter.W
+ )
+ tkinter.Checkbutton(
+ output_options,
+ text="Suppress timestamp",
+ variable=args["nodate"],
+ state=tkinter.DISABLED,
+ ).grid(row=2, column=0, columnspan=2, sticky=tkinter.W)
+ tkinter.Checkbutton(
+ output_options,
+ text="only first frame",
+ variable=args["first_frame_only"],
+ state=tkinter.DISABLED,
+ ).grid(row=3, column=0, columnspan=2, sticky=tkinter.W)
+ tkinter.Checkbutton(
+ output_options, text="force large input", state=tkinter.DISABLED
+ ).grid(row=4, column=0, columnspan=2, sticky=tkinter.W)
+ image_size_frame = tkinter.LabelFrame(frame1.interior, text="Image size")
+ image_size_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+ OptionMenu(
+ image_size_frame,
+ args["imgsize_dropdown"],
+ *(["auto", "custom"] + sorted(papernames.values())),
+ state=tkinter.DISABLED,
+ ).grid(row=1, column=0, columnspan=3, sticky=tkinter.W)
+
+ tkinter.Label(
+ image_size_frame, text="Width:", state=tkinter.DISABLED, name="size_label_width"
+ ).grid(row=2, column=0, sticky=tkinter.W)
+ tkinter.Spinbox(
+ image_size_frame,
+ format="%.2f",
+ increment=0.01,
+ from_=0,
+ to=100,
+ width=5,
+ state=tkinter.DISABLED,
+ name="spinbox_width",
+ ).grid(row=2, column=1, sticky=tkinter.W)
+ tkinter.Label(
+ image_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_width_mm"
+ ).grid(row=2, column=2, sticky=tkinter.W)
+
+ tkinter.Label(
+ image_size_frame,
+ text="Height:",
+ state=tkinter.DISABLED,
+ name="size_label_height",
+ ).grid(row=3, column=0, sticky=tkinter.W)
+ tkinter.Spinbox(
+ image_size_frame,
+ format="%.2f",
+ increment=0.01,
+ from_=0,
+ to=100,
+ width=5,
+ state=tkinter.DISABLED,
+ name="spinbox_height",
+ ).grid(row=3, column=1, sticky=tkinter.W)
+ tkinter.Label(
+ image_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_height_mm"
+ ).grid(row=3, column=2, sticky=tkinter.W)
+
+ page_size_frame = tkinter.LabelFrame(frame1.interior, text="Page size")
+ page_size_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+ OptionMenu(
+ page_size_frame,
+ args["pagesize_dropdown"],
+ *(["auto", "custom"] + sorted(papernames.values())),
+ state=tkinter.DISABLED,
+ ).grid(row=1, column=0, columnspan=3, sticky=tkinter.W)
+
+ tkinter.Label(
+ page_size_frame, text="Width:", state=tkinter.DISABLED, name="size_label_width"
+ ).grid(row=2, column=0, sticky=tkinter.W)
+ tkinter.Spinbox(
+ page_size_frame,
+ format="%.2f",
+ increment=0.01,
+ from_=0,
+ to=100,
+ width=5,
+ state=tkinter.DISABLED,
+ name="spinbox_width",
+ ).grid(row=2, column=1, sticky=tkinter.W)
+ tkinter.Label(
+ page_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_width_mm"
+ ).grid(row=2, column=2, sticky=tkinter.W)
+
+ tkinter.Label(
+ page_size_frame,
+ text="Height:",
+ state=tkinter.DISABLED,
+ name="size_label_height",
+ ).grid(row=3, column=0, sticky=tkinter.W)
+ tkinter.Spinbox(
+ page_size_frame,
+ format="%.2f",
+ increment=0.01,
+ from_=0,
+ to=100,
+ width=5,
+ state=tkinter.DISABLED,
+ name="spinbox_height",
+ ).grid(row=3, column=1, sticky=tkinter.W)
+ tkinter.Label(
+ page_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_height_mm"
+ ).grid(row=3, column=2, sticky=tkinter.W)
+ layout_frame = tkinter.LabelFrame(frame1.interior, text="Layout")
+ layout_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+ tkinter.Label(layout_frame, text="border", state=tkinter.DISABLED).grid(
+ row=0, column=0, sticky=tkinter.W
+ )
+ tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid(
+ row=0, column=1, sticky=tkinter.W
+ )
+ tkinter.Label(layout_frame, text="fit", state=tkinter.DISABLED).grid(
+ row=1, column=0, sticky=tkinter.W
+ )
+ OptionMenu(
+ layout_frame, args["fit"], *[v.name for v in FitMode], state=tkinter.DISABLED
+ ).grid(row=1, column=1, sticky=tkinter.W)
+ tkinter.Checkbutton(
+ layout_frame,
+ text="auto orient",
+ state=tkinter.DISABLED,
+ variable=args["auto_orient"],
+ ).grid(row=2, column=0, columnspan=2, sticky=tkinter.W)
+ tkinter.Label(layout_frame, text="crop border", state=tkinter.DISABLED).grid(
+ row=3, column=0, sticky=tkinter.W
+ )
+ tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid(
+ row=3, column=1, sticky=tkinter.W
+ )
+ tkinter.Label(layout_frame, text="bleed border", state=tkinter.DISABLED).grid(
+ row=4, column=0, sticky=tkinter.W
+ )
+ tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid(
+ row=4, column=1, sticky=tkinter.W
+ )
+ tkinter.Label(layout_frame, text="trim border", state=tkinter.DISABLED).grid(
+ row=5, column=0, sticky=tkinter.W
+ )
+ tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid(
+ row=5, column=1, sticky=tkinter.W
+ )
+ tkinter.Label(layout_frame, text="art border", state=tkinter.DISABLED).grid(
+ row=6, column=0, sticky=tkinter.W
+ )
+ tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid(
+ row=6, column=1, sticky=tkinter.W
+ )
+ metadata_frame = tkinter.LabelFrame(frame1.interior, text="PDF metadata")
+ metadata_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+ tkinter.Label(metadata_frame, text="title", state=tkinter.DISABLED).grid(
+ row=0, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(
+ metadata_frame, textvariable=args["title"], state=tkinter.DISABLED
+ ).grid(row=0, column=1, sticky=tkinter.W)
+ tkinter.Label(metadata_frame, text="author", state=tkinter.DISABLED).grid(
+ row=1, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(
+ metadata_frame, textvariable=args["author"], state=tkinter.DISABLED
+ ).grid(row=1, column=1, sticky=tkinter.W)
+ tkinter.Label(metadata_frame, text="creator", state=tkinter.DISABLED).grid(
+ row=2, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(
+ metadata_frame, textvariable=args["creator"], state=tkinter.DISABLED
+ ).grid(row=2, column=1, sticky=tkinter.W)
+ tkinter.Label(metadata_frame, text="producer", state=tkinter.DISABLED).grid(
+ row=3, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(
+ metadata_frame, textvariable=args["producer"], state=tkinter.DISABLED
+ ).grid(row=3, column=1, sticky=tkinter.W)
+ tkinter.Label(metadata_frame, text="creation date", state=tkinter.DISABLED).grid(
+ row=4, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(
+ metadata_frame, textvariable=args["creationdate"], state=tkinter.DISABLED
+ ).grid(row=4, column=1, sticky=tkinter.W)
+ tkinter.Label(
+ metadata_frame, text="modification date", state=tkinter.DISABLED
+ ).grid(row=5, column=0, sticky=tkinter.W)
+ tkinter.Entry(
+ metadata_frame, textvariable=args["moddate"], state=tkinter.DISABLED
+ ).grid(row=5, column=1, sticky=tkinter.W)
+ tkinter.Label(metadata_frame, text="subject", state=tkinter.DISABLED).grid(
+ row=6, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(metadata_frame, state=tkinter.DISABLED).grid(
+ row=6, column=1, sticky=tkinter.W
+ )
+ tkinter.Label(metadata_frame, text="keywords", state=tkinter.DISABLED).grid(
+ row=7, column=0, sticky=tkinter.W
+ )
+ tkinter.Entry(metadata_frame, state=tkinter.DISABLED).grid(
+ row=7, column=1, sticky=tkinter.W
+ )
+ viewer_frame = tkinter.LabelFrame(frame1.interior, text="PDF viewer options")
+ viewer_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+ tkinter.Label(viewer_frame, text="panes", state=tkinter.DISABLED).grid(
+ row=0, column=0, sticky=tkinter.W
+ )
+ OptionMenu(
+ viewer_frame,
+ args["viewer_panes"],
+ *(["auto"] + [v.name for v in PageMode]),
+ state=tkinter.DISABLED,
+ ).grid(row=0, column=1, sticky=tkinter.W)
+ tkinter.Label(viewer_frame, text="initial page", state=tkinter.DISABLED).grid(
+ row=1, column=0, sticky=tkinter.W
+ )
+ tkinter.Spinbox(
+ viewer_frame,
+ increment=1,
+ from_=1,
+ to=10000,
+ width=6,
+ textvariable=args["viewer_initial_page"],
+ state=tkinter.DISABLED,
+ name="viewer_initial_page_spinbox",
+ ).grid(row=1, column=1, sticky=tkinter.W)
+ tkinter.Label(viewer_frame, text="magnification", state=tkinter.DISABLED).grid(
+ row=2, column=0, sticky=tkinter.W
+ )
+ OptionMenu(
+ viewer_frame,
+ args["viewer_magnification"],
+ *(["auto", "custom"] + [v.name for v in Magnification]),
+ state=tkinter.DISABLED,
+ ).grid(row=2, column=1, sticky=tkinter.W)
+ tkinter.Label(viewer_frame, text="page layout", state=tkinter.DISABLED).grid(
+ row=3, column=0, sticky=tkinter.W
+ )
+ OptionMenu(
+ viewer_frame,
+ args["viewer_page_layout"],
+ *(["auto"] + [v.name for v in PageLayout]),
+ state=tkinter.DISABLED,
+ ).grid(row=3, column=1, sticky=tkinter.W)
+ tkinter.Checkbutton(
+ viewer_frame,
+ text="fit window to page size",
+ variable=args["viewer_fit_window"],
+ state=tkinter.DISABLED,
+ ).grid(row=4, column=0, columnspan=2, sticky=tkinter.W)
+ tkinter.Checkbutton(
+ viewer_frame,
+ text="center window",
+ variable=args["viewer_center_window"],
+ state=tkinter.DISABLED,
+ ).grid(row=5, column=0, columnspan=2, sticky=tkinter.W)
+ tkinter.Checkbutton(
+ viewer_frame,
+ text="open in fullscreen",
+ variable=args["viewer_fullscreen"],
+ state=tkinter.DISABLED,
+ ).grid(row=6, column=0, columnspan=2, sticky=tkinter.W)
+
+ option_frame = tkinter.LabelFrame(frame1.interior, text="Program options")
+ option_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X)
+
+ tkinter.Label(option_frame, text="Unit:", state=tkinter.DISABLED).grid(
+ row=0, column=0, sticky=tkinter.W
+ )
+ unit = tkinter.StringVar()
+ unit.set("mm")
+ OptionMenu(option_frame, unit, ["mm"], state=tkinter.DISABLED).grid(
+ row=0, column=1, sticky=tkinter.W
+ )
+
+ tkinter.Label(option_frame, text="Language:", state=tkinter.DISABLED).grid(
+ row=1, column=0, sticky=tkinter.W
+ )
+ language = tkinter.StringVar()
+ language.set("English")
+ OptionMenu(option_frame, language, ["English"], state=tkinter.DISABLED).grid(
+ row=1, column=1, sticky=tkinter.W
+ )
+
+ bottom_frame = tkinter.Frame(frame_right)
+ bottom_frame.pack(fill=tkinter.X)
+
+ tkinter.Button(bottom_frame, text="Save PDF", command=on_save_button).pack(
+ side=tkinter.LEFT, expand=tkinter.TRUE, fill=tkinter.X
+ )
+ tkinter.Button(bottom_frame, text="Exit", command=root.destroy).pack(
+ side=tkinter.RIGHT, expand=tkinter.TRUE, fill=tkinter.X
+ )
+
+ app.mainloop()
+
+
+def main(argv=sys.argv):
+ rendered_papersizes = ""
+ for k, v in sorted(papersizes.items()):
+ rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
+
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description="""\
+Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and
+JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000
+images with the only added file size coming from the PDF container itself.
+Other raster graphics formats are losslessly stored using the same encoding
+that PNG uses. Since PDF does not support images with transparency and since
+img2pdf aims to never be lossy, input images with an alpha channel are not
+supported.
+
+The output is sent to standard output so that it can be redirected into a file
+or to another program as part of a shell pipe. To directly write the output
+into a file, use the -o or --output option.
+
+Options:
+""",
+ epilog="""\
+Colorspace:
+ Currently, the colorspace must be forced for JPEG 2000 images that are not in
+ the RGB colorspace. Available colorspace options are based on Python Imaging
+ Library (PIL) short handles.
+
+ RGB RGB color
+ L Grayscale
+ 1 Black and white (internally converted to grayscale)
+ CMYK CMYK color
+ CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe)
+
+Paper sizes:
+ You can specify the short hand paper size names shown in the first column in
+ the table below as arguments to the --pagesize and --imgsize options. The
+ width and height they are mapping to is shown in the second column. Giving
+ the value in the second column has the same effect as giving the short hand
+ in the first column. Appending ^T (a caret/circumflex followed by the letter
+ T) turns the paper size from portrait into landscape. The postfix thus
+ symbolizes the transpose. The values are case insensitive.
+
+%s
+
+Fit options:
+ The img2pdf options for the --fit argument are shown in the first column in
+ the table below. The function of these options can be mapped to the geometry
+ operators of imagemagick. For users who are familiar with imagemagick, the
+ corresponding operator is shown in the second column. The third column shows
+ whether or not the aspect ratio is preserved for that option (same as in
+ imagemagick). Just like imagemagick, img2pdf tries hard to preserve the
+ aspect ratio, so if the --fit argument is not given, then the default is
+ "into" which corresponds to the absence of any operator in imagemagick.
+ The value of the --fit option is case insensitive.
+
+ into | | Y | The default. Width and height values specify maximum
+ | | | values.
+ ---------+---+---+----------------------------------------------------------
+ fill | ^ | Y | Width and height values specify the minimum values.
+ ---------+---+---+----------------------------------------------------------
+ exact | ! | N | Width and height emphatically given.
+ ---------+---+---+----------------------------------------------------------
+ shrink | > | Y | Shrinks an image with dimensions larger than the given
+ | | | ones (and otherwise behaves like "into").
+ ---------+---+---+----------------------------------------------------------
+ enlarge | < | Y | Enlarges an image with dimensions smaller than the given
+ | | | ones (and otherwise behaves like "into").
+
+Argument parsing:
+ Argument long options can be abbreviated to a prefix if the abbreviation is
+ unambiguous. That is, the prefix must match a unique option.
+
+ Beware of your shell interpreting argument values as special characters (like
+ the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
+ values in single quotes.
+
+ If you want an argument value to start with one or more minus characters, you
+ must use the long option name and join them with an equal sign like so:
+
+ $ img2pdf --author=--test--
+
+ If your input file name starts with one or more minus characters, either
+ separate the input files from the other arguments by two minus signs:
+
+ $ img2pdf -- --my-file-starts-with-two-minuses.jpg
+
+ Or be more explicit about its relative path by prepending a ./:
+
+ $ img2pdf ./--my-file-starts-with-two-minuses.jpg
+
+ The order of non-positional arguments (all arguments other than the input
+ images) does not matter.
+
+Examples:
+ Lines starting with a dollar sign denote commands you can enter into your
+ terminal. The dollar sign signifies your command prompt. It is not part of
+ the command you type.
+
+ Convert two scans in JPEG format to a PDF document.
+
+ $ img2pdf --output out.pdf page1.jpg page2.jpg
+
+ Convert a directory of JPEG images into a PDF with printable A4 pages in
+ landscape mode. On each page, the photo takes the maximum amount of space
+ while preserving its aspect ratio and a print border of 2 cm on the top and
+ bottom and 2.5 cm on the left and right hand side.
+
+ $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
+
+ On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
+ original image size if the image is smaller than that.
+
+ $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg
+
+ Prepare a directory of photos to be printed borderless on photo paper with a
+ 3:2 aspect ratio and rotate each page so that its orientation is the same as
+ the input image.
+
+ $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg
+
+ Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf
+ cannot read it from the JPEG2000 file automatically.
+
+ $ img2pdf --output out.pdf --colorspace L input.jp2
+
+Written by Johannes 'josch' Schauer <josch@mister-muffin.de>
+
+Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues
+"""
+ % rendered_papersizes,
+ )
+
+ parser.add_argument(
+ "images",
+ metavar="infile",
+ type=input_images,
+ nargs="*",
+ help="Specifies the input file(s) in any format that can be read by "
+ "the Python Imaging Library (PIL). If no input images are given, then "
+ 'a single image is read from standard input. The special filename "-" '
+ "can be used once to read an image from standard input. To read a "
+ 'file in the current directory with the filename "-", pass it to '
+ 'img2pdf by explicitly stating its relative path like "./-".',
+ )
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ help="Makes the program operate in verbose mode, printing messages on "
+ "standard error.",
+ )
+ parser.add_argument(
+ "-V",
+ "--version",
+ action="version",
+ version="%(prog)s " + __version__,
+ help="Prints version information and exits.",
+ )
+ parser.add_argument(
+ "--gui", dest="gui", action="store_true", help="run experimental tkinter gui"
+ )
+
+ outargs = parser.add_argument_group(
+ title="General output arguments",
+ description="Arguments controlling the output format.",
+ )
+
+ # In Python3 we have to output to sys.stdout.buffer because we write are
+ # bytes and not strings. In certain situations, like when the main
+ # function is wrapped by contextlib.redirect_stdout(), sys.stdout does not
+ # have the buffer attribute. Thus we write to sys.stdout by default and
+ # to sys.stdout.buffer if it exists.
+ outargs.add_argument(
+ "-o",
+ "--output",
+ metavar="out",
+ type=argparse.FileType("wb"),
+ default=sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout,
+ help="Makes the program output to a file instead of standard output.",
+ )
+ outargs.add_argument(
+ "-C",
+ "--colorspace",
+ metavar="colorspace",
+ type=parse_colorspacearg,
+ help="""
+Forces the PIL colorspace. See the epilogue for a list of possible values.
+Usually the PDF colorspace would be derived from the color space of the input
+image. This option overwrites the automatically detected colorspace from the
+input image and thus forces a certain colorspace in the output PDF /ColorSpace
+property. This is useful for JPEG 2000 images with a different colorspace than
+RGB.""",
+ )
+
+ outargs.add_argument(
+ "-D",
+ "--nodate",
+ action="store_true",
+ help="Suppresses timestamps in the output and thus makes the output "
+ "deterministic between individual runs. You can also manually "
+ "set a date using the --moddate and --creationdate options.",
+ )
+
+ outargs.add_argument(
+ "--engine",
+ metavar="engine",
+ type=parse_enginearg,
+ help="Choose PDF engine. Can be either internal, pikepdf or pdfrw. "
+ "The internal engine does not have additional requirements and writes "
+ "out a human readable PDF. The pikepdf engine requires the pikepdf "
+ "Python module and qpdf library, is most featureful, can "
+ 'linearize PDFs ("fast web view") and can compress more parts of it.'
+ "The pdfrw engine requires the pdfrw Python "
+ "module but does not support unicode metadata (See "
+ "https://github.com/pmaupin/pdfrw/issues/39) or palette data (See "
+ "https://github.com/pmaupin/pdfrw/issues/128).",
+ )
+
+ outargs.add_argument(
+ "--first-frame-only",
+ action="store_true",
+ help="By default, img2pdf will convert multi-frame images like "
+ "multi-page TIFF or animated GIF images to one page per frame. "
+ "This option will only let the first frame of every multi-frame "
+ "input image be converted into a page in the resulting PDF.",
+ )
+
+ outargs.add_argument(
+ "--pillow-limit-break",
+ action="store_true",
+ help="img2pdf uses the Python Imaging Library Pillow to read input "
+ "images. Pillow limits the maximum input image size to %d pixels "
+ "to prevent decompression bomb denial of service attacks. If "
+ "your input image contains more pixels than that, use this "
+ "option to disable this safety measure during this run of img2pdf"
+ % Image.MAX_IMAGE_PIXELS,
+ )
+
+ outargs.add_argument(
+ "--pdfa",
+ nargs="?",
+ const="/usr/share/color/icc/sRGB.icc",
+ default=None,
+ help="Output a PDF/A-1b compliant document. By default, this will "
+ "embed /usr/share/color/icc/sRGB.icc as the color profile.",
+ )
+
+ sizeargs = parser.add_argument_group(
+ title="Image and page size and layout arguments",
+ description="""\
+Every input image will be placed on its own page. The image size is controlled
+by the dpi value of the input image or, if unset or missing, the default dpi of
+%.2f. By default, each page will have the same size as the image it shows.
+Thus, there will be no visible border between the image and the page border by
+default. If image size and page size are made different from each other by the
+options in this section, the image will always be centered in both dimensions.
+
+The image size and page size can be explicitly set using the --imgsize and
+--pagesize options, respectively. If either dimension of the image size is
+specified but the same dimension of the page size is not, then the latter will
+be derived from the former using an optional minimal distance between the image
+and the page border (given by the --border option) and/or a certain fitting
+strategy (given by the --fit option). The converse happens if a dimension of
+the page size is set but the same dimension of the image size is not.
+
+Any length value in below options is represented by the meta variable L which
+is a floating point value with an optional unit appended (without a space
+between them). The default unit is pt (1/72 inch, the PDF unit) and other
+allowed units are cm (centimeter), mm (millimeter), and in (inch).
+
+Any size argument of the format LxL in the options below specifies the width
+and height of a rectangle where the first L represents the width and the second
+L represents the height with an optional unit following each value as described
+above. Either width or height may be omitted. If the height is omitted, the
+separating x can be omitted as well. Omitting the width requires to prefix the
+height with the separating x. The missing dimension will be chosen so to not
+change the image aspect ratio. Instead of giving the width and height
+explicitly, you may also specify some (case-insensitive) common page sizes such
+as letter and A4. See the epilogue at the bottom for a complete list of the
+valid sizes.
+
+The --fit option scales to fit the image into a rectangle that is either
+derived from the --imgsize option or otherwise from the --pagesize option.
+If the --border option is given in addition to the --imgsize option while the
+--pagesize option is not given, then the page size will be calculated from the
+image size, respecting the border setting. If the --border option is given in
+addition to the --pagesize option while the --imgsize option is not given, then
+the image size will be calculated from the page size, respecting the border
+setting. If the --border option is given while both the --pagesize and
+--imgsize options are passed, then the --border option will be ignored.
+
+The --pagesize option or the --imgsize option with the --border option will
+determine the MediaBox size of the resulting PDF document.
+"""
+ % default_dpi,
+ )
+
+ sizeargs.add_argument(
+ "-S",
+ "--pagesize",
+ metavar="LxL",
+ type=parse_pagesize_rectarg,
+ help="""
+Sets the size of the PDF pages. The short-option is the upper case S because
+it is an mnemonic for being bigger than the image size.""",
+ )
+
+ sizeargs.add_argument(
+ "-s",
+ "--imgsize",
+ metavar="LxL",
+ type=parse_imgsize_rectarg,
+ help="""
+Sets the size of the images on the PDF pages. In addition, the unit dpi is
+allowed which will set the image size as a value of dots per inch. Instead of
+a unit, width and height values may also have a percentage sign appended,
+indicating a resize of the image by that percentage. The short-option is the
+lower case s because it is an mnemonic for being smaller than the page size.
+""",
+ )
+ sizeargs.add_argument(
+ "-b",
+ "--border",
+ metavar="L[:L]",
+ type=parse_borderarg,
+ help="""
+Specifies the minimal distance between the image border and the PDF page
+border. This value Is overwritten by explicit values set by --pagesize or
+--imgsize. The value will be used when calculating page dimensions from the
+image dimensions or the other way round. One, or two length values can be given
+as an argument, separated by a colon. One value specifies the minimal border on
+all four sides. Two values specify the minimal border on the top/bottom and
+left/right, respectively. It is not possible to specify asymmetric borders
+because images will always be centered on the page.
+""",
+ )
+ sizeargs.add_argument(
+ "-f",
+ "--fit",
+ metavar="FIT",
+ type=parse_fitarg,
+ default=FitMode.into,
+ help="""
+
+If --imgsize is given, fits the image using these dimensions. Otherwise, fit
+the image into the dimensions given by --pagesize. FIT is one of into, fill,
+exact, shrink and enlarge. The default value is "into". See the epilogue at the
+bottom for a description of the FIT options.
+
+""",
+ )
+ sizeargs.add_argument(
+ "-a",
+ "--auto-orient",
+ action="store_true",
+ help="""
+If both dimensions of the page are given via --pagesize, conditionally swaps
+these dimensions such that the page orientation is the same as the orientation
+of the input image. If the orientation of a page gets flipped, then so do the
+values set via the --border option.
+""",
+ )
+ sizeargs.add_argument(
+ "--crop-border",
+ metavar="L[:L]",
+ type=parse_borderarg,
+ help="""
+Specifies the border between the CropBox and the MediaBox. One, or two length
+values can be given as an argument, separated by a colon. One value specifies
+the border on all four sides. Two values specify the border on the top/bottom
+and left/right, respectively. It is not possible to specify asymmetric borders.
+""",
+ )
+ sizeargs.add_argument(
+ "--bleed-border",
+ metavar="L[:L]",
+ type=parse_borderarg,
+ help="""
+Specifies the border between the BleedBox and the MediaBox. One, or two length
+values can be given as an argument, separated by a colon. One value specifies
+the border on all four sides. Two values specify the border on the top/bottom
+and left/right, respectively. It is not possible to specify asymmetric borders.
+""",
+ )
+ sizeargs.add_argument(
+ "--trim-border",
+ metavar="L[:L]",
+ type=parse_borderarg,
+ help="""
+Specifies the border between the TrimBox and the MediaBox. One, or two length
+values can be given as an argument, separated by a colon. One value specifies
+the border on all four sides. Two values specify the border on the top/bottom
+and left/right, respectively. It is not possible to specify asymmetric borders.
+""",
+ )
+ sizeargs.add_argument(
+ "--art-border",
+ metavar="L[:L]",
+ type=parse_borderarg,
+ help="""
+Specifies the border between the ArtBox and the MediaBox. One, or two length
+values can be given as an argument, separated by a colon. One value specifies
+the border on all four sides. Two values specify the border on the top/bottom
+and left/right, respectively. It is not possible to specify asymmetric borders.
+""",
+ )
+
+ metaargs = parser.add_argument_group(
+ title="Arguments setting metadata",
+ description="Options handling embedded timestamps, title and author "
+ "information.",
+ )
+ metaargs.add_argument(
+ "--title", metavar="title", type=str, help="Sets the title metadata value"
+ )
+ metaargs.add_argument(
+ "--author", metavar="author", type=str, help="Sets the author metadata value"
+ )
+ metaargs.add_argument(
+ "--creator", metavar="creator", type=str, help="Sets the creator metadata value"
+ )
+ metaargs.add_argument(
+ "--producer",
+ metavar="producer",
+ type=str,
+ default="img2pdf " + __version__,
+ help="Sets the producer metadata value "
+ "(default is: img2pdf " + __version__ + ")",
+ )
+ metaargs.add_argument(
+ "--creationdate",
+ metavar="creationdate",
+ type=valid_date,
+ help="Sets the UTC creation date metadata value in YYYY-MM-DD or "
+ "YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format "
+ "understood by python dateutil module or any format understood "
+ "by `date --date`",
+ )
+ metaargs.add_argument(
+ "--moddate",
+ metavar="moddate",
+ type=valid_date,
+ help="Sets the UTC modification date metadata value in YYYY-MM-DD "
+ "or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format "
+ "understood by python dateutil module or any format understood "
+ "by `date --date`",
+ )
+ metaargs.add_argument(
+ "--subject", metavar="subject", type=str, help="Sets the subject metadata value"
+ )
+ metaargs.add_argument(
+ "--keywords",
+ metavar="kw",
+ type=str,
+ nargs="+",
+ help="Sets the keywords metadata value (can be given multiple times)",
+ )
+
+ viewerargs = parser.add_argument_group(
+ title="PDF viewer arguments",
+ description="PDF files can specify how they are meant to be "
+ "presented to the user by a PDF viewer",
+ )
+
+ viewerargs.add_argument(
+ "--viewer-panes",
+ metavar="PANES",
+ type=parse_panes,
+ help="Instruct the PDF viewer which side panes to show. Valid values "
+ 'are "outlines" and "thumbs". It is not possible to specify both '
+ "at the same time.",
+ )
+ viewerargs.add_argument(
+ "--viewer-initial-page",
+ metavar="NUM",
+ type=int,
+ help="Instead of showing the first page, instruct the PDF viewer to "
+ "show the given page instead. Page numbers start with 1.",
+ )
+ viewerargs.add_argument(
+ "--viewer-magnification",
+ metavar="MAG",
+ type=parse_magnification,
+ help="Instruct the PDF viewer to open the PDF with a certain zoom "
+ "level. Valid values are either a floating point number giving "
+ 'the exact zoom level, "fit" (zoom to fit whole page), "fith" '
+ '(zoom to fit page width) and "fitbh" (zoom to fit visible page '
+ "width).",
+ )
+ viewerargs.add_argument(
+ "--viewer-page-layout",
+ metavar="LAYOUT",
+ type=parse_layout,
+ help="Instruct the PDF viewer how to arrange the pages on the screen. "
+ 'Valid values are "single" (display single pages), "onecolumn" '
+ '(one continuous column), "twocolumnright" (two continuous '
+ 'columns with odd number pages on the right) and "twocolumnleft" '
+ "(two continuous columns with odd numbered pages on the left)",
+ )
+ viewerargs.add_argument(
+ "--viewer-fit-window",
+ action="store_true",
+ help="Instruct the PDF viewer to resize the window to fit the page size",
+ )
+ viewerargs.add_argument(
+ "--viewer-center-window",
+ action="store_true",
+ help="Instruct the PDF viewer to center the PDF viewer window",
+ )
+ viewerargs.add_argument(
+ "--viewer-fullscreen",
+ action="store_true",
+ help="Instruct the PDF viewer to open the PDF in fullscreen mode",
+ )
+
+ args = parser.parse_args(argv[1:])
+
+ if args.verbose:
+ logging.basicConfig(level=logging.DEBUG)
+
+ if args.pillow_limit_break:
+ Image.MAX_IMAGE_PIXELS = None
+
+ if args.gui:
+ gui()
+ sys.exit(0)
+
+ layout_fun = get_layout_fun(
+ args.pagesize, args.imgsize, args.border, args.fit, args.auto_orient
+ )
+
+ # if no positional arguments were supplied, read a single image from
+ # standard input
+ if len(args.images) == 0:
+ logging.info("reading image from standard input")
+ try:
+ args.images = [sys.stdin.buffer.read()]
+ except KeyboardInterrupt:
+ exit(0)
+
+ # with the number of pages being equal to the number of images, the
+ # value passed to --viewer-initial-page must be between 1 and that number
+ if args.viewer_initial_page is not None:
+ if args.viewer_initial_page < 1:
+ parser.print_usage(file=sys.stderr)
+ logging.error(
+ "%s: error: argument --viewer-initial-page: must be "
+ "greater than zero" % parser.prog
+ )
+ exit(2)
+ if args.viewer_initial_page > len(args.images):
+ parser.print_usage(file=sys.stderr)
+ logging.error(
+ "%s: error: argument --viewer-initial-page: must be "
+ "less than or equal to the total number of pages" % parser.prog
+ )
+ exit(2)
+
+ try:
+ convert(
+ *chain.from_iterable(args.images),
+ engine=args.engine,
+ title=args.title,
+ author=args.author,
+ creator=args.creator,
+ producer=args.producer,
+ creationdate=args.creationdate,
+ moddate=args.moddate,
+ subject=args.subject,
+ keywords=args.keywords,
+ colorspace=args.colorspace,
+ nodate=args.nodate,
+ layout_fun=layout_fun,
+ viewer_panes=args.viewer_panes,
+ viewer_initial_page=args.viewer_initial_page,
+ viewer_magnification=args.viewer_magnification,
+ viewer_page_layout=args.viewer_page_layout,
+ viewer_fit_window=args.viewer_fit_window,
+ viewer_center_window=args.viewer_center_window,
+ viewer_fullscreen=args.viewer_fullscreen,
+ outputstream=args.output,
+ first_frame_only=args.first_frame_only,
+ cropborder=args.crop_border,
+ bleedborder=args.bleed_border,
+ trimborder=args.trim_border,
+ artborder=args.art_border,
+ pdfa=args.pdfa,
+ )
+ except Exception as e:
+ logging.error("error: " + str(e))
+ if logging.getLogger().isEnabledFor(logging.DEBUG):
+ import traceback
+
+ traceback.print_exc(file=sys.stderr)
+ exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/img2pdf/jp2.py b/img2pdf/jp2.py
new file mode 100644
index 00000000..1f99a5c3
--- /dev/null
+++ b/img2pdf/jp2.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
+#
+# this module is heavily based upon jpylyzer which is
+# KB / National Library of the Netherlands, Open Planets Foundation
+# and released under the same license conditions
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import struct
+
+
+def getBox(data, byteStart, noBytes):
+ boxLengthValue = struct.unpack(">I", data[byteStart : byteStart + 4])[0]
+ boxType = data[byteStart + 4 : byteStart + 8]
+ contentsStartOffset = 8
+ if boxLengthValue == 1:
+ boxLengthValue = struct.unpack(">Q", data[byteStart + 8 : byteStart + 16])[0]
+ contentsStartOffset = 16
+ if boxLengthValue == 0:
+ boxLengthValue = noBytes - byteStart
+ byteEnd = byteStart + boxLengthValue
+ boxContents = data[byteStart + contentsStartOffset : byteEnd]
+ return (boxLengthValue, boxType, byteEnd, boxContents)
+
+
+def parse_ihdr(data):
+ height = struct.unpack(">I", data[0:4])[0]
+ width = struct.unpack(">I", data[4:8])[0]
+ return width, height
+
+
+def parse_colr(data):
+ meth = struct.unpack(">B", data[0:1])[0]
+ if meth != 1:
+ raise Exception("only enumerated color method supported")
+ enumCS = struct.unpack(">I", data[3:])[0]
+ if enumCS == 16:
+ return "RGB"
+ elif enumCS == 17:
+ return "L"
+ else:
+ raise Exception(
+ "only sRGB and greyscale color space is supported, " "got %d" % enumCS
+ )
+
+
+def parse_resc(data):
+ hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
+ hdpi = ((hnum / hden) * (10 ** hexp) * 100) / 2.54
+ vdpi = ((vnum / vden) * (10 ** vexp) * 100) / 2.54
+ return hdpi, vdpi
+
+
+def parse_res(data):
+ hdpi, vdpi = None, None
+ noBytes = len(data)
+ byteStart = 0
+ boxLengthValue = 1 # dummy value for while loop condition
+ while byteStart < noBytes and boxLengthValue != 0:
+ boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
+ if boxType == b"resc":
+ hdpi, vdpi = parse_resc(boxContents)
+ break
+ return hdpi, vdpi
+
+
+def parse_jp2h(data):
+ width, height, colorspace, hdpi, vdpi = None, None, None, None, None
+ noBytes = len(data)
+ byteStart = 0
+ boxLengthValue = 1 # dummy value for while loop condition
+ while byteStart < noBytes and boxLengthValue != 0:
+ boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
+ if boxType == b"ihdr":
+ width, height = parse_ihdr(boxContents)
+ elif boxType == b"colr":
+ colorspace = parse_colr(boxContents)
+ elif boxType == b"res ":
+ hdpi, vdpi = parse_res(boxContents)
+ byteStart = byteEnd
+ return (width, height, colorspace, hdpi, vdpi)
+
+
+def parsejp2(data):
+ noBytes = len(data)
+ byteStart = 0
+ boxLengthValue = 1 # dummy value for while loop condition
+ width, height, colorspace, hdpi, vdpi = None, None, None, None, None
+ while byteStart < noBytes and boxLengthValue != 0:
+ boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
+ if boxType == b"jp2h":
+ width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
+ break
+ byteStart = byteEnd
+ if not width:
+ raise Exception("no width in jp2 header")
+ if not height:
+ raise Exception("no height in jp2 header")
+ if not colorspace:
+ raise Exception("no colorspace in jp2 header")
+ # retrieving the dpi is optional so we do not error out if not present
+ return (width, height, colorspace, hdpi, vdpi)
+
+
+if __name__ == "__main__":
+ import sys
+
+ width, height, colorspace = parsejp2(open(sys.argv[1]).read())
+ sys.stdout.write("width = %d" % width)
+ sys.stdout.write("height = %d" % height)
+ sys.stdout.write("colorspace = %s" % colorspace)
diff --git a/mo/submit.py b/mo/submit.py
index ac46677b..ec8a998f 100644
--- a/mo/submit.py
+++ b/mo/submit.py
@@ -5,6 +5,7 @@ import pikepdf
from typing import Any
import werkzeug.utils
+from img2pdf import img2pdf
import mo.db as db
import mo.util
from mo.util import logger
--
GitLab