# ##### BEGIN GPL LICENSE BLOCK #####
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# ##### END GPL LICENSE BLOCK #####

# <pep8 compliant>

"""Reading various vector file formats.

Functions for classifying files, tokenizing, and parsing them.
The ultimate goal is to parse a file into an instance of the class Art,
which has the line segments, bezier segments, arc segments,
and faces specified in a vector file.
"""

__author__ = "howard.trickey@gmail.com"

import re
from . import geom
from . import pdf
from . import svg

WARN = True   # print Warnings about strange things?

# Token types
# Each token produced by TokenizeAIEPS is a (type, value) tuple whose
# type is one of the constants below.

TNAME = 0     # executable name / operator; also a lone [ ] { or }
TLITNAME = 1  # literal name written as /name (value excludes the slash)
TSTRING = 2   # ( ) or < > string (value excludes the outer delimiters)
TNUM = 3      # number (value is an int or a float)


def ClassifyFile(filename):
    """Classify file into one of known vector types.

    A name ending in ".svg" is classified from the name alone.  Any other
    file is opened in binary mode and classified from its first 25 bytes,
    followed by a scan of the prolog for an Adobe Illustrator procset.

    Args:
      filename: string, the name of the file to classify
    Returns:
      (string, string), giving maintype and version.
      maintype is one of "svg", "ps", "eps", "pdf" or "ai".  For "ai" the
      second element is "eps" or "pdf" (the container format) rather than
      a version number; for "svg" and "ps" it is the empty string.
      If there's an error, returns ("error", reason-string)
    """

    if filename.endswith(".svg"):
        return ("svg", "")
    try:
        f = open(filename, "rb")
    except IOError:
        return ("error", "file open error")

    # The with-block closes the handle on every path, including a read
    # error part-way through classification.
    with f:
        try:
            start = f.read(25)
        except IOError:
            return ("error", "file open error")

        # Encapsulated Postscript files start like
        #   %!PS-Adobe-X.X EPSF-Y.Y
        # where the first number is the version of PostScript Document
        # Structuring Convention, and the second number is the level of EPSF.
        # Adobe Illustrator files, version 8 and earlier, have
        #   %%+ procset Adobe_Illustrator...
        # sometime before %%EndProlog
        if start.startswith(b"%!PS-Adobe-"):
            ans = ("ps", "")
            # Version bytes are decoded leniently so that binary junk after
            # the magic prefix cannot raise UnicodeDecodeError.
            if start[14:20] == b" EPSF-":
                ans = ("eps", start[20:23].decode("utf-8", "replace"))
            if start[14:19] == b" PDF-":
                ans = ("pdf", start[19:22].decode("utf-8", "replace"))
            if ans[0] != "pdf" and _FindAdobeIllustrator(f):
                ans = ("ai", "eps")
        # PDF files start with %PDF
        # Adobe Illustrator files, version 9 and later, have
        #   %%+ procset Adobe_Illustrator...
        # sometime before %%EndProlog
        elif start.startswith(b"%PDF"):
            ans = ("pdf", start[5:8].decode("utf-8", "replace"))
            if _FindAdobeIllustrator(f):
                ans = ("ai", "pdf")
        else:
            ans = ("error", "unknown file type")
    return ans


def _FindAdobeIllustrator(f):
    """Scan forward in f for the Adobe Illustrator marker.

    Lines are read one at a time from the current position.  The scan
    stops at end of file or at the first line starting with %%EndProlog.

    Args:
      f: an open File (binary mode), positioned where the scan should start
    Returns:
      bool: True if a line containing "Adobe_Illustrator" was read before
        the scan stopped, else False
    """

    marker = b"Adobe_Illustrator"
    line = f.readline()
    while line and not line.startswith(b"%%EndProlog"):
        if marker in line:
            return True
        line = f.readline()
    return False


def ParseVecFile(filename):
    """Classify a vector art file, parse it, and return its Art object.

    The file is classified with ClassifyFile and dispatched on the result:
    PDF (including PDF-based Adobe Illustrator), EPS (including EPS-based
    Adobe Illustrator) and SVG are handled.

    Args:
      filename: string - name of the file to read and parse
    Returns:
      geom.Art: object containing paths drawn in the file.
           Return None if the file could not be classified, is of an
           unhandled type, or (for PDF) no page contents were obtained.
    """

    kind, detail = ClassifyFile(filename)
    if kind == "error":
        print("Couldn't get Art:", detail)
        return None

    pdf_based = kind == "pdf" or (kind, detail) == ("ai", "pdf")
    eps_based = kind == "eps" or (kind, detail) == ("ai", "eps")

    if pdf_based:
        page = pdf.ReadPDFPageOneContents(filename)
        if not page:
            return None
        return ParsePS(TokenizeAIEPS(page), kind, detail)
    if eps_based:
        return ParsePS(TokenizeAIEPSFile(filename), kind, detail)
    if kind == "svg":
        return svg.ParseSVGFile(filename)
    return None


def ParseAIEPSFile(filename):
    """Tokenize and parse a file as an EPS-flavoured Adobe Illustrator file.

    No classification is done; the file is always treated as "ai"/"eps".

    Args:
      filename: string - name of the file to read and parse
    Returns:
      geom.Art - object containing paths and faces drawn in the file
    """

    return ParsePS(TokenizeAIEPSFile(filename), "ai", "eps")


def TokenizeAIEPSFile(filename):
    """Tokenize the after-setup part of an AI (eps kind) file.

    Reads the whole file as text and runs TokenizeAIEPS on its contents.

    Args:
      filename: name of the file to tokenize
    Returns:
      list of (tokenid, value) tuples; an empty list if the file cannot
      be opened (a message is printed when WARN is set)
    """

    try:
        # Plain text mode already converts every newline representation to
        # '\n' in Python 3.  The legacy "U" flag must not be used: it was
        # removed in Python 3.11, where open() rejects it with ValueError.
        f = open(filename, "r")
    except IOError:
        if WARN:
            print("Can't open file", filename)
        return []
    # Close the file even if reading or decoding fails.
    with f:
        contents = f.read()
    return TokenizeAIEPS(contents)

# Regular expressions for PostScript tokens
# Name: a maximal run of characters that are neither white space nor one
# of the delimiters ( ) < > [ ] { } / %
_re_psname = re.compile(r"[^ \t\r\n()<>[\]{}/%]+")
# Real: optional sign, then digits with a decimal point ("1." or "1.5")
# or a bare fraction (".5").  Exponent notation is not recognized.
_re_psfloat = re.compile(r"(\+|-)?(([0-9]+\.[0-9]*)|(\.[0-9]+))")
# Integer: optional sign followed by decimal digits.
_re_psint = re.compile(r"(\+|-)?[0-9]+")
# Literal string: from "(" to the first ")" that is not backslash-escaped.
# The two alternatives are mutually exclusive (no ambiguous backtracking)
# and the string may span several lines.  Balanced unescaped parentheses
# nested inside a string are not handled.
_re_psstring = re.compile(r"\((?:\\.|[^\\)])*\)", re.DOTALL)
# Hex string: from "<" to the FIRST ">" (may span lines), so that text
# between two separate hex strings on one line is not swallowed.
# A "<< ... >>" dictionary is matched as a single unit up to the first ">>".
_re_pshexstring = re.compile(r"<<.*?>>|<[^>]*>", re.DOTALL)


def TokenizeAIEPS(s):
    """Tokenize the after-setup part of an AI (eps kind) string.

    Tokenizing starts just after the first "%%EndSetup" if there is one,
    otherwise at the start of s.  White space and %-comments (through the
    end of the line) are skipped.

    Args:
      s: string to tokenize
    Returns:
      list of (Txxx, val) where Txxx is a token type constant:
        (TLITNAME, name) for /name, without the slash
        (TSTRING, text) for ( ) and < > strings: the raw text between the
            outer delimiters, with escapes left undecoded
        (TNUM, number) with an int or float value
        (TNAME, name) for any other name, and for each of [ ] { }
      Problems are reported with print() when WARN is set.  An
      unterminated string abandons the rest of the input.
    """

    i = s.find("%%EndSetup")
    if i == -1:
        i = 0
    else:
        i += 10  # skip over "%%EndSetup" itself
    ans = []
    while i < len(s):
        c = s[i]
        if c.isspace():
            i += 1
        elif c == "%":
            # comment: skip through end of line
            i = s.find("\n", i)
            if i < 0:
                i = len(s)
                break
            i += 1
        elif c == "/":
            m = _re_psname.match(s, i + 1)
            if m:
                ans.append((TLITNAME, m.group()))
                i = m.end()
            else:
                if WARN:
                    print("empty name at", i)
                i += 1
        elif c == "(":
            m = _re_psstring.match(s, i)
            if m:
                ans.append((TSTRING, s[m.start() + 1:m.end() - 1]))
                i = m.end()
            else:
                if WARN:
                    print("unterminated string at", i)
                i = len(s)
        elif c == "<":
            m = _re_pshexstring.match(s, i)
            if m:
                ans.append((TSTRING, s[m.start() + 1:m.end() - 1]))
                i = m.end()
            else:
                if WARN:
                    print("unterminated hex string at", i)
                i = len(s)  # unterminated hex string
        elif c == "[" or c == "]" or c == "{" or c == "}":
            ans.append((TNAME, c))
            i += 1
        elif c == "-" or c.isdigit() or (
                c in "+." and
                (_re_psfloat.match(s, i) or _re_psint.match(s, i))):
            # Numbers may also start with "+" or "." (".5" is common in
            # PDF content streams).  Those two characters only come here
            # when a number really follows, so that names beginning with
            # them are still tokenized as names below.
            m = _re_psfloat.match(s, i)
            if m:
                v = float(m.group())
                ans.append((TNUM, v))
                i = m.end()
            else:
                m = _re_psint.match(s, i)
                if m:
                    v = int(m.group())
                    ans.append((TNUM, v))
                    i = m.end()
                else:
                    if WARN:
                        print("number parse problem at", i)
                    i += 1
        else:
            m = _re_psname.match(s, i)
            if m:
                ans.append((TNAME, m.group()))
                i = m.end()
            else:
                # a delimiter that starts no token here, e.g. ")" or ">"
                if WARN:
                    print("tokenize error at", i, s[i:i + 10], "...")
                i += 1
    return ans


class GState(object):
    """Graphics state used while interpreting a token stream.

    Attributes:
      ctm: geom.TransformMatrix - current transform matrix
      fillpaint: geom.Paint - paint used for filling
      strokepaint: geom.Paint - paint used for stroking
    """

    def __init__(self):
        """Start with a default transform matrix and black paints."""

        self.ctm = geom.TransformMatrix()
        self.fillpaint = geom.black_paint
        self.strokepaint = geom.black_paint

    def Copy(self):
        """Return a new GState with a copied ctm and the same paints."""

        dup = GState()
        dup.ctm = self.ctm.Copy()
        # paints are immutable, so the two states can share them
        dup.fillpaint = self.fillpaint
        dup.strokepaint = self.strokepaint
        return dup


class _PathState(object):
    """Object to hold state while parsing Adobe paths.

    Attributes:
      art: geom.Art, used  to accumulate answer
      curpath: geom.Path
      cursubpath: geom.Subpath - not yet added into curpath
      curpoint: coordinates of current point, None if none
      incompound: true if parsing an ai/eps compound path
      gstate: GState - the current graphics state
      statestack: list of GState - stack when graphics state pushed
      messages: list of string - warnings, errors
    """

    def __init__(self):
        """Construct the _PathState object."""

        self.art = geom.Art()
        self.ResetPath()
        self.incompound = False
        self.gstate = GState()
        self.statestack = []
        self.messages = []

    def CloseSubpath(self):
        """Close the current subpath.

        Close the current subpath by appending a straight line segment from
        current point to starting point of the subpath, terminating current
        subpath.
        Does nothing if the current subpath is empty.  No segment is added
        if the current point is already the starting point.
        """

        if not self.cursubpath.Empty():
            startp = geom.Subpath.SegStart(self.cursubpath.segments[0])
            if startp != self.curpoint:
                self.cursubpath.AddSegment(("L", self.curpoint, startp))
                self.curpoint = startp
            self.curpath.AddSubpath(self.cursubpath)
            self.cursubpath = geom.Subpath()

    def ResetPath(self):
        """Reset the current path state to empty,
        discarding any current path.

        Also clears the current point and the compound-path flag.
        """

        self.curpath = geom.Path()
        self.cursubpath = geom.Subpath()
        self.curpoint = None
        self.incompound = False

    def StartCompound(self):
        """Mark entry to an ai/eps compound path."""

        self.incompound = True

    def EndCompound(self):
        """Finish off an ai/eps compound path.

        The accumulated path, if not empty, is appended to art, and the
        path state is reset (which also leaves compound mode).
        """

        if not self.curpath.Empty():
            self.art.paths.append(self.curpath)
        self.ResetPath()

    def DrawPath(self, dofill, dostroke, fillevenodd=False):
        """End the current path and add its subpaths to art.

        Assume any finally closing of the current subpath, if needed,
        was done separately.
        If we are in an ai/eps compound path, don't close off the
        current path yet - wait until EndCompound - but record
        the fill/stroke parameters for later use.

        Arguments:
          dofill: if true, the path is to be filled
          dostroke: if true, the path is to be stroked
          fillevenodd: if true, use even-odd fill rule,
              else nonzero winding number rule
        """

        # Flush a pending (unclosed) subpath into the current path.
        if not self.cursubpath.Empty():
            self.curpath.AddSubpath(self.cursubpath)
            self.cursubpath = geom.Subpath()
        p = self.curpath
        if not p.Empty():
            p.filled = dofill
            p.fillevenodd = fillevenodd
            p.stroked = dostroke
            # A paint is only recorded for the operations actually done.
            if dofill:
                p.fillpaint = self.gstate.fillpaint
            if dostroke:
                p.strokepaint = self.gstate.strokepaint
            if not self.incompound:
                self.art.paths.append(p)
                self.ResetPath()
        elif not self.incompound:
            self.ResetPath()

    def MoveTo(self, x, y, relative=False):
        """Begin a new subpath, starting at (x,y).

        If the previous path construction was also a MoveTo,
        its effect is overridden.
        If relative is True, the move should be relative
        to the previous point, else it is absolute.
        A relative move with no current point is treated as absolute.

        Args:
          x: float
          y: float - the 2d coord to start at
          relative: bool - if true, then a relative move, else absolute
        """

        # NOTE(review): here (and in LineTo) the CTM is applied to (x, y)
        # before the current point is added, whereas Bezier3To adds the
        # current point before applying the CTM; curpoint itself is stored
        # after the CTM has been applied.  These cannot both be right for
        # a non-identity CTM - confirm the intended relative semantics.
        (xp, yp) = self.gstate.ctm.Apply((x, y))
        if relative and self.curpoint:
            xp += self.curpoint[0]
            yp += self.curpoint[1]
        p = (xp, yp)
        if not self.cursubpath.Empty():
            self.curpath.AddSubpath(self.cursubpath)
        self.cursubpath = geom.Subpath()
        self.curpoint = p

    def LineTo(self, x, y, relative=False):
        """Append a straight line segment from current point to (x,y).

        Does nothing if there is no current point, or the segment
        would have no length.
        If relative is True, the endpoint of the line is relative to the start.

        Args:
          x: float
          y: float - the 2d coord to make the line to.
          relative: bool - if true, then a relative lineto
        """

        # "No current point" is represented by None (see ResetPath).
        if self.curpoint is None:
            return
        (xp, yp) = self.gstate.ctm.Apply((x, y))
        if relative:
            xp += self.curpoint[0]
            yp += self.curpoint[1]
        p = (xp, yp)
        if p != self.curpoint:
            self.cursubpath.AddSegment(("L", self.curpoint, p))
            self.curpoint = p

    def Bezier3To(self, x, y, cp1x, cp1y, cp2x, cp2y,
                  use_start_as_cp=False, relative=False):
        """Append a cubic bezier curve from current point to (x,y).

        Does nothing if there is no current point.

        Args:
          x: float
          y: float - the 2d coord that ends the curve
          cp1x: float
          cp1y: float - first bezier control point
          cp2x: float
          cp2y: float - second bezier control point
          use_start_as_cp: bool - if True, ignore cp1x,cp1y and use current
              point as first control point instead
          relative: bool - if True, all coords are relative to previous point
        """

        # "No current point" is represented by None (see ResetPath).
        if self.curpoint is None:
            return
        (rx, ry) = (0, 0)
        if relative:
            (rx, ry) = self.curpoint
        if use_start_as_cp:
            cp1 = self.curpoint
        else:
            cp1 = self.gstate.ctm.Apply((cp1x + rx, cp1y + ry))
        cp2 = self.gstate.ctm.Apply((cp2x + rx, cp2y + ry))
        p = self.gstate.ctm.Apply((x + rx, y + ry))
        self.cursubpath.AddSegment(("B", self.curpoint, p, cp1, cp2))
        self.curpoint = p

    def PushGState(self):
        """Push the graphics state, leaving a copy in gstate."""

        newgstate = self.gstate.Copy()
        self.statestack.append(self.gstate)
        self.gstate = newgstate

    def PopGState(self):
        """Pop the graphics state (no-op if stack is empty)."""

        if self.statestack:
            self.gstate = self.statestack.pop()


def ParsePS(toks, major="pdf", minor=""):
    """Parse a Postscript-like token list into an Art object.

    Four kinds of files use approximately the same painting
    model and operators:
      Encapsulated Postscript (EPS) - Postscript with Document
        Structuring Convention Comments: in general, these
        can have Postscript procedures and are not handled
        by the code here, but many programs producing eps
        just use the path creating/painting operators or
        abbreviations for them.
      Adobe Illustrator, version <=8: Uses EPS but with
        paths are all just single subpaths unless enclosed
        in compound path brackets (*u ... *U)
      Adobe Illustrator, version >=9: PDF for page description
      PDF: similar to Postscript, but some different operators

     We can parse each into an Art structure using approximately
     the same code.

    Operators are recognized postfix-style: a run of numbers followed by
    a name is treated as operands plus operator, and a name on its own is
    treated as a zero-operand operator.  Unrecognized operators, and all
    string and literal-name tokens, are ignored.

    Args:
      toks: list of (Txxx, val), result of Tokenizing a file
      major: string - major version ("ps", "eps", "pdf", or "ai")
      minor: string - minor version (version number for ps, eps, pdf,
                      and "eps" or "pdf" for "ai")
    Returns:
      geom.Art: object with the paths painted by the token stream
    """

    pstate = _PathState()
    i = 0
    while i < len(toks):
        (t, v) = toks[i]
        i += 1
        if t == TNAME:
            # zero-operand operator or unhandled one
            # since all handled multi-operand operators
            # are handled below
            if v == "h" or v == "H" or v == "closepath":
                pstate.CloseSubpath()
            elif v == "f" or v == "F" or v == "fill":
                # fill path using nonzero winding number rule
                pstate.DrawPath(True, False, False)
            elif v == "f*" or v == "eofill":
                # fill path using even-odd rule
                pstate.DrawPath(True, False, True)
            elif v == "s":
                # close and stroke path
                pstate.CloseSubpath()
                pstate.DrawPath(False, True)
            elif v == "S" or v == "stroke":
                # stroke path
                pstate.DrawPath(False, True)
            elif v == "b":
                # close, fill and stroke path using nonzero winding rule
                pstate.CloseSubpath()
                pstate.DrawPath(True, True, False)
            elif v == "B":
                # fill and stroke path using nonzero winding rule
                pstate.DrawPath(True, True, False)
            elif v == "b*":
                # close, fill and stroke path using even-odd rule
                pstate.CloseSubpath()
                pstate.DrawPath(True, True, True)
            elif v == "B*":
                # fill and stroke path using even-odd rule
                pstate.DrawPath(True, True, True)
            elif v == "n" or v == "N" or v == "newpath":
                # finish path no-op, probably after clipping
                # (which is not handled yet)
                pstate.ResetPath()
            elif v == "*u" and major == "ai" and minor == "eps":
                # beginning of AI compound path
                pstate.StartCompound()
            elif v == "*U" and major == "ai" and minor == "eps":
                # end of AI compound path
                pstate.EndCompound()
            elif v == "q" or v == "gsave":
                pstate.PushGState()
            elif v == "Q" or v == "grestore":
                pstate.PopGState()
        elif t == TNUM:
            # see if have nargs numbers followed by an op name
            # (at most 6 more tokens are examined after the first number)
            op = ""
            args = [float(v)]
            iend = min(i + 6, len(toks))
            while i < iend:
                t = toks[i][0]
                if t == TNUM:
                    args.append(float(toks[i][1]))
                    i += 1
                elif t == TNAME:
                    op = toks[i][1]
                    i += 1
                    break
                else:
                    break
            if op and len(args) <= 6:
                if len(args) == 1:
                    if op == "g":
                        # gray level for non-stroking operations
                        pstate.gstate.fillpaint = geom.Paint(args[0],
                            args[0], args[0])
                    elif op == "G":
                        # gray level for stroking operations
                        pstate.gstate.strokepaint = geom.Paint(args[0],
                            args[0], args[0])
                elif len(args) == 2:
                    if op == "m" or op == "moveto":
                        pstate.MoveTo(args[0], args[1], False)
                    elif op == "rmoveto":
                        pstate.MoveTo(args[0], args[1], True)
                    elif op == "l" or op == "L" or op == "lineto":
                        pstate.LineTo(args[0], args[1], False)
                    elif op == "rlineto":
                        pstate.LineTo(args[0], args[1], True)
                    elif op == "scale":
                        pstate.gstate.ctm.ComposeTransform(args[0], 0.0,
                            0.0, args[1], 0.0, 0.0)
                    elif op == "translate":
                        # Judging by the scale and cm calls, the arguments
                        # are the a b c d e f of a PDF-style matrix, so a
                        # pure translation needs the identity (1 0 0 1) in
                        # the linear part; zeros there would collapse
                        # every later point onto (tx, ty).
                        pstate.gstate.ctm.ComposeTransform(1.0, 0.0,
                            0.0, 1.0, args[0], args[1])
                elif len(args) == 3:
                    if op == "rg" or op == "scn":
                        # rgb for non-stroking operations
                        # For scn should really refer to Color space from
                        # cs operator, which in turn may need to look in
                        # Resource Dictionary in pdf,
                        # so for now punt and assume rgb if three operands
                        pstate.gstate.fillpaint = geom.Paint(args[0],
                            args[1], args[2])
                    elif op == "RG" or op == "SCN":
                        # rgb for stroking operations (same caveat)
                        pstate.gstate.strokepaint = geom.Paint(args[0],
                            args[1], args[2])
                elif len(args) == 4:
                    if op == "v" or op == "V":
                        # cubic bezier but use start as first cp
                        pstate.Bezier3To(args[2], args[3], 0.0, 0.0,
                                         args[0], args[1],
                                         use_start_as_cp=True)
                    elif op == "y" or op == "Y":
                        # cubic bezier but use last as second cp
                        pstate.Bezier3To(args[2], args[3], args[0], args[1],
                                         args[2], args[3])
                    elif op == "re" or op == "rectfill" or op == "rectstroke":
                        # rectangle with x, y, width, height as args
                        # drawn as complete subpath  (a PDF operator)
                        x = args[0]
                        y = args[1]
                        w = args[2]
                        h = args[3]
                        pstate.MoveTo(x, y)
                        pstate.LineTo(x + w, y)
                        pstate.LineTo(x + w, y + h)
                        pstate.LineTo(x, h + y)
                        pstate.CloseSubpath()
                        # "re" only builds the path; the other two also
                        # paint it immediately
                        if op == "rectfill":
                            pstate.DrawPath(True, False)
                        elif op == "rectstroke":
                            pstate.DrawPath(False, True)
                    elif op == "k" or op == "scn":
                        # cmyk for non-stroking operations
                        # For scn should really refer to Color space from
                        # cs operator, which in turn may need to look in
                        # Resource Dictionary in pdf,
                        # so for now punt and assume cmyk if four operands
                        pstate.gstate.fillpaint = geom.Paint.CMYK(args[0],
                            args[1], args[2], args[3])
                    elif op == "K" or op == "SCN":
                        # cmyk for stroking operations (same caveat)
                        pstate.gstate.strokepaint = geom.Paint.CMYK(args[0],
                            args[1], args[2], args[3])
                elif len(args) == 6:
                    if op == "c" or op == "C" or op == "curveto":
                        # corner and non-corner cubic beziers
                        pstate.Bezier3To(args[4], args[5], args[0], args[1],
                                         args[2], args[3], False, False)
                    elif op == "rcurveto":
                        pstate.Bezier3To(args[4], args[5], args[0], args[1],
                                         args[2], args[3], False, True)
                    elif op == "cm" or op == "concat":
                        # compose a full six-number matrix onto the ctm
                        pstate.gstate.ctm.ComposeTransform(args[0], args[1],
                            args[2], args[3], args[4], args[5])
    return pstate.art


# Notes on Adobe Illustrator post version 8:
# Outside format is PDF.
# A Page object may have a PieceInfo with Illustrator attribute
# pointing to an object with a Private attribute that points to
# an object with AIMetaData and AIPrivateData[123456]
# AIMetaData points to the prolog of an old-style AI file
# AIPrivate1 points to a thumbnail image
# AIPrivate2-6 point to compressed stream objects - need more investigation.
# But AI version 12 does different stuff: has AIPrivateData1-6 etc.
# It appears that the AIPrivateData6 obj has the %%EndSetup and then an
# old-style AI file
# So: hacky way that will sometimes work:
# 1) find "/AIPrivateData6 Z 0 R"  for some Z
# 2) find "Z 0 obj"
# 3) find following stream, and then endstream
# 4) flatedecode the stream if necessary
# 5) look for "%%EndSetup"; if found, tokenize and parse like old AI files