#!/bin/bash
# 
# xmlto - apply an XSL stylesheet to an XML document
# Copyright (C) 2001  Tim Waugh <twaugh@redhat.com>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# Utilities that we need that aren't everywhere.
# Each of these names is expanded unquoted where it is invoked, so a
# value may carry extra arguments (BASH later gets " -x" appended for
# extremely verbose runs).
FIND=find     # This must be GNU find (need -maxdepth)
MKTEMP=mktemp # See http://www.mktemp.org if missing on your system
# NOTE(review): BASH is also a variable that bash itself sets at startup;
# this assignment replaces that value for the rest of the script.
BASH=bash     # GNU bash, for running the format scripts
GETOPT=getopt # a getopt that supports --longoptions

# Report the program name and version number on standard output.
version () {
  printf '%s\n' "xmlto version 0.0.8"
}

# Print the command-line summary on standard output, followed by one
# section per source document type found directly under FORMAT_DIR,
# each listing the contents of that sub-directory (the format scripts).
#
# Globals:
#   FORMAT_DIR (read) - directory holding one sub-directory per source type
#   FIND       (read) - find command to run; plain "find" if unset
# Outputs:
#   the usage text and format listing on stdout
usage () {
  cat << EOF
usage: xmlto FORMAT [OPTION]... XML
OPTIONs are:
  -v              verbose output (-vv for very verbose)
  -x stylesheet   use the specified stylesheet instead of choosing one
  -m fragment     use the XSL fragment to customize the stylesheet
  -o directory    put output in the specified directory instead of
                  the current working directory
  --extensions    turn on stylesheet extensions for this tool chain

Available FORMATs depend on the type of the XML file (which is
determined automatically).
EOF
  if [ -d "$FORMAT_DIR" ]
  then
    local type_dir
    # NUL-delimited names keep directories containing whitespace intact.
    # -mindepth 1 leaves out FORMAT_DIR itself, and the depth options come
    # before -type so that GNU find does not warn about their position.
    while IFS= read -r -d '' type_dir
    do
      cat << EOF

For documents of type "${type_dir##*/}":
EOF
      ls "$type_dir"
    done < <(${FIND:-find} "$FORMAT_DIR" -mindepth 1 -maxdepth 1 -type d -print0)
  fi
}

# FORMAT_DIR and XSL_DIR keep any value already present in the
# environment, which lets the script run from the build directory.
prefix=/usr
: "${FORMAT_DIR=/usr/share/xmlto/format}"
: "${XSL_DIR=/usr/share/xmlto/xsl}"
# SOURCE_FORMAT may be pre-set as well, though ideally the source
# document type is worked out without any help.
: "${SOURCE_FORMAT=docbook}"

# Turn FORMAT_DIR and XSL_DIR into absolute pathnames and start with
# the current directory as OUTPUT_DIR.  A value that is empty or that
# already begins with "/" is left exactly as it is.
WD=$(pwd)
case "$FORMAT_DIR" in
  "" | /*) ;;
  *) FORMAT_DIR="${PWD}/${FORMAT_DIR}" ;;
esac
case "$XSL_DIR" in
  "" | /*) ;;
  *) XSL_DIR="${PWD}/${XSL_DIR}" ;;
esac
OUTPUT_DIR="$WD"

# XSL fragments to be included in the stylesheet, with a separate
# count of how many have been stored.
XSL_MOD_COUNT=0
declare -a XSL_MODS

# Files and directories to delete when the script exits, again with a
# separate count.  The handler changes to / first, so the shell is not
# sitting inside a directory that is about to be removed.
CLEANFILE_COUNT=0
declare -a CLEANFILES
trap -- 'cd /; if [ -n "${CLEANFILES[*]}" ]; then rm -rf "${CLEANFILES[@]}"; fi' EXIT

# Options handed to the XSLT processor; later sections append to this.
XSLTOPTS=

# Magic paper size, based on LC_PAPER
#
# The first line printed by "locale LC_PAPER" is compared with 297, the
# height of an A4 sheet in millimetres.  When it matches, a small XSL
# fragment setting the paper.type parameter is written to a temporary
# file and queued like a user-supplied fragment.
#
# papertype is cleared first so that a value inherited from the
# environment cannot end up inside the generated XSL.
papertype=
if command -v locale >/dev/null 2>&1
then
  # For paper sizes we know about, specify them.
  paper_height=$(locale LC_PAPER 2>/dev/null | head -1)
  if [ "$paper_height" = "297" ]
  then
    papertype=A4
  fi

  if [ -n "$papertype" ]
  then
    # Stop if the temporary file cannot be created, as the other
    # mktemp calls in this script do.
    papersizemod=$(${MKTEMP:-mktemp} "${TMPDIR:-/tmp}/xmlto-xsl.XXXXXX") || exit 1
    # Array subscripts are arithmetic contexts, so the counters are
    # named without "$" here.
    CLEANFILES[CLEANFILE_COUNT]="$papersizemod"
    CLEANFILE_COUNT=$((CLEANFILE_COUNT + 1))
    cat << EOF > "$papersizemod"
<?xml version='1.0'?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
		version='1.0'>
<xsl:param name="paper.type" select="'$papertype'"/>
</xsl:stylesheet>
EOF
    XSL_MODS[XSL_MOD_COUNT]="$papersizemod"
    XSL_MOD_COUNT=$((XSL_MOD_COUNT + 1))
  fi
fi

# Disable network entities
XSLTOPTS="$XSLTOPTS --nonet"

# Process any options
#
# getopt rewrites the command line into a quoted string, which is
# loaded back into the positional parameters with "eval set --".
# The loop then consumes one option (plus its argument, if any) per
# iteration until the "--" marker; what follows it is left in "$@".
#
#   -x FILE       stylesheet to use; a relative name is resolved against PWD
#   -m FILE       XSL fragment to queue; absolute paths and scheme:/ URLs
#                 are stored as given, other names are resolved against PWD
#   -o DIR        output directory; a relative name is resolved against WD
#   --extensions  adds the extension parameters to XSLTOPTS
#   -v            raises VERBOSE by one each time it is given
if ! ARGS=$(${GETOPT} --longoptions=help,version,extensions -n xmlto -- x:m:o:v "$@")
then
  usage
  exit 1
fi
eval set -- "$ARGS"
while [ "$#" -gt 0 ]; do
  case "$1" in
  --help)
	usage
	exit 0
	;;
  --version)
	version
	exit 0
	;;
  -x)
	case "$2" in
	/*) STYLESHEET="$2" ;;
	 *) STYLESHEET="$PWD/$2" ;;
	esac
	shift 2
	;;
  -m)
	case "$2" in
	/* | *:/*) XSL_MODS[$XSL_MOD_COUNT]="$2" ;;
	        *) XSL_MODS[$XSL_MOD_COUNT]="$PWD/$2" ;;
	esac
	XSL_MOD_COUNT=$(($XSL_MOD_COUNT + 1))
	shift 2
	;;
  -o)
	case "$2" in
	/*) OUTPUT_DIR="$2" ;;
	 *) OUTPUT_DIR="$WD/$2" ;;
	esac
	shift 2
	;;
  --extensions)
	# Turn on extensions for whatever tools we are using.
	# At the moment, it must be for PassiveTeX, since
	# that's all we know about.
	XSLTOPTS="$XSLTOPTS --param use.extensions '1'"
	XSLTOPTS="$XSLTOPTS --param passivetex.extensions '1'"
	shift
	;;
  -v)
	# ":=" really assigns the default (":-" only expands to it).
	: "${VERBOSE:=0}"
	VERBOSE=$((VERBOSE + 1))
	shift
	;;
  --)
	shift
	break
	;;
  *)
	# Nothing else should get past getopt.  Without this arm an
	# unrecognised word would never be shifted away and the loop
	# would spin forever.
	echo >&2 "xmlto: unexpected option '$1'"
	usage
	exit 1
	;;
  esac
done

# Exactly two non-empty operands must remain: the destination format
# and the XML file.  The emptiness test is made on the raw arguments,
# because INPUT_FILE itself is never empty once "$PWD/" has been put
# in front of a relative name.
if [ "$#" != "2" ] || [ -z "$1" ] || [ -z "$2" ]
then
  usage
  exit 1
fi

DEST_FORMAT="$1"
# A relative input file name is resolved against the current directory.
case "$2" in
/*) INPUT_FILE="$2" ;;
 *) INPUT_FILE="$PWD/$2" ;;
esac

# Decide what source format this is.  Default to DocBook.
#
# The input is read line by line.  In each line, leading processing
# instructions (<?...>) and comments or DOCTYPE declarations (<!...>)
# are stripped up to their first ">".  The first tag that is left is
# taken to be the root element, whose name ends at the first
# whitespace, "/" or ">".  A root element of fo:root selects the "fo"
# source format; anything else leaves SOURCE_FORMAT unchanged.
#
# Known limitation: only lines containing ">" are examined, so a
# comment, declaration or start tag spread over several lines can be
# misread.

# Fail before trying to open the file, with a message saying why.
if [ ! -e "$INPUT_FILE" ]
then
  echo >&2 "xmlto: input file not found: ${INPUT_FILE}"
  exit 1
fi

# Start clean so that a value inherited from the environment is not used.
rootel=
# "read" trims leading whitespace (default IFS), which the prefix tests
# below rely on.  -r keeps backslashes literal, and the "|| [ -n ... ]"
# also handles a last line that has no trailing newline.
{ while read -r line || [ -n "$line" ]
  do
    while [ -n "${line}" ] && [ -z "${line##*>*}" ] # while tags in line
    do
      if [ -z "${line%%<\?*}" ]
      then
	# This is a processing instruction
	line="${line#*>}"
	continue
      fi

      if [ -z "${line%%<!*}" ]
      then
	# Comment or DOCTYPE.  We ignore DOCTYPE for now.
	line="${line#*>}"
	continue
      fi

      # Root element: keep the name only, dropping attributes as well as
      # a directly following ">" or "/>" (as in <fo:root> or <doc/>).
      rootel="${line#*<}"
      rootel="${rootel%%[[:space:]/>]*}"
      break
    done
    if [ -n "${rootel}" ]
    then
      break
    fi
  done
} < "$INPUT_FILE"
case "$rootel" in
fo:root)
	SOURCE_FORMAT="fo"
	;;
esac

# A destination format written as an absolute pathname names a
# user-defined format script.  Anything else is looked up among our own
# scripts, under the directory for the source format.
if [ "${DEST_FORMAT#/}" != "$DEST_FORMAT" ]
then
  FORMAT="$DEST_FORMAT"
else
  FORMAT="${FORMAT_DIR}/${SOURCE_FORMAT}/${DEST_FORMAT}"
fi

if [ -n "$VERBOSE" ]
then
  echo >&2 "Format script: ${FORMAT}"
fi

# Give up when there is no such format script.
[ -e "$FORMAT" ] || {
  echo "I don't know how to convert ${SOURCE_FORMAT} into ${DEST_FORMAT}."
  exit 1
}

# Unless a stylesheet has been chosen already, run the format script in
# "stylesheet" mode and take its output as the stylesheet to use.
# XSLT_PROCESSOR and XSL_DIR are exported so the script can see them.
XSLT_PROCESSOR=xsltproc # We only know about xsltproc right now.
export XSLT_PROCESSOR XSL_DIR
[ -n "$STYLESHEET" ] || STYLESHEET="$(${BASH} "$FORMAT" stylesheet)" || exit 1

# Escape a string for use inside a double-quoted XML attribute value.
# Arguments: $1 - the raw text
# Outputs:   the text with &, < and " replaced by entity references
xml_attr_escape () {
  printf '%s\n' "$1" |
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/"/\&quot;/g'
}

# We might need to create a temporary stylesheet if there are
# XSL fragments that need adding.
#
# The temporary stylesheet imports the real one and then includes each
# fragment in the order it was queued.  It replaces STYLESHEET and is
# registered for removal on exit.  Every href is XML-escaped, so a path
# containing &, < or " still yields a well-formed document.
if [ "${XSL_MOD_COUNT:-0}" -gt 0 ] && [ -n "$STYLESHEET" ]
then
  REAL_STYLESHEET="$STYLESHEET"
  [ -n "$VERBOSE" ] && echo >&2 "Real stylesheet: ${REAL_STYLESHEET}"
  STYLESHEET="$(${MKTEMP} "${TMPDIR:-/tmp}/xmlto-xsl.XXXXXX")" || exit 1
  CLEANFILES[$CLEANFILE_COUNT]="$STYLESHEET"
  CLEANFILE_COUNT=$(($CLEANFILE_COUNT + 1))
  import_href=$(xml_attr_escape "$REAL_STYLESHEET")
  # One redirection for the whole document, so a write failure is
  # noticed instead of leaving a truncated stylesheet behind.
  {
    cat << EOF
<?xml version='1.0'?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
		version='1.0'>
<xsl:import href="${import_href}"/>
EOF

    i=0
    while [ "$i" -lt "$XSL_MOD_COUNT" ]
    do
      printf '<xsl:include href="%s"/>\n' "$(xml_attr_escape "${XSL_MODS[$i]}")"
      i=$((i + 1))
    done

    printf '%s\n' '</xsl:stylesheet>'
  } > "$STYLESHEET" || exit 1
fi

# Do the XSL-T processing from inside a fresh scratch directory, which
# is registered for removal on exit.  The result is XSLT_PROCESSED:
# either the processor's output file in that directory or, when there
# is no stylesheet, the input file itself.
XSLT_PROCESSED_DIR="$(${MKTEMP} -d "${TMPDIR:-/tmp}/xmlto.XXXXXX")" || exit 1
CLEANFILES[$CLEANFILE_COUNT]="$XSLT_PROCESSED_DIR"
CLEANFILE_COUNT=$(($CLEANFILE_COUNT + 1))
cd "$XSLT_PROCESSED_DIR" || exit 1

if [ -z "${STYLESHEET}" ]
then
  # No stylesheet: no XSL-T processing to do.
  XSLT_PROCESSED="$INPUT_FILE"
else
  [ -n "$VERBOSE" ] && echo >&2 "Stylesheet: ${STYLESHEET}"
  # Drop the directory part first and the extension second, so that a
  # dot in a directory name cannot swallow the file name, and a name
  # with spaces stays in one piece.
  input_name="${INPUT_FILE##*/}"
  XSLT_PROCESSED="$XSLT_PROCESSED_DIR/${input_name%.*}.proc"

  # Very verbose (-vv and above): make the processor verbose too.
  if [ "${VERBOSE:-0}" -gt 1 ]
  then
    XSLTOPTS="-v $XSLTOPTS"
  fi

  if [ -n "$VERBOSE" ]
  then
    # Show the command about to be run, one argument group per line.
    # printf keeps backslashes in path names literal.
    printf >&2 'xsltproc %s \\\n -o %s \\\n %s \\\n %s\n' \
      "${XSLTOPTS# }" "$XSLT_PROCESSED" "$STYLESHEET" "$INPUT_FILE"
  fi
  # XSLTOPTS is deliberately unquoted: it is a space-separated option
  # list that has to be split into separate words.
  xsltproc $XSLTOPTS -o "$XSLT_PROCESSED" "$STYLESHEET" "$INPUT_FILE" || exit $?
fi

# Make sure the output directory exists; stop here if it cannot be
# created rather than handing a missing directory to the format script.
if [ ! -d "$OUTPUT_DIR" ]
then
  [ -n "$VERBOSE" ] && echo >&2 "Creating output directory ${OUTPUT_DIR}"
  mkdir -p "$OUTPUT_DIR" || exit 1
fi

# Run the format script in post-process mode to finish off.
# It receives its inputs through the environment.
export OUTPUT_DIR
export XSLT_PROCESSED
export INPUT_FILE
if [ -n "$VERBOSE" ]
then
  export VERBOSE
  if [ "$VERBOSE" -gt 2 ]
  then
    # Extremely verbose: trace the format script as well.
    BASH="${BASH} -x"
  fi
fi
# BASH is deliberately unquoted so that an appended " -x" becomes a
# separate argument.
${BASH} "$FORMAT" post-process || exit 1
