#!/bin/bash
# ==============================================================
#
# Convert a text file to a PDF text stream.
# Defaults as set are landscape, 132cols, 72 lines (lineflow)
#
# Will test for the IBM Form Feed byte hercules writes out
# so it will be able to page break on hercules printer
# data streams.
# Yes, its primary purpose is to produce pdf files for
# job output. It should handle any other text input without
# any issue but you may want to change it back to portrait
# for that (and maybe implement line breaks for long lines).
#
# TODO: put the recommended binary marker comment line directly
#       after the start of the header section.
# ==============================================================
infile="$1"    # input file to process
diskfile="$2"  # optional, to screen if not provided
if [ "${infile}." = "." ];   # if omitted pipe from stdin
then
   infile='-'
fi
mypid=$$       # to keep workfiles unique
objnum=3       # we dynamically build from object 3
pagecount=0    # needed to build pdf fields
linecount=0    # just used to break page objects into pages
parentobject=0
outfile="/tmp/wrk_final.${mypid}"
workfile1="/tmp/wrk1.${mypid}"
workfile2="/tmp/wrk2.${mypid}"   # xrefs
workfile3="/tmp/wrk3.${mypid}"   # xrefs
xrefprog="/tmp/calclen.${mypid}" # program/script we need to create and use

# PORTRAIT   ---- 60 lines
#  mediabox='[0 0 612 792]'
#  maxlines=60
#  YPOSINIT=770   # default Yposition on page for line 1
#  fontsize=12
#  linespace="12.5"
#  fontname="Helvetica"
# LANDSCAPE   ---- 132 colums and 72 lines
#                  Lineflow :-)
mediabox='[0 0 792 612]'
maxlines=72
YPOSINIT=600   # default Yposition on page for line 1
fontsize=9
linespace="8"
fontname="Courier"
IBMFFCHAR=''  # hercules specific, form feed on this

# -----------------------------------------------------------
# A little helper to clean up work files so we keep things
# nice and tidy.
# -----------------------------------------------------------
clean_files() {
   if [ -f ${outfile} ];
   then
      /bin/rm ${outfile}
   fi
   if [ -f ${workfile1} ];
   then
      /bin/rm ${workfile1}
   fi
   if [ -f ${workfile2} ];
   then
      /bin/rm ${workfile2}
   fi
   if [ -f ${workfile3} ];
   then
      /bin/rm ${workfile3}
   fi
#   if [ -f ${xrefprog} ];
#   then
#      /bin/rm ${xrefprog}
#   fi
} # end clean_files

# ---------------------------------------------------------------------
# Offsets must always be 10 bytes with leading zeros
# A little helper to stuff zeros on the front of a text string.
# ---------------------------------------------------------------------
ten_bytes() {
   tempsize="$1"
   # NOT IN A LOOP, bash won't preserve the changes
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   if [ ${#tempsize} -lt 10 ];
   then
      tempsize=0${tempsize}
   fi
   echo "${tempsize}"
} # end store_xref_entries}

clean_files

# ------------------------------------------------------------
# Create this script, we need it.
# It will calculate the byte offsets we need for creating
# teh xref table later on.
# ------------------------------------------------------------
cat << EOF > ${xrefprog}
linenum=0
xref=1   # addressing offset by 1 as we want addr
         # of first byte in next line
# Change IFS to newline so read does not strip off
# any leading spaces
IFS="
"
cat \$1 | while read dataline
do
   linenum=\$((\${linenum} + 1))
   echo "\${linenum}:\${xref}:\${dataline}:"
   linelen=\${#dataline}
   xref=\$((\${xref} + \${linelen}))
done
unset IFS
EOF
if [ ! -f ${xrefprog} ];
then
   echo "**** Unable to create utility script."
   exit 1
fi

# ------------------------------------------------------------
# The PDF doctype/version comment header is not optional
# ------------------------------------------------------------
echo '%PDF-1.4' > ${outfile}              # alays start of document

# ------------------------------------------------------------
# Common objects at the top. Seems to barf badly if the        
# Catalog entry is not the first entry (probably my addressing
# as if the catalog is at the end pdf readers cannot seem to
# find it.
# ------------------------------------------------------------
cat << EOF >> ${outfile}
1 0 obj
<< /Type /Catalog
/Outlines 2 0 R
/Pages XXXXXX 0 R
>>
endobj
2 0 obj
<< /Type /Outlines
/Count 0
>>
endobj
EOF

# ------------------------------------------------------------
# Process everything in the file, break into new page
# objects after every ${maxlines} lines or at a form feed.
# (maxlines has to be no more than the chosen font size fits
# on a page, otherwise the reader shows just a blank page.)
# ------------------------------------------------------------
begin_page() {
   echo "BT">> ${workfile1}
   echo "/F1 ${fontsize} Tf">> ${workfile1}
   echo "${linespace} TL">> ${workfile1}
   #     X  Y
   echo "10 ${YPOSINIT} Td">> ${workfile1}
} # end begin_page
write_page() {
   startobjnum="$1"    # initial object we are working with
   echo "ET">> ${workfile1}
   echo "${startobjnum} 0 obj" >> ${outfile}
   echo "<< /Type /Page" >> ${outfile}
   echo "/Parent XXXXXX 0 R" >> ${outfile}   # adjust when we know
   echo "/MediaBox ${mediabox}" >> ${outfile} 
   echo "/Contents $((${startobjnum} + 1)) 0 R" >> ${outfile}
   echo "/Resources << /ProcSet XXXXXX 0 R" >> ${outfile}
   echo "/Font << /F1 XXXXXX 0 R >>" >> ${outfile}
   echo ">>" >> ${outfile}
   echo ">>" >> ${outfile}
   echo "endobj" >> ${outfile}
   # xref entry, byte position in outfile before write
   # data stream size we will have
   streamsize=`wc -c ${workfile1} | awk {'print $1'}`
   streamsize=$((${streamsize} - 1))  # do not count last LF
   echo "$((${startobjnum} + 1)) 0 obj" >> ${outfile}
   echo "<< /Length ${streamsize} >>" >> ${outfile}
   echo "stream" >> ${outfile}
   cat ${workfile1} >> ${outfile}
   echo "endstream" >> ${outfile}
   echo "endobj" >> ${outfile}
   /bin/rm ${workfile1}
} # end write_page


# Change IFS to newline so read does not strip off
# any leading spaces in the read of the dataline
IFS="
"
begin_page
cat ${infile} | while read dataline
do
   # hercules specific, FF is before the line is printed
   # so we have to do this before the block to text line
   # number is at max.
   # However, the blasted thing can be in the middle of
   # a data line, it doesn't have to be the first byte.
   # expr will return 1 for character 1, which is string position
   # 0 for extraction.
   charpos=`expr index "${dataline}" ${IBMFFCHAR}`
   if [ ${charpos} -gt 0 ];   # its in the string
   then
      if [ ${charpos} -eq 1 ];   # the easy one
      then
         # strip off the FF, leave the rest of the line in
         # dataline for normal processing on the next page
         dataline=${dataline:1:132}
         if [ ${linecount} -gt 0 ];  # ignore if just page jumped
         then
            write_page "${objnum}"
            objnum=$((${objnum} + 2))
            linecount=0
            pagecount=$((${pagecount} + 1))
            begin_page
         fi
      else
         # write the part before the FF, save the rest
         # of the line after the FF for normal processing
         # on the next page.
         part1=${dataline:0:$((${charpos} - 1))}
         dataline=${dataline:${charpos}:132}
         echo "T* (${part1}) Tj">> ${workfile1}
         if [ ${linecount} -gt 0 ];  # ignore if just page jumped
         then
            write_page "${objnum}"
            objnum=$((${objnum} + 2))
            linecount=0
            pagecount=$((${pagecount} + 1))
            begin_page
         fi
      fi
   fi
   # write line, resume normal code
   echo "T* (${dataline}) Tj">> ${workfile1}
   linecount=$((${linecount} + 1))
   if [ ${linecount} -ge ${maxlines} ];
   then
      write_page "${objnum}"
      objnum=$((${objnum} + 2))
      linecount=0
      pagecount=$((${pagecount} + 1))
      begin_page
   fi
   # outside if, we always need to save this
   echo "${objnum} ${pagecount}" > ${workfile3}
done
# retrieve the variables we saved
zzz=`cat ${workfile3}`
objnum=`echo "${zzz}" | awk {'print $1'}`
pagecount=`echo "${zzz}" | awk {'print $2'}`

# and save the last page
write_page "${objnum}"
objnum=$((${objnum} + 2))
pagecount=$((${pagecount} + 1))
echo "${objnum} ${pagecount}" > ${workfile3}
unset IFS

# ------------------------------------------------------------
# We can now create the parent as we have a final
# page count and will use the next available
# object for the parent
# ------------------------------------------------------------
parentobject=${objnum}   # have to globally fix the data file with this
cat << EOF >> ${outfile}
${objnum} 0 obj
<< /Type /Pages
/Kids [
EOF
# Kids start at 3, inc by 2 (page,stream then next pair)
temp1=${pagecount}
temp2=3
temp3=0
while [ ${temp3} -lt ${pagecount} ];
do
   echo "${temp2} 0 R" >> ${outfile}
   temp2=$((${temp2} + 2))
   temp3=$((${temp3} + 1))
done
cat << EOF >> ${outfile}
]
/Count ${pagecount}
>>
endobj
EOF

# ------------------------------------------------------------
# ------------------------------------------------------------
objnum=$((${objnum} + 1))
fontnum=${objnum}
cat << EOF >> ${outfile}
${objnum} 0 obj
<< /Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /${fontname}
/Encoding /MacRomanEncoding
>>
endobj
EOF
objnum=$((${objnum} + 1))
procsetnum=${objnum}
cat << EOF >> ${outfile}
${objnum} 0 obj
[/PDF /Text]
endobj
EOF

# ------------------------------------------------------------
# We have the page parent object number, update all the child
# pages and the catalog entry with the correct number now
# Also update the font and procset references 
# ------------------------------------------------------------
cat ${outfile} \
  | sed -e "s/Parent XXXXXX 0 R/Parent ${parentobject} 0 R/g" \
  | sed -e "s/Pages XXXXXX 0 R/Pages ${parentobject} 0 R/g" \
  | sed -e "s/F1 XXXXXX 0 R/F1 ${fontnum} 0 R/g" \
  | sed -e "s/ProcSet XXXXXX 0 R/ProcSet ${procsetnum} 0 R/g" > ${workfile2}
/bin/mv ${workfile2} ${outfile}

# ------------------------------------------------------------
# Now we can store the xref entries
# ------------------------------------------------------------
countxref=`grep "0 obj" ${outfile} | wc -l | awk {'print $1'}`
countxref=$((${countxref} + 1))   # must count entry 0 also
echo "xref" >> ${outfile}
echo "0 ${countxref}" >> ${outfile}   # sections 1 to ..
echo '0000000000 65535 f ' >> ${outfile}  # always present for index 0
bash ${xrefprog} ${outfile} | grep "0 obj:" | awk -F: {'print $2'} | while read bytepos
do
   bytepos=`ten_bytes ${bytepos}`
   echo "${bytepos} 00000 n " >> ${outfile}
done

# ------------------------------------------------------------
# And then the trailer entry
# countxref we needed above so we already have that
# ------------------------------------------------------------
startxref=`bash ${xrefprog} ${outfile} | grep ":xref:" | awk -F: {'print $2'}`
cat << EOF >> ${outfile}
trailer
<< /Size ${countxref}
/Root 1 0 R
>>
startxref
${startxref}
%%EOF
EOF

# ------------------------------------------------------------
# Move to output pdf file or display on screen depending on
# whether an output disk filename was specified.
# ------------------------------------------------------------
if [ "${diskfile}." != "." ];
then
   /bin/mv ${outfile} ${diskfile}
else
   cat ${outfile}
fi

# ------------------------------------------------------------
# Now done, clean up
# ------------------------------------------------------------
clean_files
exit 0