#!/usr/bin/mawk -f
# *********************************************************************
# csa-tbl2select: create an XHTML <select> widget based on a NoSQL
#		  table.
#
# Copyright (c) 2002,2008 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
# $Id: csa-tbl2select 49 2009-08-03 15:50:05Z carlo $

# Start-up: read the environment, parse the command line, apply the
# defaults and write the opening <select> tag (plus the optional empty
# entry).
#
# Recognised options (valued options take the following argument):
#   -n, --name      name attribute of the widget (default "select")
#   -m, --multi     flag: add multiple="multiple"
#   -s, --size      size attribute (default 1)
#   -S, --selected  field value whose <option> is marked as selected
#   -v, --value     number of the field used as option value (default 1)
#   -V, --visible   number of the field used as option text
#                   (default: same as --value)
#   -t, --trunc     maximum length of the value field
#   -i, --input     input table (default: standard input)
#   -o, --output    output file (default: $CSA_STDOUT or /dev/stdout)
#   -e, --empty     text of a leading <option> with an empty value
#   -h, --help      print the help file and exit with status 1
BEGIN {
  NULL = ""; FS = "\t"

  # Get local settings.
  csa_install = ENVIRON["CSA_INSTALL"]
  stdout = ENVIRON["CSA_STDOUT"]
  stderr = ENVIRON["CSA_STDERR"]

  # Set default values if necessary.
  if (csa_install == NULL) csa_install = "/usr/local/csa"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Scan every argument up to ARGC. The loop is bounded by the argument
  # count rather than by the first empty string, so that an empty
  # argument (e.g. an unset shell variable passed as "$x") does not
  # silently discard the options that follow it. A valued option given
  # as the very last argument reads an unset ARGV element, i.e. "".
  for (i = 1; i < ARGC; i++) {
    if (ARGV[i] == "-n" || ARGV[i] == "--name") name = ARGV[++i]
    else if (ARGV[i] == "-m" || ARGV[i] == "--multi") multi = 1
    else if (ARGV[i] == "-s" || ARGV[i] == "--size") size = ARGV[++i]
    else if (ARGV[i] == "-S" || ARGV[i] == "--selected") selected = ARGV[++i]
    else if (ARGV[i] == "-v" || ARGV[i] == "--value") value = ARGV[++i]
    else if (ARGV[i] == "-V" || ARGV[i] == "--visible") visible = ARGV[++i]
    else if (ARGV[i] == "-t" || ARGV[i] == "--trunc") trunc = ARGV[++i]
    else if (ARGV[i] == "-i" || ARGV[i] == "--input") i_file = ARGV[++i]
    else if (ARGV[i] == "-o" || ARGV[i] == "--output") o_file = ARGV[++i]
    else if (ARGV[i] == "-e" || ARGV[i] == "--empty") empty = ARGV[++i]
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       # Show the help file without its comment lines, then stop. The
       # non-zero rc also tells the END rule not to print the footer.
       system("grep -v '^#' " csa_install "/help/t2htselect.txt")
       rc = 1
       exit(rc)
    }
  }

  # Hide the options from awk's own input-file processing: with
  # ARGC == 1 the table is read from standard input.
  ARGC = 1

  # Set defaults.

  if (o_file == NULL) o_file = stdout
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }
  if (name == NULL) name = "select"
  if (!size) size++
  if (!value) value++
  if (!visible) visible = value

  # Header.
  # NOTE(review): name and empty are written verbatim, without going
  # through xmlencode() -- confirm that callers pass markup-safe text.
  printf("<select name=\"%s\"", name) > o_file
  printf(" size=\"%d\"", size) > o_file
  if (multi) printf(" multiple=\"multiple\"") > o_file
  printf(">\n") > o_file

  if (empty != NULL)
     printf(" <option value=\"\">%s</option>\n", empty) > o_file
}

# Table body.

# Skip table header, if any: header lines start with a ^A (\001).
/^\001/ { next }

# Emit one <option> element per remaining input line. Field number
# `value' supplies the value attribute and field number `visible' the
# displayed text; both are un-escaped and then XML-encoded on output.
{
  # NOTE(review): truncation is applied to the still-escaped field, so
  # it can cut an escape sequence such as \t in half -- confirm that
  # this is intended.
  if (trunc) $value = substr($value,1,trunc)
  printf(" <option value=\"%s\"", xmlencode(unescape($value))) > o_file

  # Only the first matching row is marked as selected. Appending "" to
  # both operands forces a string comparison: a plain `==' between a
  # field and a command-line value is numeric whenever both look like
  # numbers, which would make e.g. "01" match "1", and would make an
  # unset `selected' match a field holding "0".
  if (!found && ($value "") == (selected "")) {
     found = 1
     printf(" selected=\"selected\"") > o_file
  }

  printf(">%s</option>\n", xmlencode(unescape($visible))) > o_file
}

# Close the widget, unless BEGIN already bailed out with a non-zero
# status (rc), in which case that status is simply passed on.
END {
  if (!rc) {
     print "</select>" > o_file		# Footer.
  }
  else {
     exit(rc)
  }
}

# ---------------------------------------------------------------------
# string xmlencode(string s)
#
# Encode special XML characters into the equivalent ISO entities. The
# set of characters that are XML-encoded is:  & < " \001
# 
# Warning: '&' must be escaped first. According to the XML
# specification, characters 0 through 31 and character 127 may not
# appear anywhere in a document, not even as character references, such
# as &#31;. The only possibility is to either remove them or turn
# them into something sensible. I took the second approach, whereby
# \001 (that is, rc(1)'s list concatenation character) is replaced with
# a single blank. This is experimental, and may change in future
# versions of CSA.
# ---------------------------------------------------------------------

function xmlencode(string,		encoded) {

   # Work on a local copy and hand that back to the caller.
   encoded = string

   # ^A becomes a blank. This substitution neither produces nor
   # consumes any of the characters handled below, so it can go first.
   gsub(/\001/, " ", encoded)

   # The ampersand has to be encoded before any entity is inserted,
   # otherwise the '&' of those entities would be encoded again.
   # "\\&" yields a literal '&' in the replacement text.
   gsub(/&/, "\\&amp;", encoded)

   gsub(/"/, "\\&quot;", encoded)		# double quote
   gsub(/</, "\\&lt;", encoded)			# open tag

   return encoded
}

# ---------------------------------------------------------------------
# string unescape(string s)
#
# Un-escape NoSQL special sequences \t, \n and \\
# Note: global variables, like RSTART and RLENGTH cannot be localised.
# ---------------------------------------------------------------------

function unescape(s,		out,at,len,resume) {

   # Three passes over the string: first \t, then \n, finally \\.
   # In the first two passes a match is a run of backslashes followed
   # by the letter, so `len' is the number of backslashes plus one.
   # RSTART and RLENGTH are copied right after each match() call.

   # Pass 1: \t -> TAB.
   while (match(s, /\\+t/)) {

      at = RSTART; len = RLENGTH
      resume = at + len			# first character after the match

      if (len % 2)
	 # Even number of backslashes: they pair up among themselves
	 # and the letter is literal. Keep the whole match for pass 3.
	 out = out substr(s, 1, resume - 1)
      else
	 # Odd number of backslashes: the last one escapes the letter.
	 # Keep everything before that backslash and add a real TAB.
	 out = out substr(s, 1, resume - 3) "\t"

      s = substr(s, resume)
   }

   s = out s
   out = ""

   # Pass 2: \n -> newline, same logic as above.
   while (match(s, /\\+n/)) {

      at = RSTART; len = RLENGTH
      resume = at + len

      if (len % 2)
	 out = out substr(s, 1, resume - 1)
      else
	 out = out substr(s, 1, resume - 3) "\n"

      s = substr(s, resume)
   }

   s = out s
   out = ""

   # Pass 3: each remaining pair of backslashes collapses into one.
   # Keep the text up to and including the first backslash of the
   # pair, then continue after the second one.
   while (match(s, /[\\][\\]/)) {

      at = RSTART

      out = out substr(s, 1, at)
      s = substr(s, at + 2)
   }

   return out s
}

# End of program.
