#!/usr/bin/mawk -f
# =====================================================================
# csa-env2rc: make selected environment variables suitable for being
#	      eval'ed by rc(1).
#
# Copyright (c) 2001,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# =====================================================================
# $Id: csa-env2rc 43 2007-09-24 08:30:21Z carlo $

# =====================================================================
# Notes:
#
# According to the XML specification, characters 0 through 31 and
# character 127 may not appear anywhere in a document, not even as
# character references, such as &#31;. The only other possibility is
# to either remove them or turn them into something sensible. I took
# this second approach, whereby \001 (that is, the rc(1) list
# concatenation character) is replaced with a single blank. This is
# experimental, and may change in future versions of CSA.
# =====================================================================

# ---------------------------------------------------------------------
# Main program: everything happens in BEGIN, no input is read.
#
# Walks ENVIRON and prints, between a leading "{" and a trailing "}",
# one rc(1) assignment per selected variable and requested encoding.
#
# Switches recognised on the command line:
#
#   -m, --match PATTERN        only variables whose name matches PATTERN
#   -d, --delete PATTERN       delete PATTERN from each value
#   -b, --blank PATTERN        replace PATTERN in each value with a blank
#   -s, --strip-names PATTERN  delete PATTERN from each variable name
#   -p, --prefix STRING        prepend STRING to each output name
#   -t, --trim                 strip leading/trailing blanks from values
#   -O, --override NAME VALUE  print one extra assignment NAME=(VALUE)
#   -M, --max-env SIZE         stop once the accumulated size exceeds SIZE
#   -c, --context STRING       stored in 'context' (not used in this block)
#   -u, --unset                also print "name=();" for each variable
#   -e, --encoding LIST        comma-separated list of '+'-joined encoder
#                              letters (i, u, t, a, x) handed to escape()
#   -x, --debug                print the patterns in use on stderr
# ---------------------------------------------------------------------

BEGIN {
  NULL = ""

  # Get local settings.
  csa_install = ENVIRON["CSA_INSTALL"]
  stdout = ENVIRON["CSA_STDOUT"]
  stderr = ENVIRON["CSA_STDERR"]

  # Set default values if necessary.
  if (csa_install == NULL) csa_install = "/usr/local/csa"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Parse the command line. The loop is bounded by ARGC rather than by
  # the first empty ARGV[] element, so that an empty-string argument
  # does not silently cut option processing short.
  for (i = 1; i < ARGC; i++) {
    if (ARGV[i] == "-m" || ARGV[i] == "--match") m_pattern = ARGV[++i]
    else if (ARGV[i] == "-d" || ARGV[i] == "--delete") {
      remove = 1; d_pattern = ARGV[++i]
    }
    else if (ARGV[i] == "-b" || ARGV[i] == "--blank") {
      blank = 1; b_pattern = ARGV[++i]
    }
    else if (ARGV[i] == "-x" || ARGV[i] == "--debug") debug = 1
    else if (ARGV[i] == "-s" || ARGV[i] == "--strip-names") {
      strip_names = 1; s_pattern = ARGV[++i]
    }
    else if (ARGV[i] == "-p" || ARGV[i] == "--prefix") prefix = ARGV[++i]
    else if (ARGV[i] == "-t" || ARGV[i] == "--trim") trim = 1
    else if (ARGV[i] == "-O" || ARGV[i] == "--override") {
      ovar = ARGV[++i]; oval = ARGV[++i]
    }
    else if (ARGV[i] == "-M" || ARGV[i] == "--max-env") max_env = ARGV[++i]
    else if (ARGV[i] == "-c" || ARGV[i] == "--context") context = ARGV[++i]
    else if (ARGV[i] == "-u" || ARGV[i] == "--unset") unset = 1
    else if (ARGV[i] == "-e" || ARGV[i] == "--encoding") {
       split(ARGV[++i],esc,",")
    }
  }

  ARGC = 1				# Fix argv[]

  if (debug) {
    print "-m pattern: " m_pattern    > stderr
    print "-d pattern: " d_pattern    > stderr
    print "-b pattern: " b_pattern    > stderr
    print "-s pattern: " s_pattern    > stderr
  }

  if (m_pattern == NULL) m_pattern = ".*"

  printf("{\n")		# Make sure we output something valid in any case.

  for (env in ENVIRON) {
    if (env !~ m_pattern) continue

    # Always skip rc(1) functions, confidential data
    # and CSA-specific stuff.
    if (env ~ /^fn_/ || env ~ /PGPPASS$/ || env ~ /^csa[0-9]+$/) continue

    var = ENVIRON[env]

    # Honour the '-s' switch.
    if (strip_names) gsub(s_pattern, NULL, env)

    # Skip invalid variable names. This must be tested *after*
    # names have been stripped. Think of a "WWW_6" that becomes a "6",
    # or a name that is stripped to the empty string.

    if (env !~ /^[A-Za-z_][A-Za-z0-9_]*$/) {
       print "csa-env2rc: bad variable name " env > stderr
       continue
    }

    # Honour '-d' and '-b' first.
    if (remove) gsub(d_pattern, NULL, var)
    if (blank) gsub(b_pattern, " ", var)

    if (trim) {
       sub(/^ +/, NULL, var)
       sub(/ +$/, NULL, var)
    }

    # Limit the environment size if requested. The size is the running
    # total of prefix + name + value lengths; the variable that pushes
    # the total over the limit is not printed, nor is any later one.
    if (max_env) {
       env_size += length(prefix env var)
       if (env_size > max_env) break
    }

    # Print one assignment per entry of the '-e' list. With no '-e'
    # the loop body still runs once, with an empty encoder chain, and
    # prints the plain rc(1)-quoted value.
    i = 1
    do {
      newvar = var

      # x is the number of encoders in this chain; the last one picks
      # the extra name prefix.
      x = split(esc[i],e,"+")
      if (e[x] == "i") newpref = prefix "ISO_"
      else if (e[x] == "u") newpref = prefix "URI_"
      else if (e[x] == "t") newpref = prefix "TBL_"
      else if (e[x] == "a") newpref = prefix "AWK_"
      else if (e[x] == "x") newpref = prefix "XML_"
      else newpref = prefix

      # Apply the encoders of the chain from left to right.
      j = 1
      do {
        if (x) newvar = escape(newvar,e[j])
      } while (e[++j] != NULL)

      # The final escape() call, with no encoder, does the rc(1) quoting.
      printf("%s%s=(%s);\n", newpref, env, escape(newvar))
    } while (esc[++i] != NULL)

    if (unset) printf("%s=();\n", env)
  }

  # Override one of the output assignments if requested.
  # Only one override is currently supported.

  if (ovar != NULL) {
     env_size += length(ovar oval)
     if (!max_env || (env_size <= max_env)) {
        # Parenthesised like the per-variable assignments above, so that
        # a value made of several \001-separated elements is emitted as
        # a single list.
        printf("%s=(%s);\n", ovar, escape(oval))
     }
  }

  printf("}\n")		# Print list-end separator.
}

# ---------------------------------------------------------------------
# string escape(string string, string e)
#
# Encode 'string' according to the encoder letter 'e':
#
#   "i"  ISO entities, via isoencode()
#   "u"  URI encoding, via uriencode()
#   "t"  backslashes doubled, newline and tab turned into \n and \t
#   "a"  backslashes doubled, '"' and '/' backslash-escaped, newline
#        turned into \n
#   "x"  XML: & < > " turned into entities, \001 turned into a blank
#
# Any other value of 'e' (including none at all) selects rc(1) quoting:
# the string is split on \001 and each element is returned wrapped in
# single quotes, with embedded single quotes doubled and a blank in
# front of each element. An empty string yields an empty result.
#
# a, i, j and s are local variables.
# ---------------------------------------------------------------------

function escape(string,e,		a,i,j,s) {

  if (e == "i") string = isoencode(string)
  else if (e == "u") string = uriencode(string)
  else if (e == "t") {
     gsub(/\\/,"&&", string)		# must come first
     gsub(/\n/,"\\n",string)
     gsub(/\t/,"\\t",string)
  }
  else if (e == "a") {
     gsub(/\\/,"&&", string)		# must come first
     gsub(/"/,"\\\"",string)
     gsub(/\//,"\\/",string)
     gsub(/\n/,"\\n",string)
  }
  else if (e == "x") {
     gsub(/&/, "\\&amp;", string)	# must come first
     gsub(/</, "\\&lt;", string)
     # '>' has to be escaped too, otherwise a value containing "]]>"
     # would not be well-formed XML character data.
     gsub(/>/, "\\&gt;", string)
     gsub(/"/,"\\&quot;",string)
     gsub(/\001/, " ", string)
  }
  else {
     i = split(string, a, "\001")		# Split rc(1) lists.

     for (j=1; j<=i; j++) {
        gsub("'", "''", a[j])
        s = s " '" a[j] "'"
     }
     string = s
  }
  return string
}

# ---------------------------------------------------------------------
# string isoencode(string s)
#
# Encode special HTML and form characters into the equivalent ISO entities.
# The set of characters that are ISO-encoded is:
#
#		& # < > | " ' ` \ \n \t \f SPACE
#
# Warning: '&' must be escaped first, then '#'. They need to be escaped to
# prevent the data from containing statements (both numeric and literal) that
# could be parsed by a Web server as valid SSI calls, or ampersand-escaped
# sequences that would act as formatting instructions to the Web browser. See
# http://www.apache.org/info/css-security/ for more info on the so-called
# Cross-Site Scripting vulnerabilities.
# ---------------------------------------------------------------------

function isoencode(s_string,		entity, result, len, pos, ch) {

   # Lookup table: character -> replacement entity. Characters that
   # are not listed here (rc(1)'s ^A included) are copied unchanged.
   entity["&"]  = "&amp;"		# Ampersand
   entity["#"]  = "&#35;"		# Hash mark
   entity["<"]  = "&#60;"		# Open tag
   entity[">"]  = "&#62;"		# Close tag
   entity["|"]  = "&#124;"		# Generic separator
   entity["\""] = "&#34;"		# double quote
   entity["'"]  = "&#39;"		# single quote
   entity["`"]  = "&#96;"		# backtick
   entity["\n"] = "&#10;"		# newline
   entity["\t"] = "&#9;"		# tab
   entity["\f"] = "&#12;"		# form-feed
   entity[" "]  = "&#32;"		# space
   entity["\\"] = "&#92;"		# backslash

   # Single left-to-right pass: every input character is translated
   # exactly once, so the '&' and '#' found in the replacement text
   # are never encoded again.
   result = ""
   len = length(s_string)
   for (pos = 1; pos <= len; pos++) {
      ch = substr(s_string, pos, 1)
      if (ch in entity) result = result entity[ch]
      else result = result ch
   }

   return result
}

# ---------------------------------------------------------------------
# string uriencode(string s)
#
# Perform URI-encoding of special characters, according to RFC 1738. The set
# of characters that are URI-encoded is:
#
#  % # & ? + SPACE = " \t \f ' \n \r $ ( ) [ ] ` < > \ / . | ; ! * { } : ~
#
# Note how '%' must be escaped first, and SPACE must be done after '+'.
# See http://www.apache.org/info/css-security/ for more info on the so-called
# Cross-Site Scripting vulnerabilities.
# ---------------------------------------------------------------------

function uriencode(s_string,		code, result, len, pos, ch, in_blanks) {

   # Lookup table: character -> percent-encoded form. Characters that
   # are not listed here (rc(1)'s ^A included) are copied unchanged.
   code["%"]  = "%25";  code["+"]  = "%2B"
   code["\t"] = "%09";  code["\f"] = "%0C"
   code["\n"] = "%0A";  code["\r"] = "%0D"
   code["'"]  = "%27";  code["\""] = "%22"
   code["#"]  = "%23";  code["&"]  = "%26"
   code["?"]  = "%3F";  code["="]  = "%3D"
   code["$"]  = "%24";  code["`"]  = "%60"
   code["("]  = "%28";  code[")"]  = "%29"
   code["["]  = "%5B";  code["]"]  = "%5D"
   code["<"]  = "%3C";  code[">"]  = "%3E"
   code["\\"] = "%5C";  code["/"]  = "%2F"
   code["."]  = "%2E";  code["|"]  = "%7C"
   code[";"]  = "%3B";  code["!"]  = "%21"
   code["*"]  = "%2A";  code[":"]  = "%3A"
   code["{"]  = "%7B";  code["}"]  = "%7D"
   code["~"]  = "%7E"

   # Add more encodings to the table above, if necessary.

   # Single left-to-right pass: every input character is translated
   # exactly once, so the '%' and '+' written to the result are never
   # encoded again.
   result = ""
   in_blanks = 0
   len = length(s_string)
   for (pos = 1; pos <= len; pos++) {
      ch = substr(s_string, pos, 1)

      if (ch == " ") {
         # A whole run of consecutive spaces becomes one single '+'.
         # NOTE(review): this is lossy; confirm that it is intended.
         if (!in_blanks) result = result "+"
         in_blanks = 1
         continue
      }
      in_blanks = 0

      if (ch in code) result = result code[ch]
      else result = result ch
   }

   return result
}

# End of program.
