#!/bin/sed -f
# =====================================================================
# csa-uridecode: decode one or more URI-encoded strings.
#
# Copyright (c) 2008,2009 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# =====================================================================
# $Id$

# Append the lookup table. For the sake of speed, I use only a (substantial)
# subset of all the possible ASCII and ISO-8859-1 escape codes. What has
# been left out are codes for numbers, upper- and lower- case letters,
# which seems reasonable. Unfortunately the overall process remains quite
# slow.
#
# Speed-wise, this program is a very poor performer, especially with
# GNU sed(1). Please use 'csa-urldecode' instead.

# Append a newline followed by the lookup table to the end of the pattern
# space. The table is a flat run of "%XX" codes, each immediately followed
# by the character it decodes to. Characters that are special inside the
# replacement part of an s command (&, / and \) are backslash-escaped there.
# NOTE(review): some entries (e.g. %07, %08 and %A1..%FF) appear to be
# followed by raw control or 8-bit bytes that may not display in every
# viewer; confirm with a hex dump and use a byte-safe editor before
# changing the table line.
s/$/\
%07%08%09	%0B%0C%20 %21!%22"%23#%24$%26\&%28(%29)%2A*%2C,%2D-%2E.%2F\/%3A:%3B;%3C<%3D=%3E>%3F?%40@%5B[%5C\\%5D]%5E^%5F_%60`%7B{%7C|%7D}%7E~%A0 %A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF/

# Look up each escaped value in the table and replace it with the unescaped
# one, if found. The replacement order does not matter here. Lookup misses
# will remain escaped.
#
# How the substitution below is laid out:
#   \1  "%" plus the two characters after it, taken from the data part
#   \2  the data between that escape and the newline separating the table
#   \3  the table text in front of the matching entry
#   \1  (backreference) the same three characters, found inside the table
#   \4  the single character that follows them in the table, i.e. the
#       decoded value
#   \5  the rest of the table
# The replacement puts \4 where the escape was, keeps \2, and rebuilds the
# table (\3\1\4\5) unchanged so that it can be searched again.
#
# NOTE(review): the backreference compares the escape literally and the
# table lists its codes with uppercase hex digits, so a lowercase escape
# such as %2f is a lookup miss and stays escaped.

# Loop entry point.
:1
s/\(%..\)\(.*\)\n\(.*\)\1\(.\)\(.*\)/\4\2\
\3\1\4\5/
# Go round again for as long as the substitution keeps succeeding.
t1

# Discard the lookup table: remove everything from the embedded newline to
# the end of the pattern space, leaving only the (partly decoded) data.
s/\n.*//

# Order-dependent substitutions. The sequence below matters:
#
# 1. Turn every literal "+" into a space first ...
s/+/ /g
# 2. ... and only then decode %2B into "+", so that the plus sign it yields
#    is not converted to a space by step 1. Hex digits in a percent-escape
#    are case-insensitive (RFC 3986, section 2.1), so %2b is accepted too.
s/%2[Bb]/+/g
# 3. Decode %25 last: the "%" it yields must not be paired with the
#    characters after it and decoded a second time (%252B must end up as
#    the text "%2B", not as "+").
s/%25/%/g

# Decoding %0A into a literal newline must come last; it is currently
# disabled (the substitution below is commented out).
#s/%0A/\
#/g

# End of program
