# =====================================================================
# unixify.awk: turn a free-form string into a safer form.
#
# Copyright (c) 2007,2008,2009,2010 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# =====================================================================
# string unixify(string s [,boolean mode])
#
# Lower-cases "s", maps en-dash/em-dash to a plain hyphen, turns blanks
# and tabs into dashes, and hands the result to _userencode().
#
# If mode is true (e.g. "1") then "s" is treated as a page name,
# otherwise "s" is treated as a group name (default).
#
# Environment variables read:
#   TNS_LOCAL_HACK_1   selects the legacy (non-squeezing) dash handling
#   TNS_PAGE_MAXLEN    passed to _userencode() for page names
#   TNS_GROUP_MAXLEN   passed to _userencode() for group names
#
# NOTE(review): _bool(), _strip(), _userencode(), _TRUE and _O_MIDDLE
# are defined elsewhere; their exact semantics are not visible here.
# =====================================================================

function unixify(s,mode) {

    s = tolower(s)

    # Editors may use en-dash or em-dash characters in page names where
    # a true hyphen was meant, usually by mistake. Normalize both to a
    # plain "-" here. These characters remain allowed in page contents,
    # even if they may not be palatable to some XML clients.

    # UTF-8 byte sequence of U+2013 (en dash):
    # http://www.fileformat.info/info/unicode/char/2013/index.htm
    gsub(/\xe2\x80\x93/,"-",s)

    # UTF-8 byte sequence of U+2014 (em dash):
    # http://www.fileformat.info/info/unicode/char/2014/index.htm
    gsub(/\xe2\x80\x94/,"-",s)

    if (_bool(ENVIRON["TNS_LOCAL_HACK_1"]) != _TRUE) {
        # Default behaviour: any run of blanks, tabs and/or dashes is
        # squeezed into one single dash, so "My page - how nice"
        # becomes "my-page-how-nice".
        s = _strip(s,_O_MIDDLE)
        gsub(/[- \t]+/,"-",s)
    }
    else {
        # Backward-compatibility mode, needed for older pre-existing
        # TW databases: every blank is turned into its own dash rather
        # than being squeezed, so "My page - how nice" becomes
        # "my-page---how-nice". Note that a run of tabs still yields
        # one single dash.
        s = _strip(s)
        gsub(/\t+/,"-",s)
        gsub(/[- ]/,"-",s)
    }

    # Group name is the default; a true mode selects page-name encoding.
    if (!mode) return _userencode(1,s,ENVIRON["TNS_GROUP_MAXLEN"])

    return _userencode(2,s,ENVIRON["TNS_PAGE_MAXLEN"])
}