# =====================================================================
# rssFeed.awk: RPC I/O function for rpclib/rssFeed.
#
# Copyright (c) 2007,2008,2009,2010 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# =====================================================================

# =====================================================================
# void _userproc(int mode)
#
# Request/response hook for the rssFeed RPC.
#
# mode == _O_REQUEST:
#   Reads positional request arguments "1" (group), "2" (special
#   'tw-' page name) and "3" (optional page) and stores the derived
#   values with _rcset() under the cgi.* keys, then stores a
#   validated REMOTE_ADDR.
#
# any other mode (response):
#   Walks _TBL[1.._TBL[0]] twice and emits, through _mrwresponse(),
#   first the RSS 1.0 channel index and then one item per row.
#   Each row is a tab-separated record with the fields:
#     p_uri, p_[m,v]time, p_name, p_modau, p_descr [,k_page]
#
# All names after 'mode' in the parameter list are locals; callers
# pass 'mode' only.
# =====================================================================

function _userproc(mode,	value,url,a,b,i,title,g_uri,\
			p_name,page_dir,page,tmp,x,base) {

   if (mode == _O_REQUEST) {		# request.

      # target group
      value = _request("1",1)

      # group must not be null and it may not contain the
      # unescaped ``.'' character.

      if (value != _NULL && value !~ /\./) {
         _rcset("cgi.group",unixify(value))
         _rcset("cgi.group.literal",value)
      }

      # special page name corresponding to the desired view.
      value = _request("2",1)

      # special page names must begin with 'tw-'.

      if (value ~ /^tw-[a-z][-a-z0-9]+-recent-(pages|headlines)$/) {
         # extract subcat from cgi.tw.page if cgi.subcat is empty (this
         # is necessary to have the "Print" action do the right thing).
         sub(/^tw-/,"",value)
         if (sub(/-recent-headlines$/,"",value)) {
            _rcset("cgi.tw.page","tw-recent-headlines")
         }
         else if (sub(/-recent-pages$/,"",value)) {
            _rcset("cgi.tw.page","tw-recent-pages")
         }
         # what is left of the name after both trims is the subcat.
         _rcset("cgi.subcat",value)
      }
      else if (value ~ /^tw-/) {
         _rcset("cgi.tw.page",unixify(value,1))
      }

      # optional target page within group (required by some views).
      value = _request("3",1)

      # page name must be at least 2-character long.
      if (length(value) > 1) {
         _rcset("cgi.page.literal",value)
         _rcset("cgi.page.uri",_uriencode(value))
         value = unixify(value,1)
         _rcset("cgi.page",value)

         # Extract page meta-category if available. This will override
         # any subcat extracted from cgi.tw.page (see above).
         if ((value=getcat(value)) != _NULL) _rcset("cgi.subcat",value)
      }

      # The following test is necessary since the address could,
      # at least in theory, have been set to any string by the
      # remote user, due to how it is handled to cope with stunnel(8)
      # and the lack of transproxy support in kernel 2.4.x.

      if (_isipaddr(ENVIRON["REMOTE_ADDR"]) == _TRUE)
         value = ENVIRON["REMOTE_ADDR"]
      else value = "0.0.0.0"

      _rcset("REMOTE_ADDR",value)
   }

   else {				# response.

      # Prepare empty array for _mrwresponse() (see below).
      # This is already a local var, but I want to make clear
      # that it is an array, for documentational purposes.
      delete b

      title = _rcget("tbl_group.g_descr",1)
      g_uri = _rcget("tbl_group.g_uri",1)
      p_name = _rcget("cgi.tw.page.url")
      page_dir = _rcget("tw_gstem")

      if (_rcget("cgi.page") != _NULL)
         p_name = p_name "/" _rcget("cgi.page")

      # Common prefix of the channel URL, the channel link and every
      # relative item URL; it does not change while looping.
      base = ENVIRON["CSA_RPC_URI"] "/" ENVIRON["CSA_LANG"] "/" g_uri

      # p_uri, p_[m,v]time, p_name, p_modau, p_descr [,k_page]

      # RSS 1.0 response index.

      x = 1		# stays 1 until the first listed row is met.
      for (i=1; i<= _TBL[0]; i++) {

         split(_TBL[i], a, "\t")

         # Pages with nil descriptions are excluded from RSS feeds.
         if (a[5] ~ /^ *- *$/) continue

         # absolute URIs are used verbatim, anything else is
         # taken as relative to the group.
         if (a[1] ~ /^https?:\/\//) url = a[1]
         else url = base "/" a[1]

         # The channel header is emitted just once, right before
         # the first listed row.
         if (x++==1) {
            _mrwresponse(_MRW,"channel","",(base "/" p_name),\
			"","","title",title)

            # make language strictly ISO-639, as described at this link:
            # http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes

            value = tolower(ENVIRON["CSA_LANG"])
            gsub(/_/,"-",value)

            _mrwresponse(_MRW,"","","","","","link",base,b,b,1)
            _mrwresponse(_MRW,"","","","","","dc:language",value)
            if (ENVIRON["TNS_RSS_CREATOR"] != _NULL)
               _mrwresponse(_MRW,"","","","","",\
			"dc:creator",ENVIRON["TNS_RSS_CREATOR"])
            if (ENVIRON["TNS_RSS_RIGHTS"] != _NULL)
               _mrwresponse(_MRW,"","","","","",\
			"dc:rights",ENVIRON["TNS_RSS_RIGHTS"])
            if (ENVIRON["TNS_RSS_SUBTITLE"] != _NULL)
               _mrwresponse(_MRW,"","","","","",\
			"description",ENVIRON["TNS_RSS_SUBTITLE"])
            _mrwresponse(_MRW,"","","","","",\
			"dc:date",_rcget("CSA_TIME_ISO8601",1))
            _mrwresponse(_MRW,"","items","","rdf:Seq")
         }
         _mrwresponse(_MRW,"","","","","","rdf:li",url)
      }

      _mrwresponse(_MRW,".")		# close the channel index.

      # RSS 1.0 response body.

      for (i=1; i<= _TBL[0]; i++) {

         split(_TBL[i], a, "\t")

         # Pages with nil descriptions are excluded from RSS feeds.
         # Checked first, so that no work is done on skipped rows.
         if (a[5] ~ /^ *- *$/) continue

         # Mangle according to http://www.w3.org/TR/NOTE-datetime .
         sub(/^[^,]+,/,_NULL,a[2]); sub(/ /,"T",a[2])

         # Make feed readers happy by appending a plausible time zone
         # if missing. This both for backward-compatibility with previous
         # TW versions and for those RSS views that use 'vtime' instead
         # of either 'mtime' or 'ctime', since 'vtime' is always in a
         # simplified local time format.

         if (a[2] !~ /[-+][:0-9]+$/) a[2] = a[2] _TIME[":z"]

         if (a[1] ~ /^https?:\/\//) url = a[1]
         else url = base "/" a[1]

         if (a[6] == _NULL) {

            # No k_page field: the item carries the stored page
            # description only.

            _mrwresponse(_MRW,"item","",url)
            value = a[3]
            sub(/\./,": ",value)	# improve subcat delimiter.
            _mrwresponse(_MRW,"","","","","","title",value)
            _mrwresponse(_MRW,"","","","","","link",url,b,b,1)
            _mrwresponse(_MRW,"","","","","","dc:creator",a[4])
            _mrwresponse(_MRW,"","","","","","dc:date",a[2])

            # Omit page descriptions if equal to page names,
            # regardless of the subcat trailer.
            if (a[5] != substr(a[3],index(a[3],".")+1))
               _mrwresponse(_MRW,"","","","","","description",a[5])
         }
         else {

            # k_page is set: the item description is taken from the
            # page file itself.

            page = page_dir "/" a[6] "+wki"

            # Note: newlines MUST be preserved, or preformatted
            # sections (e.g. "<pre>") will no longer work (the extra
            # leading newline should not matter).

            value = _NULL

            # The parentheses make the intended parse explicit: read
            # a line from 'page', then compare getline's return value
            # with 0. An unreadable file (-1) simply yields no lines.
            while ((getline tmp < page) > 0) value = value "\n" tmp
            close(page)

            if (value !~ /[a-zA-Z0-9]/) continue	# skip empty pages.

            # Note how the TW concept of "abstract" works: an editor can
            # select the portion of a page which will be used as the page
            # abstract by surrounding such portion by suitable application-
            # level wiki tags. This means that a page abstract isn't a
            # separate piece of information but it is simply a selected
            # part of the page body, that will be rendered ***in alternative
            # to the actual page body*** in tw-recent-pages and possibly
            # other static views. When a page is rendered for normal
            # display the abstract section is removed.

            sub(/.*\(::ab:\)/,"",value)
            sub(/\(:ab::\).*/,"",value)

            # This would probably break the feed's XML well-formedness,
            # so I use the simplified form below.
            #if (sub(/\(:i:\).*/," ...",value))
            #	value = "" value

            sub(/\(:i:\).*/," ...",value)

            _mrwresponse(_MRW,"item","",url)
            tmp = a[3]
            sub(/\./,": ",tmp)	# improve subcat delimiter.
            _mrwresponse(_MRW,"","","","","","title",tmp)
            _mrwresponse(_MRW,"","","","","","link",url,b,b,1)
            _mrwresponse(_MRW,"","","","","","dc:creator",a[4])
            _mrwresponse(_MRW,"","","","","","dc:date",a[2])

            # Save on overhead if there's no RDFa to parse.
            if (value ~ /\(:v-/) {
               if (_rcget("TNS_GROUP_MISC_PROP",4) == "rdfa")
                  value = _rdfacpi(value)
               else value = _mfmtcpi(value)
            }

            # Page bodies may contain markup, so they need to be
            # enclosed in a CDATA section. A literal "]]>" inside the
            # body would end that section early, so it is split across
            # two adjacent CDATA sections first.

            gsub(/\]\]>/,"]]]]><![CDATA[>",value)
            value = "<![CDATA[" value "]]>"

            _mrwresponse(_MRW,"","","","","","description",value,b,b,1,"",1)
         }
      }
   }
}

# EOF