1	# =====================================================================
     2	# rssFeed.awk: RPC I/O function for rpclib/rssFeed.
     3	#
     4	# Copyright (c) 2007,2008,2009,2010 Carlo Strozzi
     5	#
     6	# This program is free software; you can redistribute it and/or modify
     7	# it under the terms of the GNU General Public License as published by
     8	# the Free Software Foundation; version 2 dated June, 1991.
     9	#
    10	# This program is distributed in the hope that it will be useful,
    11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
    12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13	# GNU General Public License for more details.
    14	#
    15	# You should have received a copy of the GNU General Public License
    16	# along with this program; if not, write to the Free Software
    17	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    18	#
    19	# =====================================================================
    20	
    21	# =====================================================================
    22	# void _userproc(int mode)
    23	# =====================================================================
    24	
    25	function _userproc(mode,		value,url,a,b,i,j,title,g_uri,\
    26						p_name,page_dir,page,tmp,x) {
    27	
    28	   if (mode == _O_REQUEST) {				# request.
    29	
    30	      # target group
    31	      value = _request("1",1)
    32	
    33	      # group must not be null and it may not contain the
    34	      # unescaped ``.'' character.
    35	
    36	      if (value != _NULL && value !~ /\./) {
    37		 _rcset("cgi.group",unixify(value))
    38		 _rcset("cgi.group.literal",value)
    39	      }
    40	
    41	      # special page name corresponding to the desired view.
    42	      value = _request("2",1)
    43	
    44	      # special page names must begin with 'tw-'.
    45	
    46	      if (value ~ /^tw-[a-z][-a-z0-9]+-recent-(pages|headlines)$/) {
    47		 # extract subcat from cgi.tw.page if cgi.subcat is empty (this
    48		 # is necessary to have the "Print" action do the right thing).
    49		 sub(/^tw-/,"",value)
    50		 if (sub(/-recent-headlines$/,"",value))
    51			_rcset("cgi.tw.page","tw-recent-headlines")
    52		 else if (sub(/-recent-pages$/,"",value))
    53			_rcset("cgi.tw.page","tw-recent-pages")
    54		 _rcset("cgi.subcat",value)
    55	      }
    56	      else if (value ~ /^tw-/)
    57	         _rcset("cgi.tw.page",unixify(value,1))
    58	
    59	      # optional target page within group (required by some views).
    60	      value = _request("3",1)
    61	
    62	      # page name must be at least 2-character long.
    63	      if (length(value) > 1) {
    64		 _rcset("cgi.page.literal",value)
    65		 _rcset("cgi.page.uri",_uriencode(value))
    66		 value = unixify(value,1)
    67		 _rcset("cgi.page",value)
    68	
    69		 # Extract page meta-category if available. This will override
    70		 # any subcat extracted from cgi.tw.page (see above).
    71		 if ((value=getcat(value)) != _NULL) _rcset("cgi.subcat",value)
    72	      }
    73	
    74	      # The following test is necessary since the address could,
    75	      # at least in theory, have been set to any string by the
    76	      # remote user, due to how it is handled to cope with stunnel(8)
    77	      # and the lack of transproxy support in kernel 2.4.x.
    78	
    79	      if (_isipaddr(ENVIRON["REMOTE_ADDR"]) == _TRUE)
    80					value = ENVIRON["REMOTE_ADDR"]
    81	      else value = "0.0.0.0"
    82	
    83	      _rcset("REMOTE_ADDR",value)
    84	   }
    85	
    86	   else {					# response.
    87	
    88	      # Prepare empty array for _mrwresponse() (see below).
    89	      # This is alreaty a local var, but I want to make clear
    90	      # that it is an array, for documentational purposes.
    91	      delete b
    92	
    93	      title = _rcget("tbl_group.g_descr",1)
    94	      g_uri = _rcget("tbl_group.g_uri",1)
    95	      p_name = _rcget("cgi.tw.page.url")
    96	      page_dir = _rcget("tw_gstem")
    97	
    98	      if (_rcget("cgi.page") != _NULL)
    99				 p_name = p_name "/" _rcget("cgi.page")
   100	
   101	      # p_uri, p_[m,v]time, p_name, p_modau, p_descr [,k_page]
   102	
   103	      # RSS 1.0 response index.
   104	
   105	      x = 1
   106	      for (i=1; i<= _TBL[0]; i++) {
   107	
   108		 j = split(_TBL[i], a, "\t")
   109	
   110		 # Pages with nil descriptions are excluded from RSS feeds.
   111		 if (a[5] ~ /^ *- *$/) continue
   112	
   113		 if (a[1] ~ /^https?:\/\//) url = a[1]
   114	
   115		 else url = ENVIRON["CSA_RPC_URI"] "/" \
   116				ENVIRON["CSA_LANG"] "/" g_uri "/" a[1]
   117	
   118		 if (x++==1) {
   119		    _mrwresponse(_MRW,"channel","",ENVIRON["CSA_RPC_URI"] "/" \
   120			ENVIRON["CSA_LANG"] "/" g_uri "/" p_name,"","","title",title)
   121	
   122		    # make language strictly ISO-639, as described at this link:
   123		    # http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
   124	
   125		    value = tolower(ENVIRON["CSA_LANG"])
   126		    gsub(/_/,"-",value)
   127	
   128		    _mrwresponse(_MRW,"","","","","","link",\
   129			ENVIRON["CSA_RPC_URI"] "/" \
   130			ENVIRON["CSA_LANG"] "/" g_uri,b,b,1)
   131		    _mrwresponse(_MRW,"","","","","","dc:language",value)
   132		    if (ENVIRON["TNS_RSS_CREATOR"] != _NULL) _mrwresponse(_MRW,"","",\
   133				"","","","dc:creator",ENVIRON["TNS_RSS_CREATOR"])
   134		    if (ENVIRON["TNS_RSS_RIGHTS"] != _NULL) _mrwresponse(_MRW,"","",\
   135				"","","","dc:rights",ENVIRON["TNS_RSS_RIGHTS"])
   136		    if (ENVIRON["TNS_RSS_SUBTITLE"] != _NULL) _mrwresponse(_MRW,"","",\
   137				"","","","description",ENVIRON["TNS_RSS_SUBTITLE"])
   138		    _mrwresponse(_MRW,"","","","","","dc:date",_rcget("CSA_TIME_ISO8601",1))
   139		    _mrwresponse(_MRW,"","items","","rdf:Seq")
   140		 }
   141		 _mrwresponse(_MRW,"","","","","","rdf:li",url)
   142	      }
   143	
   144	      _mrwresponse(_MRW,".")			# close the channel index.
   145	
   146	      # RSS 1.0 response body.
   147	
   148	      for (i=1; i<= _TBL[0]; i++) {
   149	
   150		 j = split(_TBL[i], a, "\t")
   151	
   152		 # Mangle according to http://www.w3.org/TR/NOTE-datetime .
   153		 sub(/^[^,]+,/,_NULL,a[2]); sub(/ /,"T",a[2])
   154	
   155		 # Make feed readers happy by appending a plausible time zone
   156		 # if missing. This both for backward-compatibility with previous
   157		 # TW versions and for those RSS views that use 'vtime' instead
   158		 # of either 'mtime' or 'ctime', since 'vtime' is always in a
   159		 # simplified local time format.
   160	
   161		 if (a[2] !~ /[-+][:0-9]+$/) a[2] = a[2] _TIME[":z"]
   162	
   163		 # Pages with nil descriptions are excluded from RSS feeds.
   164		 if (a[5] ~ /^ *- *$/) continue
   165	
   166		 if (a[1] ~ /^https?:\/\//) url = a[1]
   167	
   168		 else url = ENVIRON["CSA_RPC_URI"] "/" \
   169				ENVIRON["CSA_LANG"] "/" g_uri "/" a[1]
   170	
   171		 if (a[6] == _NULL) {
   172		    _mrwresponse(_MRW,"item","",url)
   173		    value = a[3]
   174		    sub(/\./,": ",value)	# improve subcat delimiter.
   175		    _mrwresponse(_MRW,"","","","","","title",value)
   176		    _mrwresponse(_MRW,"","","","","","link",url,b,b,1)
   177		    _mrwresponse(_MRW,"","","","","","dc:creator",a[4])
   178		    _mrwresponse(_MRW,"","","","","","dc:date",a[2])
   179	
   180		    # Omit page descriptions if equal to page names,
   181		    # regardless of the subcat trailer.
   182		    if (a[5] != substr(a[3],index(a[3],".")+1))
   183			_mrwresponse(_MRW,"","","","","","description",a[5])
   184		 }
   185		 else {
   186		    page = page_dir "/" a[6] "+wki"
   187	
   188		    # Note: newlines MUST be preserved, or "<pre>" sections
   189		    # will no longer work (the extra leading newline should
   190		    # not matter).
   191	
   192		    value = _NULL
   193		    while (getline tmp < page > 0) value = value "\n" tmp
   194		    close(page)
   195	
   196		    if (value !~ /[a-zA-Z0-9]/) continue	# skip empty pages.
   197	
   198		    # Note how the TW concept of "abstract" works: an editor can
   199		    # select the portion of a page which will be used as the page
   200		    # abstract by surrounding such portion by suitable application-
   201		    # level wiki tags. This means that a page abstract isn't a
   202		    # separate piece of information but it is simply a selected
   203		    # part of the page body, that will be rendered ***in alternative
   204		    # to the actual page body*** in tw-recent-pages and possibly
   205		    # other static views views. When a page is rendered for normal
   206		    # display the abstract section is removed.
   207	
   208		    sub(/.*\(::ab:\)/,"<span class='tw-abstract'>",value)
   209		    sub(/\(:ab::\).*/,"</span>",value)
   210	
   211		    # This would probably break the feed's XML well-formedness,
   212		    # so I use the simplified form below.
   213		    #if (sub(/\(:i:\).*/," ...</span>",value))
   214		    #	value = "<span class='tw-abstract'>" value
   215	
   216		    sub(/\(:i:\).*/," ...",value)
   217	
   218		    _mrwresponse(_MRW,"item","",url)
   219		    tmp = a[3]
   220		    sub(/\./,": ",tmp)	# improve subcat delimiter.
   221		    _mrwresponse(_MRW,"","","","","","title",tmp)
   222		    _mrwresponse(_MRW,"","","","","","link",url,b,b,1)
   223		    _mrwresponse(_MRW,"","","","","","dc:creator",a[4])
   224		    _mrwresponse(_MRW,"","","","","","dc:date",a[2])
   225	
   226		    # Save on overhead if there's no RDFa to parse.
   227		    if (value ~ /\(:v-/) {
   228		       if (_rcget("TNS_GROUP_MISC_PROP",4) == "rdfa")
   229						value = _rdfacpi(value)
   230		       else value = _mfmtcpi(value)
   231		    }
   232	
   233		    # Page bodies may contain markup, so they need to be
   234		    # enclosed in a CDATA section.
   235	
   236		    value = "<![CDATA[" value "]]>"
   237	
   238		    _mrwresponse(_MRW,"","","","","","description",value,b,b,1,"",1)
   239		 }
   240	      }
   241	   }
   242	} 
   243	
   244	# EOF