#!/usr/bin/perl
#
# plosbiology2rss by Pieter Edelman
# Generated by script4rss (http://script4rss.sf.net)
#
# This script converts http://www.plosbiology.org/plosonline/?request=get-issue to an RSS feed
#
# ==============================================================================
# PLoS Biology is an open-access scientific biology magazine
# This script attempts to convert the page linking to the current issue to an RSS feed
# ==============================================================================
#
# Copyright(c) 2004 Pieter Edelman
# Released under the terms of the GNU General Public License (GPL) Version 2.
# See http://www.gnu.org/ for details.

# fixHTML($html)
#
# Normalises the markup in one chunk of HTML and returns the result:
#   * if a new "<" is met while the previous tag is still open, a ">" is
#     inserted first, so a tag is closed before the next one is opened;
#   * inside a tag, whitespace around "=" is removed and every run of
#     whitespace is collapsed to a single space.
# Text outside of tags is copied through unchanged.  An undefined argument
# yields the empty string.
sub fixHTML {
  my ($html) = @_;
  return "" unless defined $html;

  # Keep the "<" and ">" separators as list elements of their own.  The
  # result is captured explicitly: a void-context split no longer fills @_
  # on current Perls, which made the old loop a no-op.
  my @pieces = split /([<>])/, $html;

  my $clean_string = "";
  my $in_tag       = 0;

  while (@pieces) {
    my $piece = shift @pieces;

    if ($piece eq '<') {
      # The previous tag was never closed: close it before opening this one.
      $clean_string .= ">" if $in_tag;
      $in_tag = 1;
      $clean_string .= $piece;

      # The element after "<" is the tag body.  It is missing when the
      # input ends in "<", because split drops trailing empty fields.
      $piece = shift @pieces;
      $piece = "" unless defined $piece;
      $piece =~ s/\s*=\s*/=/g;
      $piece =~ s/\s+/ /g;
    }
    elsif ($piece eq '>') {
      $in_tag = 0;
    }

    $clean_string .= $piece;
  }

  return $clean_string;
}

# cleanup($text)
#
# Escapes the characters "&", "<" and ">" as the entities "&amp;", "&lt;"
# and "&gt;".  The argument is modified IN PLACE (through the @_ alias) and
# the escaped string is also returned, so it must be called with a variable,
# not a literal.
sub cleanup {
  # Disabled alternative that would leave already-escaped entities alone:
  #$_[0] =~ s/&(?!\S*;)/&amp;/g;

  # "&" has to go first, otherwise the "&" of "&lt;"/"&gt;" would be
  # escaped a second time.
  $_[0] =~ s/&/&amp;/g;
  $_[0] =~ s/</&lt;/g;
  $_[0] =~ s/>/&gt;/g;
  return $_[0];
}

# Read lines from stdin
@lines = <>;

# Print the header
# NOTE(review): these string literals look as if their XML/RSS markup was
# stripped from this copy of the script -- restore it from the upstream
# source before relying on the output.
print "\n";
print "\n";
print "\n";
print " \n";
print " PLoS Biology\n";
print " http://www.plosbiology.org/plosonline/?request=get-issue\n";
print " Public Library of Sciences: Biology\n";
print " 10080\n";

# Keep reading lines until they are all used
while (scalar(@lines) > 0) {
  $line = &fixHTML(shift(@lines));

  # Find the category name
  if ($line =~ m/