#!/usr/bin/perl -wT
#
# Produce an atom feed of subversion commits.
#
# @(#) $Id: subatom 1177 2006-06-05 22:17:55Z dom $
#

use strict;
use warnings;

use File::Basename qw( basename dirname );
use File::Spec;
use Getopt::Std qw( getopts );
use XML::Atom::SimpleFeed;

our $VERSION = '0.08';
my $me = basename $0;

# Declared ahead of its definition so that "usage" can be called as a
# bare word in the statements below.
sub usage;

# Ensure we know what we're doing for tainting.  Under -T perl refuses
# to exec anything while PATH, IFS, CDPATH, ENV or BASH_ENV still hold
# tainted values (see perlsec), so pin the first and drop the rest.
local $ENV{ PATH } = join ':', qw( /bin /usr/bin /usr/local/bin );
delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) };

my %opt = ( 'm' => 30 );
getopts( "m:o:v:", \%opt ) or usage;

# -m is compared numerically when limiting the feed, so insist on a
# plain non-negative integer instead of silently treating junk as zero.
usage unless $opt{ m } =~ m/\A\d+\z/;

my $repos = shift
  or usage;

# untaint.  The pattern is anchored at both ends so that an argument
# containing an unexpected character is rejected outright rather than
# being cut short and pointing at a different URL.  ':' (port numbers),
# '%' (URL escapes) and '~' are accepted in addition to the original
# character set.
my ( $repos_url ) =
  $repos =~ m{\A((?:https?|file|svn|svn\+ssh)://[-/\w.\@:%~]+)\z}
  or usage;
$repos_url =~ s!/$!!;    # Remove trailing slash.

# Filter out crap (for tainting).  A rejected argument contributes
# nothing; relying on $1 after a failed match would hand back whatever
# the previous successful match captured.
my @path = map { m{\A(/[-/\w.\@]*)\z} ? $1 : () } @ARGV;
@path = qw( / ) unless @path;

my @items = fetch_log_items( $repos_url, @path );

my $feed = make_feed_from_items( \@items, \@path );
my $xml = $feed->as_string;
if ( defined $opt{ o } && length $opt{ o } ) {

    # untaint.  Refuse the name rather than write to a truncated one.
    my ( $file ) = $opt{ o } =~ m{\A([-\w/.]+)\z}
      or die "$me: unsafe output filename '$opt{o}'\n";
    write_feed_to_file( $xml => $file );
}
else {
    print $xml;
}
exit 0;

# Abort the program with a one-line synopsis of the command line.
sub usage {

    # The trailing newline stops perl appending "at FILE line N".
    my $synopsis =
      "usage: $me [-m max] [-o output] [-v viewcvs] repos_url [path ...]\n";
    die $synopsis;
}

# Run "svn log -v" and return the commits it reports.
#
#   $repos_url - repository URL handed to svn.
#   @path      - paths handed to svn after the URL.
#
# Returns the list of item hashes built by process_log_lines().  Dies
# if the child process cannot be forked.
sub fetch_log_items {
    my ( $repos_url, @path ) = @_;

    # See perlipc(3) for details about all this.
    my $pid = open my $fh, '-|';

    # A failed fork yields undef.  Without this check the parent would
    # fall through to the child code below and replace itself with svn.
    die "$me: fork: $!\n" unless defined $pid;

    if ( $pid ) {

        # Parent.  The result of close is ignored on purpose: reading
        # stops as soon as enough entries have been seen, so svn will
        # commonly exit with a broken pipe.
        my @items = process_log_lines( $fh );
        close $fh;
        return @items;
    }

    # Child.  Keep a private copy of the real STDERR so that a failure
    # to start svn can still be reported.  Descriptors above $^F are
    # close-on-exec, so svn itself never inherits the copy.
    my $have_stderr = open my $real_stderr, '>&', \*STDERR;

    # Ensure that svn's own errors get thrown away, since svn log will
    # probably die with a "broken pipe" error.  The handle must be
    # opened for writing; this is best effort, so the result is not
    # checked.
    open STDERR, '>', File::Spec->devnull;

    # Ensure that we get subversion to generate times with no
    # timezone.  This means that they match the Atom expectation.
    local $ENV{ TZ } = 'UTC';
    exec 'svn', 'log', '-v', $repos_url, @path
      or do {
        print {$real_stderr} "$me: exec(svn log): $!\n" if $have_stderr;
        exit 127;
      };

    # NOTREACHED
}

# Parse the output of "svn log -v" read from $fh into a list of items.
#
# Each item is a hash reference with the keys revision, date, user and
# lines (taken from the "rN | user | date | N lines" header), plus
# affected (array of raw changed-path lines) and msg (array of
# non-blank message lines) when present.  Reading stops once $opt{m}
# items have been collected.
#
# The parser tracks where it is inside an entry instead of classifying
# every line by its content alone.  A header is only recognised directly
# after a separator, "Changed paths:" only directly after a header, and
# the line count announced by the header decides how many following
# lines belong to the message.  A message line such as "r123 broke the
# build" or a row of dashes is therefore kept as message text.
sub process_log_lines {
    my ( $fh ) = @_;

    my ( @items, $current_item );
    my $state    = 'preamble';    # preamble, header, details, paths, message
    my $msg_left = 0;             # message lines still owed to this entry

  LINE:
    while ( defined( my $line = <$fh> ) ) {
        $line =~ s/[\r\n]+\z//;

        # Inside the announced message body nothing is structural.
        # Blank lines are counted but, as before, not stored.
        if ( $state eq 'message' && $msg_left > 0 ) {
            $msg_left--;
            push @{ $current_item->{ msg } }, $line if length $line;
            next LINE;
        }

        if ( $line =~ m/^-+$/ ) {
            push @items, $current_item if $current_item;
            last LINE if @items >= $opt{ m };
            $current_item = {};
            $state        = 'header';
            $msg_left     = 0;
        }
        elsif ( $state eq 'header' && $line =~ m/^r\d+ \| / ) {
            my ( $ver, $user, $date, $lines ) = split / \| /, $line;
            $date  = '' unless defined $date;
            $lines = '' unless defined $lines;
            $ver   =~ s/^r//;
            $date  =~ s/ \(.*\)//;
            $lines =~ s/ .*//;
            $current_item->{ revision } = $ver;
            $current_item->{ date }     = $date;
            $current_item->{ user }     = $user;
            $current_item->{ lines }    = $lines;

            # Without a usable count the message simply runs until the
            # next separator line.
            $msg_left = $lines =~ m/\A\d+\z/ ? $lines : 0;
            $state    = 'details';
        }
        elsif ( $state eq 'details' && $line =~ m/^Changed paths:/ ) {
            $state = 'paths';
        }
        elsif ( $line eq '' ) {

            # The blank line after the header or the path list
            # introduces the message.
            $state = 'message' if $state eq 'details' || $state eq 'paths';
        }
        elsif ( $state eq 'paths' ) {
            push @{ $current_item->{ affected } }, $line;
        }
        else {
            push @{ $current_item->{ msg } }, $line;
        }
    }
    return @items;
}

# Return the URL that a feed entry for $item should link to.
#
# With -v the revision number is appended to the given URL.  Otherwise
# the link points at the first changed path below the repository URL,
# or at the repository URL itself when the item lists no usable path.
sub item_link {
    my ( $item ) = @_;

    if ( $opt{ v } ) {
        return $opt{ v } . $item->{ revision };
    }

    # Just link to the first changed file.
    my $affected = $item->{ affected } || [];
    my $first    = $affected->[0];
    return $repos_url unless defined $first;

    # A changed-path line is an action flag followed by the path.  A
    # copied path carries a trailing " (from SOURCE:REV)" note, which
    # must not end up in the link.
    my ( $path ) =
      $first =~ m{\A\s*\S+\s+(.+?)(?: \(from .+:\d+\))?\s*\z};
    return defined $path ? $repos_url . $path : $repos_url;
}

# Add one commit ($item, as built by process_log_lines) to $feed as an
# entry.  Returns nothing.
sub item_to_entry {
    my ( $feed, $item ) = @_;

    # Borrow the feed module's private helper for the message text.
    # NOTE(review): its exact escaping behaviour is not visible from
    # here, and the affected paths below are inserted without it.
    my $escape  = \&XML::Atom::SimpleFeed::_cdata;
    my $summary = $escape->( join( "\n", @{ $item->{ msg } || [] } ) );

    # The content is the message followed by a list of changed paths.
    my @html = ( "<p>$summary</p>\n", "<p>Affected files:</p>\n", "<ul>\n" );
    push @html, "<li>$_</li>\n" foreach @{ $item->{ affected } };
    push @html, "</ul>\n";

    $feed->add_entry(
        id      => $repos_url . '#r' . $item->{ revision },
        title   => item_title( $item ),
        updated => svn_date_to_atom_date( $item->{ date } ),
        content => join( '', @html ),
        summary => $summary,
        author  => $item->{ user },
        link    => item_link( $item ),
    );

    return;
}

# Convert a subversion timestamp into the form Atom wants, for example
# '2005-08-01 11:07:02 +0000' becomes '2005-08-01T11:07:02Z'.
#
# svn is run with TZ set to UTC, so the numeric offset is simply
# replaced by the zulu marker rather than being applied.
sub svn_date_to_atom_date {
    my ( $svn_date ) = @_;

    for ( $svn_date ) {
        s/ /T/;                # date/time separator
        s/ \+\d\d\d\d/Z/;      # numeric offset
    }
    return $svn_date;
}

# Build the entry title for $item: "rN - DIR", where DIR is the
# directory part of the shortest changed path.  When the item lists no
# usable changed path the title is just "rN".
sub item_title {
    my ( $item ) = @_;

    my $shortest;
    foreach my $line ( @{ $item->{ affected } || [] } ) {

        # Action flag, then the path, then an optional
        # " (from SOURCE:REV)" note on copied paths.  Matching the
        # path as a whole keeps names that contain spaces intact.
        my ( $path ) =
          $line =~ m{\A\s*\S+\s+(.+?)(?: \(from .+:\d+\))?\s*\z}
          or next;

        # Strictly shorter only, so the earliest path wins a tie.
        $shortest = $path
          if !defined $shortest || length( $path ) < length( $shortest );
    }

    return "r$item->{revision}" unless defined $shortest;

    my $d = dirname( $shortest );
    return "r$item->{revision} - $d";
}

# Build the feed object for a list of commits.
#
#   $items     - array reference of item hashes, newest first.
#   $requested - array reference of the repository paths asked for.
#
# Returns the XML::Atom::SimpleFeed object with one entry per item.
sub make_feed_from_items {
    my ( $items, $requested ) = @_;

    my $title = "Recent commits to $repos_url";
    $title .= " for @{$requested}"
      unless @{ $requested } == 1 && $requested->[0] eq '/';

    # Only look at the first item when there is one.  Reading
    # $items->[0]{date} on an empty list would create an empty item as
    # a side effect, which would then be turned into a bogus entry.
    # An empty feed is stamped with the epoch so that it is valid but
    # never counts as newer than a feed already on disk.
    my $updated =
      @{ $items }
      ? svn_date_to_atom_date( $items->[0]{ date } )
      : '1970-01-01T00:00:00Z';

    my $feed = XML::Atom::SimpleFeed->new(
        id      => $repos_url,
        title   => $title,
        updated => $updated,
    );

    item_to_entry( $feed, $_ ) foreach @{ $items };
    return $feed;
}

# Write out a feed to the file, taking care to avoid writing unless
# the feed we've just generated is actually newer than the one on
# disk.  This is needed for correct If-Modified-Since support.
#
#   $feed - the feed as a string of XML.
#   $file - name of the file to write.
#
# Returns true after writing and nothing when the existing file was
# kept.  Dies if the file cannot be read, written or closed.
sub write_feed_to_file {
    my ( $feed, $file ) = @_;
    if ( -f $file ) {

        # XXX This is quite evil and I should use a proper XML parser.
        open my $in, '<', $file or die "$me: open($file): $!\n";
        my $old_feed = do { local $/; <$in> };
        close $in;
        $old_feed = '' unless defined $old_feed;

        my ( $old_updated ) = $old_feed =~ m{<updated>(.*?)</updated>};
        my ( $new_updated ) = $feed     =~ m{<updated>(.*?)</updated>};

        # Keep the existing file when it carries a timestamp that the
        # new feed does not beat.  A file without any timestamp (for
        # instance one left empty by an interrupted run) is replaced.
        return
          if defined $old_updated
          && ( !defined $new_updated || $new_updated le $old_updated );
    }

    # Buffered write errors (a full disk, say) may only surface at
    # close time, so check both steps.
    open my $out, '>', $file or die "$me: open(>$file): $!\n";
    print {$out} $feed or die "$me: write($file): $!\n";
    close $out or die "$me: close($file): $!\n";
    return 1;
}

__END__

=pod

=head1 NAME

subatom - produce an atom feed from subversion commits

=head1 SYNOPSIS

  subatom [-m max] [-o output] [-v viewcvs] REPOSITORY_URL [path ...]

=head1 DESCRIPTION

B<subatom> is a small script to produce an Atom feed from subversion
commits.  You can use this with a feed reader to see new commits to
your repository.

The first argument is the URL for your subversion repository.  The
remaining arguments are paths within the repository for which you
would like commit messages.  If you don't specify any, it will default
to the entire repository.

The I<-m> flag states how many entries you would like in the feed.
The default is 30.

If you pass a filename using the I<-o> flag, the output will be
written there instead of STDOUT.  If the filename already exists then
the timestamp will be checked and the new feed will only be written if
it actually contains any newer entries.  If you will be serving the
feed file using a web server, then doing this enables
If-Modified-Since to work correctly (ie: feed readers will only
download the file if it has actually changed).

You can pass a URL using the I<-v> flag, and B<subatom> will create a
link for each commit by appending the revision number to the URL.  For
example, a viewcvs URL might look like this (on the command line):

    http://example.com/viewcvs/svn/?view=rev&rev=

If the URL has question marks or ampersands, don't forget to quote it on
the command line.  If you don't, the shell will interpret them and
probably give some odd looking errors.

If you don't pass in a URL, a link will be automatically made to the
first file that changed in this commit.

=head1 SEE ALSO

L<XML::Atom::SimpleFeed>, L<XML::Atom>

L<http://subversion.tigris.org/>

=head1 AUTHOR

Dominic Mitchell E<lt>cpan (at) happygiraffe.netE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 by Dominic Mitchell. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

=over 4

=item 1.

Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

=item 2.

Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

=back

THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

=cut

# vim: set ai et sw=4 :
