#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
=pod

=head1 NAME

tv_grab_uk_rt - Grab TV listings for United Kingdom and Republic of Ireland

=head1 SYNOPSIS

tv_grab_uk_rt --help

tv_grab_uk_rt --configure [--config-file FILE]

tv_grab_uk_rt [--config-file FILE] [--output FILE] [--days N] [--offset N]
              [--gui OPTION] [--quiet]

tv_grab_uk_rt --list-channels

tv_grab_uk_rt --capabilities

tv_grab_uk_rt --version

=head1 DESCRIPTION

Output TV listings in XMLTV format for many stations available in the 
United Kingdom and Republic of Ireland.  The data comes from a 
machine-readable file produced by the Radio Times website.

=head1 USAGE

First you must run B<tv_grab_uk_rt --configure> to choose which stations you
want to receive.  Then running B<tv_grab_uk_rt> with no arguments will get
about a fortnightE<39>s listings for the stations you chose.

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--config-file FILE> Set the name of the configuration file, the default is
B<~/.xmltv/tv_grab_uk_rt.conf>.  This is the file written by B<--configure> and
read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than standard
output.

B<--quiet> Suppress the progress messages normally written to standard error.

B<--days N> When grabbing, grab N days of data instead of all available.

B<--offset N> Start grabbing at today + N days.

B<--list-channels> Write output giving <channel> elements for every channel
available, but no programmes.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://membled.com/twiki/bin/view/Main/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

Note that tv_grab_uk_rt always downloads data for all days and then filters
out the days specified with --days and --offset. It is therefore more
efficient to omit --days and --offset and use all the returned data.

=head1 SEE ALSO

L<xmltv(5)>, L<http://www.radiotimes.beeb.com/>

=head1 AUTHOR

Ed Avis, ed@membled.com

=cut

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_uk_rt.in,v 1.102 2007/09/27 02:35:37 knowledgejunkie Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache share preferredmethod/;
use XMLTV::Description 'United Kingdom/Ireland (Radiotimes)';
use XMLTV::PreferredMethod qw/allatonce/;
use Getopt::Long;
use HTML::Entities;
use Date::Manip; Date_Init('TZ=+0000');
use XMLTV::Config_file;
use XMLTV::Get_nice;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get_nice';
use XMLTV::DST;
use XMLTV::Usage <<END
$0: Get TV listings for United Kingdom and Ireland in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] [--offset N]
                     [--gui OPTION] [--quiet]
To list channels: $0 --list-channels
To show capabilities: $0 --capabilities
To show version: $0 --version
END
  ;
$XMLTV::Get_nice::Delay = 0; # since this is intended for grabbing
my $channel_list_uri = 'http://xmltv.radiotimes.com/xmltv/channels.dat';

sub configure();

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

GetOptions('help'      => \ my $opt_help,
       'configure'     => \ my $opt_configure,
       'config-file=s' => \ my $opt_config_file,
       'gui:s'         => \ my $opt_gui,
       'output=s'      => \ my $opt_output,
       'share=s'       => \ my $opt_share, # also undocumented
       'quiet'         => \ my $opt_quiet,
       'list-channels' => \ my $opt_list_channels,
       'days=s'        => \ my $opt_days,
       'offset=s'      => \ my $opt_offset,
      )
  or usage(0);

if ($opt_help) {
    usage(1);
}

# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_uk_rt.PL.  But we can use
# the current directory instead of share/tv_grab_uk for development.
#
# The 'source' file tv_grab_uk_rt.in has $SHARE_DIR undef, which means
# use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#
my $SHARE_DIR='/usr/share/xmltv'; # by grab/uk_rt/tv_grab_uk_rt.PL
$SHARE_DIR = $opt_share if defined $opt_share;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_uk_rt" : '.';
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;

XMLTV::Ask::init($opt_gui);

# Stuff for the root <tv> element.
my %tv_credits = ( # 'source-info-url'     => "todo",
           'source-info-name'    => 'Radio Times',
           'generator-info-name' => 'XMLTV',
           'generator-info-url'  =>
           'http://membled.com/work/apps/xmltv/',
         );

# Tables to convert between Radio Times and XMLTV ids of channels.
# The way to access these is through the routines rt_to_xmltv() and
# xmltv_to_rt(), not directly.  Those will deal sensibly with a new RT
# channel that isn't mentioned in the file.
#
my (%rt_to_xmltv, %xmltv_to_rt, %extra_dn, %icon_urls);
my $line_num = 0;
foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
    ++ $line_num;
    next unless defined;
    my $where = "$CHANNEL_NAMES_FILE:$line_num";
    my @fields = split /\|/;
    die "$where: Wrong number of fields in XMLTV channel_ids file, aborting"
      if @fields < 2 or @fields > 4;

    my ($xmltv_id, $rt_id, $extra_dn, $icon_url) = @fields;

    if (not $opt_quiet) {
        warn "$where: RT ID '$rt_id' already seen in XMLTV channel_ids file\n"
          if defined $rt_to_xmltv{$rt_id};
    }
    $rt_to_xmltv{$rt_id} = $xmltv_id;

    if (not $opt_quiet) {
        warn "$where: XMLTV ID '$xmltv_id' already seen in XMLTV channel_ids file\n"
          if defined $xmltv_to_rt{$xmltv_id};
    }
    $xmltv_to_rt{$xmltv_id} = $rt_id;

    $extra_dn{$xmltv_id} = $extra_dn if defined $extra_dn;
    $icon_urls{$xmltv_id} = $icon_url if defined $icon_url;
}

# Keep output on STDERR preserving STDOUT for XML data
say( "\nAll data is the copyright of the Radio Times website
<http://www.radiotimes.com> and the use of this data
is restricted to personal use only.\n" ) if not $opt_quiet;

# Whatever we're doing, we need the available channels from the RT site.
my $channel_list = get_nice $channel_list_uri;
my (%channels, %seen_rt_id, %seen_name);
my @rt_chans = split /\n/, $channel_list;
my $num_rt_chans = scalar @rt_chans;
say( "Radio Times reports available listings for $num_rt_chans channels.\n")
  if not $opt_quiet;

my $chans_bar = new XMLTV::ProgressBar({name   => 'Retrieving channels',
                                        count  => $num_rt_chans,
                                        ETA    => 'linear', })
                                          if not $opt_quiet;
my $need_final_update = 0;

foreach (@rt_chans) {
    chomp;
    /^(\d+)\|(.+)/ or die "Bad line seen in RT channels.dat: $_";
    my ($rt_id, $name) = ($1, $2);
    if ($seen_rt_id{$rt_id}++) {
        die "RT channel ID '$rt_id' already seen in RT channels.dat, aborting";
    }
    if ($seen_name{$name}++) {
        if (not $opt_quiet) {
            warn "RT channel '$name' already seen in RT channels.dat\n";
        }
    }
    
    my $xmltv_id = $rt_to_xmltv{$rt_id};
    
    # If the current RT channel has a known XMLTV ID, check it against known bad
    # channels and skip it if required. If the channel does not have an 
    # XMLTV ID, create one and continue.
    if (defined $xmltv_id) {
        # Skip any RT entries which have been flagged as bad in channel_ids file
        if ( $extra_dn{ $rt_to_xmltv{$rt_id} } =~ /.*Do\ Not\ Use.*/ ) {
            if (not $opt_quiet) {
                warn "RT channel '$name' ($rt_id) flagged as bad, skipping\n";
            }
            $need_final_update = 1;
            next;
        }
    } else {
        # Handle new channels on RT site unknown to channel_ids
        if (not $opt_quiet) {
            warn "RT channel '$name' ($rt_id) unknown in XMLTV channel_ids file\n";
        }
        $xmltv_id = "C$rt_id.radiotimes.com";
    }

    my @names = ([ $name ]);
    my $icon_url = $icon_urls{$xmltv_id};
    my @icon = { 'src' => $icon_url } if $icon_url;
    for ($extra_dn{$xmltv_id}) { push @names, [ $_ ] if defined }
    if (@icon) {
        $channels{$xmltv_id} = { id => $xmltv_id,
			         rt_id => $rt_id,
			         'display-name' => \@names,
			         'icon' => \@icon };
    } else {
        $channels{$xmltv_id} = { id => $xmltv_id,
			         rt_id => $rt_id,
			         'display-name' => \@names };
    }
    
    # Update the progres bar by one increment
    if (defined $chans_bar) {
        $chans_bar->update();
    }
}

if (defined $chans_bar) {
    # Only update the progress bar to 100% if we need to
    $chans_bar->update($num_rt_chans) if $need_final_update;
    $chans_bar->finish();
    say( "\n" ) if not $opt_quiet;
}

if (not $opt_quiet) {
    foreach (keys %xmltv_to_rt) {
        # ignore channels flagged as bad in channel_ids
        next if $extra_dn{$_} =~ /.*Do\ Not\ Use.*/;
        warn "XMLTV channel '$_' ($xmltv_to_rt{$_}) not seen on RT site\n"
          if not exists $channels{$_};
    }
}

my %g_args = ();
if (defined $opt_output) {
    my $fh = new IO::File ">$opt_output";
    die "Cannot write to $opt_output" if not $fh;
    #    binmode $fh or die "cannot set binmode for output: $!";
    %g_args = (OUTPUT => $fh);
}

if ($opt_list_channels) {
    die "--list-channels can't be given with --configure, exiting.\n"
      if $opt_configure;
    my $writer = new XMLTV::Writer(%g_args, encoding => 'ISO-8859-1');
    $writer->start(\%tv_credits);
    foreach (sort keys %channels) {
	delete $channels{$_}{rt_id};
	$writer->write_channel($channels{$_});
    }
    $writer->end;
    exit;
}

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_uk_rt', $opt_quiet);

if ($opt_configure) {
    configure();
    exit;
}

# Ask the user which channels to download, and write $config_file.
#
# Uses global %channels hash.
#
# FIXME commonize with other grabbers.
#
sub configure() {
    #    local $Log::TraceMessages::On = 1;

    XMLTV::Config_file::check_no_overwrite($config_file);

    # FIXME need to make directory
    open(CONF, ">$config_file") or die "Cannot write to $config_file: $!";
    t 'channels: ' . d \%channels;

    my %chan_id_to_name;
    foreach my $chan_id (keys %channels) {
        $chan_id_to_name{$chan_id} =
            $channels{$chan_id}->{'display-name'}->[0]->[0];
    }

    my @chan_ids = sort {$chan_id_to_name{$a} cmp $chan_id_to_name{$b}}
        keys %chan_id_to_name;

    my @questions;
    foreach my $chan_id (@chan_ids) {
        push @questions, "Add channel ".$chan_id_to_name{$chan_id}."? ";
    }
    my @answers = ask_many_boolean(1, @questions);

    for (my $i=0; $i < $#chan_ids; $i++) {
        if ($answers[$i]) {
            print CONF "channel ".$chan_ids[$i]."\n";
        }
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

# Grabbing.  Start by reading config file.
my @wanted_chs;
my $n = 0;
foreach (XMLTV::Config_file::read_lines $config_file) {
    ++$n;
    next if not defined;
    /^\s*channel\s+(\S+)\s*$/ or die "$config_file: $n: bad line $_\n";
    my $id = $1;
    if (not exists $channels{$id}) {
        warn "XMLTV channel '$id' mentioned in $config_file but is not available on RT site\n"
          if not $opt_quiet;
        next;
    }
    push @wanted_chs, $id;
}
#@wanted_chs = sort keys %channels;

my %d_args = ();
if (defined( $opt_days ) or defined( $opt_offset )) {
  $opt_offset = 0 unless defined $opt_offset;
  $opt_days = 15 unless defined $opt_days;

  $d_args{offset} = $opt_offset;
  $d_args{days} = $opt_days;
  $d_args{cutoff} = "000000";
}

my $writer = new XMLTV::Writer(%g_args, %d_args, encoding => 'ISO-8859-1');
$writer->start(\%tv_credits);
foreach (@wanted_chs) {
    my %h = %{$channels{$_}};
    delete $h{rt_id};
    $writer->write_channel(\%h);
}

my $num_req_chans = scalar @wanted_chs;

say("Downloading listings for $num_req_chans configured channels\n")
  if not $opt_quiet;

my $listings_bar = new XMLTV::ProgressBar({name   => 'Retrieving listings',
                                           count  => $num_req_chans,
                                           ETA    => 'linear', })
                                             if not $opt_quiet;

my %warned_wrong_num_fields; # give that warning once per channel file

# Reset check for final progress bar update
$need_final_update = 0;

foreach my $ch (@wanted_chs) {
    my $c = $channels{$ch};
    my $rt_id = $channels{$ch}->{rt_id}; die if not defined $rt_id;

    # Try to get the base timezone for this channel from its name.
    my $base_tz;
    if ($c->{'display-name'}->[0]->[0] =~ /\((UTC|GMT|CET)\)\s*$/) {
	$base_tz = $1;
    }
    for ($base_tz) { $_ = 'UTC' if not defined }

    my $uri = "http://xmltv.radiotimes.com/xmltv/$rt_id.dat";
    local $SIG{__DIE__} = sub { die "$uri: $_[0]" };
    local $SIG{__WARN__} = sub { warn "$uri: $_[0]" };
    my $page = get_nice $uri;

    # Tidy up HTML entities and bad characters.  The site seems to use
    # a mixture of Latin-1 and UTF-8, I'm not sure exactly.  We want
    # our output to be in Latin-1 but HTML::Entities decides to use
    # Unicode so we have to fiddle a few entities manually first.
    #
    for ($page) {
	s/&#8212;/--/g;
	s/&#8230;/.../g;
	decode_entities $_;
	tr/\207\211\200\224/\347\311\055\055/; # bad characters
	# Replace wierd punctuation (some wierd encoding?) prefixed 
	# by A-tilde (char 0303/C3/195)
	s/\303[\364\366]/"/g; # o-circumflex/o-tilde -> "
	s/\303[\306\346]/'/g; # ae-dipthong AE-dipthong -> '
	s/\303[\371\373]/-/g; # u-grave u-circumflex -> -
    }
    foreach (split /\n/, $page) {
	my @fields = split /\~/;
	if (@fields != 23) {
	    if (not $opt_quiet) {
	        warn "Wrong number of fields in line:\n$_\n"
	          unless $warned_wrong_num_fields{$ch}++;
            }
	    next;
	}
	foreach (@fields) { s/^\s+//; s/\s+$//; undef $_ if not length }
	my ($title, $sub_title, $episode, $year, $director, $cast,
	    $premiere, $film, $repeat, $subtitles, $widescreen,
	    $new_series, $deaf_signed, $black_and_white, $star_rating,
	    $certificate, $genre, $desc, $choice, $date, $start, $stop,
	    $duration_mins) = @fields;
	foreach ($premiere, $film, $repeat, $subtitles, $widescreen,
		 $new_series, $deaf_signed, $black_and_white, $choice) {
	    die "true/false value not defined" if not defined;
	    if ($_ eq 'true') { $_ = 1 }
	    elsif ($_ eq 'false') { $_ = 0 }
	    else { die "bad true/false value $_" }
	}

	warn "Ignoring sub-title $sub_title since episode also given\n"
	  if defined $sub_title and defined $episode and not $opt_quiet;
	$sub_title = $episode if defined $episode;

	if (not defined $title) {
	    warn("Missing title in: $_") if not $opt_quiet;
	    next;
        }

	# Roundabout the summer time changeover they include timezone
	# in the title.
	#
	my $explicit_tz = '';
	if ($title =~ s/^\((GMT|UTC|BST|UTC\+1)\)\s*//) {
	    $explicit_tz = $1;
	}

	my %p = (channel => $ch, title => [ [ $title ] ]);
        if (defined $sub_title && 
            ($sub_title =~ /^(\d+)\/(\d+)$/ ||
             $sub_title =~ /^(\d+)\/(\d+)\s+-\s+/))
        {
            my $episode = $1 - 1;
            my $episodes = $2;

            $p{'episode-num'} = [ [ " . ${episode}/${episodes} . ", "xmltv_ns" ] ];

            $sub_title =~ s/^(\d+)\/(\d+)(?:\s+-\s+)?//;

            undef $sub_title if $sub_title =~ /^\s*$/;
        }
	for ($sub_title) { $p{'sub-title'} = [ [ $_ ] ] if defined }
	for ($year) { $p{date} = $_ if defined }
	for ($director) { $p{credits}{director} = [ $_ ] if defined }
	if (defined $cast) {
	    my @cast;
	    if ($cast =~ tr/|//) {
		@cast = split /\|/, $cast;
		# Each bit is in the format 'part*actor' and it seems that
		# even when part is 'director' that is the name of a
		# character.
		#
		foreach (@cast) {
		    s/^.*[*]// or warn "Bad bit of cast list: $_" if not $opt_quiet;
		}
	    }
	    else {
		@cast = split /,/, $cast;
	    }
	    foreach (@cast) { s/^\s+//; s/\s+$// }
	    $p{credits}{actor} = \@cast;
	}
	$p{premiere} = [ '' ] if $premiere;
	push @{$p{category}}, [ 'Film', 'en' ] if $film;
	$p{'previously-shown'} = {} if $repeat;
	$p{subtitles} = [ { type => 'teletext' } ] if $subtitles;
	$p{video}{aspect} = '16:9' if $widescreen;
	$p{new} = 1 if $new_series;
	# $deaf_signed ignored for now
	$p{video}{colour} = 0 if $black_and_white;
	$p{'star-rating'} = [ "$star_rating/5" ] if defined $star_rating;
	$p{rating} = [ [ $certificate, 'BBFC' ] ] if defined $certificate;
	push @{$p{category}}, [ $genre, 'en' ] if defined $genre and not $film;
	for ($desc) {
	    if (defined) {
		s!</?[A-Za-z]+>!!g;
		$p{desc} = [ [ $_, 'en' ] ];
	    }
	}
	# $choice ignored for now

	# Date, start and stop time.
	my ($yyyy, $mm, $dd);
	for ($date) {
	    die "Missing date in $_" if not defined;
	    m!(\d\d)/(\d\d)/(\d{4})$! or die "Bad date $_";
	    ($dd, $mm, $yyyy) = ($1, $2, $3);
	}

	$p{start} = utc_offset "$yyyy$mm$dd$start $explicit_tz", $base_tz;
	$p{stop} = utc_offset "$yyyy$mm$dd$stop $explicit_tz", $base_tz;
	if (Date_Cmp($p{start}, $p{stop}) > 0) {
	    $p{stop} = utc_offset(DateCalc("$yyyy$mm$dd$stop $explicit_tz",
					   '+ 1 day'),
				  $base_tz);
	}
	# Ignore $duration_mins since it may not be reliable.

	$writer->write_programme(\%p);
    
    }
    
    # Update the progres bar by one increment
    if (defined $listings_bar) {
        $listings_bar->update();
    }
}

$writer->end;

if (defined $listings_bar) {
    # Only update the progress bar to 100% if we need to
    $listings_bar->update($num_req_chans) if $need_final_update;
    $listings_bar->finish();
    say( "\n" ) if not $opt_quiet;
}

# Keep output on STDERR preserving STDOUT for XML data
say( "Finished!\n" ) if not $opt_quiet;
