Codebase list bgoffice-dict-downloader / HEAD bgoffice-dict-download
HEAD

Tree @HEAD (Download .tar.gz)

bgoffice-dict-download @HEADraw · history · blame

#!/usr/bin/perl

use strict;
use warnings;


use Getopt::Long;
use Pod::Usage;
use WWW::Mechanize;

# Values filled in from the command line; all three stay undefined until the
# corresponding option is seen.
my ( $opt_list, $opt_dict, $opt_output );

# Starting page of the browsing session (not settable from the command line).
my $opt_base_url = 'http://sourceforge.net/projects/bgoffice/files/';

my $options_ok = GetOptions(
    'list'     => \$opt_list,
    'dict=s'   => \$opt_dict,
    'output=s' => \$opt_output,
);

# Show the usage message when option parsing failed or when stray
# non-option arguments are left over.
pod2usage() if !$options_ok or @ARGV;

# Known dictionaries, keyed by the code accepted by --dict.
#   list_title - description printed by --list
#   link_text  - text of the link to look for on the project's "files" page
our %dictionaries = (
    'bg-en_dual' => {
        list_title => "Dual Bulgarian-English dictionary",
        link_text  => "Dual Bg En Dictionary",
    },
    'full-pack' => {
        list_title => 'All the dictionaries in one file',
        link_text  => 'Full Pack of Dictionaries',
    },
    'thesaurus' => {
        list_title => 'Thesaurus',
        link_text  => 'Thesaurus',
    },
    'polytechnical' => {
        list_title => 'Polytechnical English -> Bulgarian dictionary',
        link_text  => 'Polytechnical En Bg Dictionary',
    },
    'dialect' => {
        list_title => 'Dialect dictionary',
        link_text  => 'Dialect dictionary',
    },
);

# Alternative long names that --dict also accepts; each maps to a key of
# %dictionaries.
our %template_codes = (
    'Bulgarian-English dual dictionary'   => 'bg-en_dual',
    'Dictionary of north-western dialect' => 'dialect',
    'Polytechnical dictionary'            => 'polytechnical',
    'Thesaurus'                           => 'thesaurus',
);

# --list: print one "code - title" line per known dictionary and stop.
if ($opt_list) {

    # --list cannot be combined with --dict or --output
    if ( $opt_dict or $opt_output ) {
        pod2usage();
    }

    for my $code ( sort keys %dictionaries ) {
        printf( "%-15s - %s\n", $code, $dictionaries{$code}{list_title} );
    }

    exit 0;
}

# Outside of --list mode a dictionary has to be named.
pod2usage() unless $opt_dict;

# A name found in %template_codes is translated to the dictionary code it
# stands for.
if ( exists $template_codes{$opt_dict} ) {
    $opt_dict = $template_codes{$opt_dict};
}

exists $dictionaries{$opt_dict}
    or die "'$opt_dict' is not a known dictionary. Try --list.\n";

# Fall back to <code>.tar.bz2 when no usable --output value was given.
$opt_output = "$opt_dict.tar.bz2" unless $opt_output;

# Browse the project's download area, locate the archive of the selected
# dictionary and hand the final URL over to curl.
#
# The browser object is called $mech rather than $b: a lexical $b hides the
# package variable that sort() comparison blocks rely on.
# NOTE(review): the follow_link() calls below are not checked individually;
# this presumably relies on WWW::Mechanize's autocheck to abort when a page or
# link is missing - confirm against the installed version.
my $mech = WWW::Mechanize->new();


# The main "files" page contains links to individual download pages, on which
# we look for the "direct link" link

$mech->get($opt_base_url);

use URI::Escape qw(uri_escape);

my $dict_name = $dictionaries{$opt_dict}{link_text};
my $dict_url  = uri_escape($dict_name);

# \Q...\E makes the interpolated strings match literally, so a future
# link_text containing regex metacharacters cannot change the pattern.
$mech->follow_link( text_regex => qr/^\Q$dict_name\E$/i );

# Follow the link whose text looks like a version number ("1.0") and whose
# URL contains the escaped dictionary name.
$mech->follow_link(
    text_regex => qr/^\d+\.\d+$/,
    url_regex  => qr/\Q$dict_url\E/i,
);

$mech->follow_link( text => "$opt_dict.tar.bz2" );

my $link = $mech->find_link( url_regex => qr/\buse_mirror=/ );
die "'direct link' link not found on " . $mech->uri . "\n" unless $link;

#warn $link->url;

# exec() only returns when the program could not be started; without the
# check the script would silently finish with exit status 0.
exec( 'curl', '--output', $opt_output, '--location', $link->url )
    or die "Cannot run curl: $!\n";

__END__
=head1 NAME

bgoffice-dict-download - download dictionaries for bgoffice

=head1 SYNOPSIS

    # list known dictionaries
    bgoffice-dict-download --list

    # download a dictionary
    bgoffice-dict-download --dict bg-en_dual

    # explicit output file
    bgoffice-dict-download --dict bg-en_dual --output=/some/where.tar.bz2

=head1 DESCRIPTION

B<bgoffice-dict-download> is a small program to help download bgoffice
dictionaries off sourceforge.net. It browses the bgoffice project download area
and follows the download links.

The dictionary is saved in the current directory as F<I<dict>.tar.bz2>, unless
the L</--output> option is used.

=head1 OPTIONS

=over

=item --list

Prints the list of the known dictionaries.

=item --dict I<dictionary name>

Specifies the dictionary to be downloaded. See L</--list> for the known
dictionaries. This option is mandatory, unless L</--list> is given.

=item --output I<file>

Saves the downloaded dictionary in the given file, instead of
F<./I<dict>.tar.bz2>.

=back

=head1 COPYRIGHT AND LICENSE

=over

=item Copyright: (C) 2010 Damyan Ivanov L<dmn@debian.org>

=back

Permission is granted to use this work, with or without modifications, provided
that this notice is retained. If we meet some day, and you think this stuff is
worth it, you can buy me a beer in return.