Codebase list bgoffice-dict-downloader / HEAD bgoffice-dict-download

Tree @HEAD (Download .tar.gz)

bgoffice-dict-download @HEADraw · history · blame


use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use WWW::Mechanize;

# Command-line option storage; each stays undef unless its switch is given.
my $opt_list;      # --list: print the known dictionaries and exit
my $opt_dict;      # --dict NAME: which dictionary to download
my $opt_output;    # --output FILE: where to save the download

# NOTE(review): empty as written, and no visible switch assigns it -- confirm
# the intended download-area URL before relying on it.
my $opt_base_url = '';

# Parse the switches. The usage message is shown (via pod2usage) when parsing
# fails or when stray positional arguments are left over in @ARGV.
GetOptions(
    'list'     => \$opt_list,
    'dict=s'   => \$opt_dict,
    'output=s' => \$opt_output,
) and not @ARGV or pod2usage();

# Known dictionaries, keyed by the short code accepted by --dict.
#   list_title - human-readable description printed by --list
#   link_text  - text of the dictionary's link on the download pages; it is
#                matched case-insensitively when the links are followed
our %dictionaries = (
    'bg-en_dual' => {
        list_title => 'Dual Bulgarian-English dictionary',
        link_text  => 'Dual Bg En Dictionary',
    },
    'full-pack' => {
        list_title => 'All the dictionaries in one file',
        link_text  => 'Full Pack of Dictionaries',
    },
    'thesaurus' => {
        list_title => 'Thesaurus',
        link_text  => 'Thesaurus',
    },
    'polytechnical' => {
        list_title => 'Polytechnical English -> Bulgarian dictionary',
        link_text  => 'Polytechnical En Bg Dictionary',
    },
    'dialect' => {
        list_title => 'Dialect dictionary',
        link_text  => 'Dialect dictionary',
    },
);

# Long descriptive names accepted by --dict as aliases, mapped to the short
# dictionary codes used as keys of %dictionaries.
our %template_codes = (
    'Bulgarian-English dual dictionary'   => 'bg-en_dual',
    'Dictionary of north-western dialect' => 'dialect',
    'Polytechnical dictionary'            => 'polytechnical',
    'Thesaurus'                           => 'thesaurus',
);

# --list mode: print every known dictionary code with its description, sorted
# by code, then exit successfully. --list cannot be combined with --dict or
# --output; that combination shows the usage message instead.
if ($opt_list) {
    pod2usage() if $opt_dict or $opt_output;

    for my $code ( sort keys %dictionaries ) {
        printf( "%-15s - %s\n", $code, $dictionaries{$code}{list_title} );
    }

    exit 0;
}

# Outside --list mode a dictionary has to be named.
pod2usage() unless $opt_dict;

# Accept a long template name as an alias and translate it to its short code.
if ( $opt_dict and exists $template_codes{$opt_dict} ) {
    $opt_dict = $template_codes{$opt_dict};
}

# Refuse anything that is still not one of the known short codes.
unless ( exists $dictionaries{$opt_dict} ) {
    die "'$opt_dict' is not a known dictionary. Try --list.\n";
}

# Without --output, save under the archive name of the chosen dictionary.
$opt_output = "$opt_dict.tar.bz2" unless $opt_output;

# Browser object used to walk the download pages.
my $b = WWW::Mechanize->new();

# The main "files" page contains links to individual download pages, on which
# we look for the "direct link" link
# NOTE(review): no request for the main page is visible between here and the
# first follow_link() below -- presumably a $b->get(...) of $opt_base_url
# belongs here; confirm against the upstream script.


use URI::Escape qw(uri_escape);

# Link text of the chosen dictionary, plus its URI-escaped form for matching
# inside link URLs.
my $dict_name = $dictionaries{$opt_dict}{link_text};
my $dict_url = uri_escape($dict_name);
# Follow the link whose text matches the dictionary name, anchored at both
# ends and compared case-insensitively.
$b->follow_link( text_regex => qr/^$dict_name$/i );

# NOTE(review): the two lines below read as arguments of a call whose opening
# and closing lines are not present here (likely another $b->follow_link(...)
# picking a link with "digits.digits" text whose URL contains the escaped
# dictionary name) -- TODO confirm.
    text_regex => qr/^\d+\.\d+$/,
    url_regex  => qr/$dict_url/i

# Follow the link whose text is the archive file name for this dictionary.
$b->follow_link( text => "$opt_dict.tar.bz2" );

# Locate, on the current page, the link whose URL carries a use_mirror=
# parameter; that URL is what curl will be asked to fetch.
my $link = $b->find_link( url_regex => qr/\buse_mirror=/ );
die "'direct link' link not found on " . $b->uri . "\n" unless $link;

#warn $link->url;

# Hand over to curl. The list form of exec bypasses the shell, and --location
# makes curl follow redirects. exec only returns when it could not start the
# program, so report that instead of silently reaching the end of the script.
exec( 'curl', '--output', $opt_output, '--location', $link->url )
    or die "Unable to run curl: $!\n";

=head1 NAME

bgoffice-dict-download - download dictionaries for bgoffice


=head1 SYNOPSIS

    # list known dictionaries
    bgoffice-dict-download --list

    # download a dictionary
    bgoffice-dict-download --dict bg-en_dual

    # explicit output file
    bgoffice-dict-download --dict bg-en_dual --output=/some/where.tar.bz2


=head1 DESCRIPTION

B<bgoffice-dict-download> is a small program to help download bgoffice
dictionaries off the project's download site. It browses the bgoffice project
download area and follows the download links.

The dictionary is downloaded in the current directory, unless the L</--output>
option is used.

=head1 OPTIONS


=over

=item --list

Prints the list of the known dictionaries.

=item --dict I<dictionary name>

Specifies the dictionary to be downloaded. See L</--list> for the known
dictionaries. This option is mandatory, unless L</--list> is given.

=item --output I<file>

Saves the downloaded dictionary in the given file, instead of ./<dict>.tar.bz2

=back

=head1 COPYRIGHT AND LICENSE

=over

=item Copyright: (C) 2010 Damyan Ivanov

=back


Permission is granted to use this work, with or without modifications, provided
that this notice is retained. If we meet some day, and you think this stuff is
worth it, you can buy me a beer in return.