#!/usr/bin/perl
# vim: set noexpandtab :
#
# mussort - a simple program for sorting music collections
# Copyright (C) Eskild Hustvedt 2007, 2008, 2009, 2010, 2011
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
use warnings;
use utf8;
use File::Find;
use File::Basename qw(dirname basename);
use File::Path qw(rmtree);
use File::Copy;
use Getopt::Long;
use IPC::Open2;
use Cwd qw(realpath);
use Storable;
use constant {
true => 1,
false => 0,
# Verbosity levels
M_STANDARD => 1,
M_VERBOSE => 2,
M_VERYVERBOSE => 3,
M_DEBUG => 4,
# Directory types, used for getDirectoriesIn()
DIR_TYPE_PRIMARY => 1,
DIR_TYPE_SECONDARY => 2,
# Duplicate actions
DUPE_REPLACE => 1,
DUPE_NOREMOVE => 2,
DUPE_KEEP => 3,
};
our $VERSION = '0.3.1';
# Hash of which information sources we have
my %Has = (
'Audio::File' => false,
'Ogg::Vorbis::Header::PurePerl' => false,
'Ogg::Vorbis::Header' => false,
'id3v2' => false,
'id3info' => false,
'ogginfo' => false,
'metaflac' => false,
);
# Dir hash, used for the final directory cleanup
my %cleanTree;
# Directory preference hash, used for case-insensitive searches
my %DirPreferance;
# The file information cache (as used by --cache)
my %cachedInfo;
# The directory information cache
my @dirInfoCache;
# The number of entries added to the cache since it was last saved.
# Used to write the cache file during runtime
my $cacheAdditions = 0;
# -- The following are altered through command-line parameters --
# Int, the action to perform when detecting duplicates
my $dupeAction = DUPE_KEEP;
# Bool, true if we should prefer Audio::File over the other programs
my $PreferAudioFile = false;
# Verbosity level (0-4)
my $verbosity = 1;
# Bool, true if we should allow special characters in file names
my $AllowSpecialChars = false;
# Bool, true if we should work in case-insensitive mode
my $DirInsensitive = false;
# Bool, true if we should work in case-insensitive consistent mode
my $DirConsistent = false;
# Bool, true if we should silently skip files (not outputting information about it)
my $SilentSkip = false;
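# Compilation detection state: false/undef when disabled, otherwise a hashref
# in which GetInfo() records album -> band -> list of files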
my $CompilationDetection;
# Bool, true or a string if caching is enabled (string would be path to the cache file)
my $cacheFile = true;
# Bool, true if we should *read* from the cache (as opposed to only writing updated
# info to it)
my $readFromCache = true;
# Bool, true if we should add all directories to the clean process
my $addAllCleanTree = false;
# Bool, true if we just output what will be done, don't actually do anything
my $DryRunMove;
# This makes sure the children are slayed properly: whenever a child process
# (such as the helpers started with open2() further down) terminates, the
# handler calls wait() to collect its exit status so it does not linger as a
# zombie. The PID returned by wait() is not used for anything.
$SIG{CHLD} = sub {
my $PID = wait;
return(1)
};
# --
# File information
# --
# Purpose: Get information about an ogg vorbis file using ogginfo
# Usage: my %Info = GetOggVorbisInfo(FILE);
# Returns: a hash with the keys Title, Band, Album, Track, SetNo and Ext,
#  or nothing at all if ogginfo reported no tag other than the track number.
sub GetOggVorbisInfo
{
	my $InfoFile = shift;
	$InfoFile = realpath($InfoFile);
	my %Information = ();
	# Point our own STDERR at the black hole for the duration of this sub
	local *STDERR;
	open(STDERR,'>','/dev/null');
	# open2() is documented to raise an exception on failure, the FatalError()
	# is only a safety net.
	my $PID = open2(my $Child_OUT, my $Child_IN, 'ogginfo', $InfoFile) or FatalError("Unable to open2(): $!\n");
	my $hadInfo = false;
	# A lexical is used instead of $_ so that the caller's $_ is left alone
	while(my $line = <$Child_OUT>)
	{
		next if not $line =~ /=/;
		chomp($line);
		# Split on the FIRST '=' only, so that values which themselves
		# contain a '=' are kept intact instead of corrupting the key.
		my ($Opt,$Val) = split(/=/,$line,2);
		foreach my $part ($Opt,$Val)
		{
			$part =~ s/^\s+//;
			$part =~ s/\s+$//;
		}
		$Opt =~ tr[a-z][A-Z];
		if($Val =~ /\S/)
		{
			# A track number alone is not enough to count as "had information"
			if(not $Opt eq 'TRACKNUMBER')
			{
				$hadInfo = true;
			}
			$Information{$Opt} = $Val;
		}
	}
	close($Child_OUT);
	close($Child_IN);
	# Reduce "3/12" to "3", but only if there actually is a track number
	if(defined $Information{TRACKNUMBER})
	{
		$Information{TRACKNUMBER} =~ s#/.*##;
	}
	if ($hadInfo)
	{
		my %FileInfo;
		$FileInfo{Title} = getTag(\%Information,'title');
		$FileInfo{Band} = getTag(\%Information,'artist');
		$FileInfo{Album} = getTag(\%Information,'album');
		$FileInfo{Track} = getTag(\%Information,'track');
		$FileInfo{SetNo} = getTag(\%Information,'setNo');
		$FileInfo{Ext} = 'ogg';
		return %FileInfo;
	}
	return;
}
# Purpose: Get information about an mp3 file
# Usage: my %Info = GetMP3Info(FILE, TYPE);
#  TYPE selects the backend and has to be either 'id3v2' or 'id3info',
#  any other value is fatal.
# Returns: a hash with the keys Title, Band, Album, Track, SetNo and Ext,
#  or nothing at all if the backend did not find any usable information.
sub GetMP3Info
{
	my ($InfoFile,$type) = @_;
	my ($hadInfo,$Information) = (false,undef);
	if ($type eq 'id3v2')
	{
		($hadInfo,$Information) = GetMP3Info_id3v2($InfoFile);
	}
	elsif($type eq 'id3info')
	{
		($hadInfo,$Information) = GetMP3Info_id3info($InfoFile);
	}
	else
	{
		die("GetMP3Info: unknown type: $type\n");
	}
	# Nothing useful was extracted, signal that with an empty return
	return if not $hadInfo;

	# Pairs of (key in the result hash, tag name understood by getTag)
	my @fields = (
		[ Title => 'title' ],
		[ Band => 'artist' ],
		[ Album => 'album' ],
		[ Track => 'track' ],
		[ SetNo => 'setNo' ],
	);
	my %FileInfo;
	foreach my $field (@fields)
	{
		my ($key,$tag) = @{$field};
		$FileInfo{$key} = getTag($Information,$tag);
	}
	$FileInfo{Ext} = 'mp3';
	return(%FileInfo);
}
# Purpose: Get information about a flac file using metaflac
# Usage: my %Info = GetFLACInfo(FILE);
# Returns: a hash with the keys Title, Band, Album, Track, SetNo and Ext,
#  or nothing at all if metaflac reported no tag other than the track number.
sub GetFLACInfo
{
	my $InfoFile = shift;
	$InfoFile = realpath($InfoFile);
	my %Information = ();
	# Point our own STDERR at the black hole for the duration of this sub
	local *STDERR;
	open(STDERR,'>','/dev/null');
	# open2() is documented to raise an exception on failure, the FatalError()
	# is only a safety net.
	my $PID = open2(my $Child_OUT, my $Child_IN, 'metaflac','--list', $InfoFile) or FatalError("Unable to open2(): $!\n");
	my $hadInfo = false;
	# A lexical is used instead of $_ so that the caller's $_ is left alone
	while(my $line = <$Child_OUT>)
	{
		# Only the "comment[N]: KEY=VALUE" lines are of interest
		next if not $line =~ s/^\s*comment\[[^\]]+\]:\s*//;
		chomp($line);
		# Split on the FIRST '=' only, so that values which themselves
		# contain a '=' are kept intact instead of corrupting the key.
		my ($Opt,$Val) = split(/=/,$line,2);
		# A comment without any '=' has no value, do not treat it as a tag
		next if not defined $Val;
		foreach my $part ($Opt,$Val)
		{
			$part =~ s/^\s+//;
			$part =~ s/\s+$//;
		}
		$Opt =~ tr[a-z][A-Z];
		if($Val =~ /\S/)
		{
			# A track number alone is not enough to count as "had information"
			if(not $Opt eq 'TRACKNUMBER')
			{
				$hadInfo = true;
			}
			$Information{$Opt} = $Val;
		}
	}
	close($Child_OUT);
	close($Child_IN);
	# Reduce "3/12" to "3", but only if there actually is a track number
	if(defined $Information{TRACKNUMBER})
	{
		$Information{TRACKNUMBER} =~ s#/.*##;
	}
	if ($hadInfo)
	{
		my %FileInfo;
		$FileInfo{Title} = getTag(\%Information,'title');
		$FileInfo{Band} = getTag(\%Information,'artist');
		$FileInfo{Album} = getTag(\%Information,'album');
		$FileInfo{Track} = getTag(\%Information,'track');
		$FileInfo{SetNo} = getTag(\%Information,'setNo');
		$FileInfo{Ext} = 'flac';
		return %FileInfo;
	}
	return;
}
# Purpose: Get information about an mp3 file using id3v2
# Usage: my ($hasInfo, $info) = GetMP3Info_id3v2(FILE);
# Returns: a bool telling if anything useful was found, and a hashref of
#  the raw values keyed by the names id3v2 printed (ID3v1 field names such
#  as Title/Artist as well as ID3v2 frame ids such as TIT2).
sub GetMP3Info_id3v2
{
	my $InfoFile = shift;
	my %Information = ();
	my ($Child_OUT,$Child_IN);
	{
		# Run id3v2 in the C locale. local() puts the previous state back
		# when the block is left (including "not set at all"), even if
		# open2() throws.
		local $ENV{LC_ALL} = 'C';
		# open2() is documented to raise an exception on failure, the
		# FatalError() is only a safety net.
		my $PID = open2($Child_OUT, $Child_IN, 'id3v2', '--list', $InfoFile) or FatalError("Unable to open2(): $!");
	}
	my $hadInfo = false;
	# A lexical is used instead of $_ so that the caller's $_ is left alone
	while(my $line = <$Child_OUT>)
	{
		if ($line =~ /^(Title|Album|Comment)\s*:/)
		{
			# ID3v1 style lines that hold two fields each,
			# "First : value    Second: value"
			chomp($line);
			my ($first,$second);
			if ($line =~ /^Title/)
			{
				$first = 'Title';
				$second = 'Artist';
			}
			elsif($line =~ /^Album/)
			{
				$first = 'Album';
				$second = 'Year';
			}
			elsif($line =~ /^Comment/)
			{
				$first = 'Comment';
				$second = 'Track';
			}
			my $f = $line;
			my $s = $line;
			if(not $f =~ s/^$first\s*:\s*//)
			{
				next;
			}
			$f =~ s/\s*$second:.*//g;
			if(not $s =~ s/^$first.*$second\s*:\s*//)
			{
				next;
			}
			# The Album line also carries a trailing ", Genre: ..." part
			$s =~ s/,\s*Genre\s*:.*//g;
			# NOTE(review): "and" binds tighter than "or", so a non-empty
			# first field counts even on the Comment line. The parentheses
			# only make that existing behaviour explicit - confirm that it
			# is the intended one.
			if ($f or ($s and not $first eq 'Comment'))
			{
				$hadInfo = true;
			}
			$Information{$first} = $f;
			$Information{$second} = $s;
		}
		else
		{
			# ID3v2 frame lines, "TIT2 (Title/songname/content description): value"
			next if not $line =~ /\):/;
			chomp($line);
			my $Opt = $line;
			my $Val = $line;
			$Opt =~ s/^(\w+)\s.*/$1/g;
			$Val =~ s/^.+\):\s(.+)$/$1/g;
			if($Val =~ /\S/)
			{
				# A track number alone is not enough to count as "had information"
				if(not $Opt eq 'TRCK')
				{
					$hadInfo = true;
				}
				$Information{$Opt} = $Val;
			}
		}
	}
	close($Child_OUT);
	close($Child_IN);
	return($hadInfo,\%Information);
}
# Purpose: Get information about an mp3 file using id3info
# Usage: my ($hasInfo, $info) = GetMP3Info_id3info(FILE);
# Returns: a bool telling if anything besides a track number was found, and
#  a hashref of the raw values keyed by the frame id that id3info printed.
sub GetMP3Info_id3info
{
	my ($InfoFile) = @_;
	my %Information;
	my $PID = open2(my $Child_OUT, my $Child_IN, 'id3info', $InfoFile) or FatalError("Unable to open2(): $!");
	my $hadInfo = false;
	while(<$Child_OUT>)
	{
		# Frame lines look like "=== TIT2 (description): value"
		next unless /^==/;
		chomp;
		# Both start out as the full line, and stay that way if the
		# substitution does not match
		(my $Opt = $_) =~ s/^===\s(\w+)\s.*/$1/g;
		(my $Val = $_) =~ s/^.+\):\s(.+)$/$1/g;
		next unless $Val =~ /\S/;
		# A track number alone does not count as information
		$hadInfo = true unless $Opt =~ /^TRC?K$/;
		$Information{$Opt} = $Val;
	}
	close($Child_OUT);
	close($Child_IN);
	return($hadInfo,\%Information);
}
# Purpose: Get information about an Ogg Vorbis file using
#  Ogg::Vorbis::Header or Ogg::Vorbis::Header::PurePerl
# Usage: my %Info = GetInfoFromVorbisHeader(FILE, TYPE);
#  TYPE is optional, supply 'PurePerl' to use Ogg::Vorbis::Header::PurePerl,
#  otherwise Ogg::Vorbis::Header is used.
# Returns: a hash with the keys Title, Band, Album, Track, SetNo and Ext,
#  or nothing at all if no usable information was found or the module
#  crashed while another information source is available. Dies if the
#  module crashed and there is nothing else to fall back on.
sub GetInfoFromVorbisHeader
{
	my $File = shift;
	my $modType = shift;
	my %Information;
	my $hadInfo = false;
	# Resolved up front so that the error messages can name the module
	# that was actually used.
	my $usePurePerl = (defined $modType and $modType eq 'PurePerl');
	my $modName = $usePurePerl ? 'Ogg::Vorbis::Header::PurePerl' : 'Ogg::Vorbis::Header';
	eval {
		local *STDERR;
		# Ogg::Vorbis::Header(::PurePerl) outputs junk to STDERR we don't want,
		# so temporarily redirect it to the black hole
		open(STDERR,'>','/dev/null');
		my $ov;
		if ($usePurePerl)
		{
			# Use load() when the installed PurePerl module offers it,
			# new() otherwise
			if(Ogg::Vorbis::Header::PurePerl->can('load'))
			{
				$ov = Ogg::Vorbis::Header::PurePerl->load($File);
			}
			else
			{
				$ov = Ogg::Vorbis::Header::PurePerl->new($File);
			}
		}
		else
		{
			$ov = Ogg::Vorbis::Header->load($File);
		}
		foreach my $t (qw(title artist album tracknumber discnumber))
		{
			$Information{$t} = $ov->comment($t);
			# A value of '1' is not counted as information (GetInfo treats
			# it as an Ogg::Vorbis::Header workaround as well)
			if(defined $Information{$t} and length $Information{$t} and $Information{$t} ne '1')
			{
				$hadInfo = true;
			}
		}
		1;
	} or do {
		my $err = $@;
		if ($Has{'ogginfo'} || $Has{'Audio::File'})
		{
			# Something else can take over, so let the caller try that
			printv(M_DEBUG,"$modName crashed on $File: $err\n");
			return;
		}
		else
		{
			die($modName.' appears to have crashed for the file "'.$File.'". Please install either ogginfo from vorbis-tools or Audio::File. Error: '.$err);
		}
	};
	if ($hadInfo)
	{
		my %FileInfo;
		$FileInfo{Title} = getTag(\%Information,'title');
		$FileInfo{Band} = getTag(\%Information,'artist');
		$FileInfo{Album} = getTag(\%Information,'album');
		$FileInfo{Track} = getTag(\%Information,'track');
		$FileInfo{SetNo} = getTag(\%Information,'setNo');
		$FileInfo{Ext} = 'ogg';
		return %FileInfo;
	}
	return;
}
# Purpose: Get information about an audio file using Audio::File
# Usage: my %Info = GetInfoFromAudioFile(FILE);
# Returns: a hash with Ext and whichever of Title, Band, Album and Track
#  could be read, or nothing at all if Audio::File crashed and another
#  information source for the file type is available. Dies if it crashed
#  and there is nothing else to fall back on.
sub GetInfoFromAudioFile
{
	my $File = shift;
	my %FileInfo;
	if ($File =~ /mp3$/i)
	{
		$FileInfo{Ext} = 'mp3';
	}
	elsif($File =~ /ogg$/i)
	{
		$FileInfo{Ext} = 'ogg';
	}
	elsif($File =~ /flac$/i)
	{
		$FileInfo{Ext} = 'flac';
	}
	else
	{
		FatalError("Unknown filetype: $File\n");
	}
	eval {
		local *STDERR;
		# Audio::File outputs junk to STDERR we don't want, so temporarily redirect it
		# to the black hole
		open(STDERR,'>','/dev/null');
		my $af = Audio::File->new($File);
		# Each tag is read in its own eval so that one failing accessor
		# doesn't cost us the others
		eval { $FileInfo{Title} = CleanTag($af->tag->title) };
		eval { $FileInfo{Band} = CleanTag($af->tag->artist) };
		eval { $FileInfo{Album} = CleanTag($af->tag->album) };
		eval { $FileInfo{Track} = CleanTag($af->tag->track) };
		1;
	} or do {
		my $err = $@;
		printv(M_DEBUG,"Audio::File crashed on $File: $err\n");
		# The other information sources that can handle each file type. If
		# any of them is available we return nothing and let the caller
		# move on to it instead of aborting the whole run.
		my %alternatives = (
			mp3 => [ 'id3info', 'id3v2' ],
			ogg => [ 'ogginfo', 'Ogg::Vorbis::Header::PurePerl', 'Ogg::Vorbis::Header' ],
			flac => [ 'metaflac' ],
		);
		foreach my $source (@{$alternatives{$FileInfo{Ext}}})
		{
			return if $Has{$source};
		}
		die('Audio::File appears to have crashed for the file "'.$File.'". See the DEPENDENCIES section of the mussort manpage for a list of other libraries and utilities you can install to avoid this problem. Error: '.$err);
	};
	return %FileInfo;
}
# Purpose: Check that the tag supplied is valid
# Usage: bool = IsValidTagValue(VALUE);
# Returns true if VALUE is usable, false if it isn't. This is nothing more
# than the negation of IsInvalidTagValue().
sub IsValidTagValue
{
	my $invalid = IsInvalidTagValue(@_);
	return !$invalid;
}
# Purpose: Check if the tag supplied is unusable
# Usage: bool = IsInvalidTagValue(VALUE);
# Returns true if VALUE is undef, empty or one of the placeholder strings
# 'Unknown' and '~', and false for anything else.
sub IsInvalidTagValue
{
	my ($value) = @_;
	return true if not defined $value;
	return true if not length $value;
	foreach my $placeholder ('Unknown','~')
	{
		return true if $value eq $placeholder;
	}
	return false;
}
# Purpose: Get information about a file
# Usage: $FileInfo = GetInfo(pathtofile);
# Returns: a hashref with whichever of Title, Band, Album, Track, SetNo
#  and Ext could be determined.
#
# The cache is consulted first. Failing that, each information source that
# is available for the file type is tried in turn until one of them returns
# values of optimal quality. If none does, the values from the last source
# tried are used. Only values of optimal quality are added to the cache.
sub GetInfo
{
	my $File = shift;
	my $usedCached = false;
	my %FileInfo;
	my $AudioFilePreferred = $PreferAudioFile;
	# Name of the source that supplied the values, stays undef if none could be tried
	my $used;
	my $optimalQuality = 5;
	my $quality = $optimalQuality;
	if(my $info = getInfoFromCache($File))
	{
		printv(M_DEBUG,'Using cached information for '.$File."\n");
		%FileInfo = %{$info};
		if(%FileInfo)
		{
			$usedCached = true;
			$used = 'cache';
		}
		else
		{
			printv(M_DEBUG,'Cached null information, ignoring'."\n");
		}
	}
	if (not $usedCached)
	{
		printv(M_VERYVERBOSE,'Reading information about file: '.$File."\n");
		# The sources to try, in order. Audio::File is put last unless it
		# is preferred.
		my @try;
		if ($File =~ /ogg$/i)
		{
			if ($Has{'Ogg::Vorbis::Header'})
			{
				push(@try,'Ogg::Vorbis::Header');
			}
			if ($Has{'Ogg::Vorbis::Header::PurePerl'})
			{
				push(@try,'Ogg::Vorbis::Header::PurePerl');
			}
			if ($Has{'ogginfo'})
			{
				push(@try,'ogg');
			}
		}
		elsif($File =~ /mp3$/i)
		{
			if ($Has{'id3v2'})
			{
				push(@try,'id3v2');
			}
			if ($Has{'id3info'})
			{
				push(@try,'id3info');
			}
		}
		# Case-insensitive, like the checks for the other file types
		elsif($File =~ /flac$/i)
		{
			push(@try,'metaflac');
			# Audio::File is faster than metaflac, so use it if possible
			$AudioFilePreferred = 1;
		}
		else
		{
			FatalError("Unknown filetype: $File\n");
		}
		if ($Has{'Audio::File'})
		{
			if ($AudioFilePreferred)
			{
				unshift(@try,'Audio::File');
			}
			else
			{
				push(@try,'Audio::File');
			}
		}
		foreach my $t (@try)
		{
			printv(M_DEBUG,'Trying method: '.$t."\n");
			$used = $t;
			$quality = $optimalQuality;
			if ($t eq 'Audio::File')
			{
				%FileInfo = GetInfoFromAudioFile($File);
			}
			elsif($t eq 'Ogg::Vorbis::Header')
			{
				%FileInfo = GetInfoFromVorbisHeader($File);
			}
			elsif($t eq 'Ogg::Vorbis::Header::PurePerl')
			{
				%FileInfo = GetInfoFromVorbisHeader($File,'PurePerl');
			}
			elsif($t eq 'id3v2')
			{
				%FileInfo = GetMP3Info($File,'id3v2');
			}
			elsif($t eq 'id3info')
			{
				%FileInfo = GetMP3Info($File,'id3info');
			}
			elsif($t eq 'ogg')
			{
				%FileInfo = GetOggVorbisInfo($File);
			}
			elsif($t eq 'metaflac')
			{
				%FileInfo = GetFLACInfo($File);
			}
			# Rate what we got. $discard holds the first reason found
			# for not being happy with the values.
			my $discard;
			if (!%FileInfo)
			{
				$discard ||= 'No information returned';
				$quality = 0;
			}
			if(scalar(keys %FileInfo) < 2)
			{
				$discard ||= 'Not enough information returned';
				$quality = -1;
			}
			if(!IsValidTagValue($FileInfo{Title}) || $FileInfo{Title} !~ /[\w\d]/)
			{
				$discard ||= 'Title tag invalid';
				$quality--;
			}
			if(!IsValidTagValue($FileInfo{Band}) || $FileInfo{Band} !~ /[\w\d]/)
			{
				$discard ||= 'Band tag invalid';
				$quality--;
			}
			if( (defined($FileInfo{Title}) && $FileInfo{Title} eq '1' ) || (defined($FileInfo{Band}) && $FileInfo{Band} eq '1'))
			{
				$discard ||= 'Title or Band set to "1" (Ogg::Vorbis::Header workaround)';
				$quality--;
			}
			if ($quality == $optimalQuality)
			{
				printv(M_DEBUG,'Quality of values from '.$t.' was excellent ('.$quality.'/'.$optimalQuality.') - using them'."\n");
				last;
			}
			elsif($quality < 1)
			{
				printv(M_DEBUG,'Quality of values from '.$t.' was rubbish ('.$quality.'/'.$optimalQuality.'): '.$discard.". Will attempt another method if possible.\n");
			}
			else
			{
				printv(M_DEBUG,'Quality of values from '.$t.' was poor ('.$quality.'/'.$optimalQuality.'): '.$discard.". Will attempt another method if possible.\n");
			}
		}
	}
	# $used is undef if there was no source at all that could be tried
	printv(M_DEBUG,'Using values from '.(defined $used ? $used : 'nothing (no information source available)')."\n");
	# Values that came from the cache don't need to be added to it again
	if ($quality == $optimalQuality && not $usedCached)
	{
		addToCache($File,\%FileInfo);
	}
	# Record which bands appear on which album, for compilation detection
	if ($CompilationDetection and defined $FileInfo{Album} and defined $FileInfo{Band})
	{
		if(not defined $CompilationDetection->{album}->{$FileInfo{Album}}->{$FileInfo{Band}})
		{
			$CompilationDetection->{album}->{$FileInfo{Album}}->{$FileInfo{Band}} = [];
		}
		push(@{$CompilationDetection->{album}->{$FileInfo{Album}}->{$FileInfo{Band}}},$File);
	}
	# If we couldn't extract a track then try to extract it from the filename instead
	if(not defined $FileInfo{Track} or
		not $FileInfo{Track} =~ /^\d+$/ or
		$FileInfo{Track} eq '0'
	)
	{
		my $Track = basename($File);
		if ($Track =~ s/^(\d+)\D+.*$/$1/)
		{
			$FileInfo{Track} = $Track;
		}
	}
	return(\%FileInfo);
}
# Purpose: Fetch tag values from extracted data
# Usage: value = getTag(infoHashRef, tagName);
#  infoHashRef is the raw key => value data from one of the tag readers
#  tagName is one of: title, artist, album, track, setNo
# Returns:
#  - for title, artist and album: the first valid value, run through
#    CleanTag(), or 'Unknown' if there is none
#  - for track: the first valid value with any "/total" part removed,
#    or 'Unknown'
#  - for setNo: a hashref { number => N, total => M } if the value is of
#    the form "N/M" and either number is above 1, otherwise an empty hashref
#  - nothing (after a warning) if tagName is not one of the above
sub getTag
{
	my $info = shift;
	my $tag = shift;
	# The keys that the various tag readers use for each piece of information
	my %possibleTags = (
		title => [ 'TIT2','TT2','TITLE','Title','title'],
		artist => [ 'TPE1','TP1','ARTIST','Artist','artist' ],
		album => [ 'TALB','TAL','ALBUM','Album', 'album' ],
		track => [ 'TRCK','TRK','TRACKNUMBER', 'TRACK','Track', 'tracknumber' ],
		setNo => [ 'TPA','DISCNUMBER', 'discnumber','TPOS' ],
	);
	if(not defined $possibleTags{$tag})
	{
		warn("Tried to fetch tag '$tag', but it wasn't found in possibleTags");
		return;
	}
	my $content = 'Unknown';
	foreach my $possible (@{$possibleTags{$tag}})
	{
		my $candidate = $info->{$possible};
		next if not IsValidTagValue($candidate);
		if ($tag ne 'setNo' && $tag ne 'track')
		{
			$candidate = CleanTag($candidate);
			# Cleaning can leave nothing usable behind. In that case move
			# on, without letting the useless value become the result.
			next if not IsValidTagValue($candidate);
		}
		$content = $candidate;
		last;
	}
	if ($tag eq 'track')
	{
		# "3/12" => "3"
		$content =~ s{/.*$}{}g;
	}
	elsif($tag eq 'setNo')
	{
		my $valid = false;
		if ($content =~ m{^(\d+)/(\d+)$})
		{
			my ($no,$total) = ($1,$2);
			# "1/1" doesn't tell us anything worth putting in a file name
			if ($no > 1 || $total > 1)
			{
				$valid = true;
				$content = { total => $total, number => $no };
			}
		}
		if(not $valid)
		{
			$content = {};
		}
	}
	return $content;
}
# Purpose: Strip stuff we don't want in file/dir names
# Usage: my $NewVar = CleanTag(OldVar);
# Returns: the cleaned string, which can be empty. An undefined OldVar is
#  treated as an empty string.
#
# Unless $AllowSpecialChars is true, everything other than word characters,
# whitespace and the characters . , _ - is removed.
sub CleanTag
{
	my $Name = shift;
	$Name = '' if not defined $Name;
	# Make & into and
	$Name =~ s/\&/and/g;
	# Don't allow whitespace
	$Name =~ s/\s+/_/g;
	# Remove these characters - if present
	$Name =~ s/~//g;
	$Name =~ s/('|"|\(|\))+//g;
	$Name =~ s#/##g;
	$Name =~ s/\?//g;
	$Name =~ s/#//g;
	$Name =~ s/\\//g;
	# Clean up non-ascii characters. This is done before the tidying below
	# so that removing a character can't leave doubled or dangling
	# separators behind.
	if(not $AllowSpecialChars) {
		# ae, oe and aa ligatures/letters in both cases, written as code
		# points so they don't depend on the encoding of this file. They
		# only match if the tag has been decoded into characters.
		$Name =~ s/[\x{E6}\x{C6}]/ae/g;
		$Name =~ s/[\x{F8}\x{D8}]/oe/g;
		$Name =~ s/[\x{E5}\x{C5}]/aa/g;
		# Now remove everything that doesn't match this class. The - is
		# placed last so that it is a literal dash and not a range.
		$Name =~ s/[^\w\s.,_-]//g;
	}
	# Ensure we don't have two identical _ or - following each other. That's ugly.
	$Name =~ s/_+/_/g;
	$Name =~ s/-+/-/g;
	# Don't begin a file with .
	$Name =~ s/^\.+//;
	# Don't end or begin a file with _ or -
	$Name =~ s/(_|-)+$//;
	$Name =~ s/^(_|-)+//;
	# If we have more than 3 . after each other, force only three
	$Name =~ s/\.\.\.\.+/.../g;
	# Don't let it start with a dot
	$Name =~ s/^\.+//;
	# Don't allow _-_ - just use one -
	$Name =~ s/_-_/-/g;
	# Finally, return the cleaned filename
	return($Name);
}
# --
# Main application
# --
# Purpose: use() a module if it's installed
# Usage: bool = tryuse(MODULE);
# Returns true if the use was successful, nothing (undef/empty list) otherwise
sub tryuse
{
	my ($mod) = @_;
	# The trailing 1 makes the eval true when the use itself succeeded
	my $loaded = eval('use '.$mod.';1;');
	return true if $loaded;
	return;
}
# Purpose: Set the title of the xterm/screen
# Usage: setTermTitle(TITLE);
# Does nothing if the verbosity is 0 or below, or if TERM is not set.
sub setTermTitle
{
	return unless $verbosity > 0;
	return unless defined $ENV{TERM};
	my ($title) = @_;
	# A TERM containing "screen" gets a different escape sequence than
	# every other terminal
	my $sequence = ($ENV{TERM} =~ /screen/)
		? "\033k$title\033\\"
		: "\033]0;$title\007";
	print $sequence;
}
# Purpose: Returns the mtime of a file
# Usage: $mtime = mtime(file);
# Note: It returns zero (not undef) if it can't read the mtime
sub mtime
{
	my ($file) = @_;
	# Element 9 of the list returned by stat() is the modification time,
	# and the slice is undef if stat() failed
	my $mtime = (stat($file))[9];
	return $mtime if defined $mtime;
	printv(M_DEBUG,'Failed to read mtime from file '.$file."\n");
	return 0;
}
# Purpose: Internal mkpath() that also modifies the directory cache
# Usage: intMkpath(/path/to/create);
# Creates the directory and any missing parents. Each new directory is also
# added to the cached listing of its parent, if there is one. Dies if a
# mkdir() fails.
sub intMkpath
{
	my ($fullpath) = @_;
	return if -d $fullpath;
	$fullpath =~ s{/+$}{};
	# Walk upwards until an existing directory is found, collecting the
	# missing ones with the topmost first
	my @pathsToCreate;
	for(my $missing = $fullpath; not -d $missing; $missing = dirname($missing))
	{
		unshift(@pathsToCreate,$missing);
	}
	foreach my $dir (@pathsToCreate)
	{
		mkdir($dir) or die('Failed to mkdir('.$dir.'): '.$!);
		my $parent = dirname($dir);
		my $name = basename($dir);
		foreach my $type (DIR_TYPE_PRIMARY,DIR_TYPE_SECONDARY)
		{
			# Only listings that are already cached need updating
			my $listing = $dirInfoCache[$type]->{$parent};
			next if not defined $listing;
			push(@{$listing},$name);
			@{$listing} = sort(@{$listing});
		}
	}
}
# Purpose: Report a missing dependency and abort
# Usage: depError(FORMAT, \@INSTALL, RECOMMENDED);
#  FORMAT is the name of the format that lacks support
#  \@INSTALL lists the commands/modules of which one has to be installed
#  RECOMMENDED is the entry in \@INSTALL to flag as recommended
# The message names the package that provides each entry when the
# distribution is recognized. Never returns, it ends in FatalError().
sub depError
{
	my ($format,$install,$recommended) = @_;
	my %debPackages = (
		id3info => 'libid3-dev',
		id3v2 => 'id3v2',
		ogginfo => 'vorbis-tools',
		'Audio::File' => 'libaudio-file-perl',
		'Ogg::Vorbis::Header::PurePerl' => 'libogg-vorbis-header-pureperl-perl',
		'Ogg::Vorbis::Header' => 'libogg-vorbis-header-perl',
	);
	my %rpmPackages = (
		ogginfo => 'vorbis-tools',
		id3info => 'id3lib',
		id3v2 => 'id3v2',
	);
	# Maps a file that identifies a distribution to that distribution's
	# command -> package map
	my %DepToPackage = (
		'/etc/apt/apt.conf.d/01ubuntu' => \%debPackages,
		'/etc/debian_version' => \%debPackages,
		'/etc/mandriva-release' => \%rpmPackages,
		'/etc/fedora-release' => \%rpmPackages,
	);
	# If several of the files exist, the last one seen wins
	my $depsMap = {};
	foreach my $path (grep { -e $_ } keys(%DepToPackage))
	{
		$depsMap = $DepToPackage{$path};
	}
	my @message = (
		"You are missing a component for $format support.\n",
		"Please install either of the following:\n",
	);
	foreach my $e (@{$install})
	{
		my $label = ($e eq $recommended) ? '[RECOMMENDED] '.$e : $e;
		if ($depsMap->{$e})
		{
			push(@message,"$label (from the $depsMap->{$e} package)\n");
			next;
		}
		$label .= '(perl module)' if $e =~ /::/;
		push(@message,"$label\n");
	}
	FatalError(join('',@message));
}
# Purpose: Output an error message
# Usage: Error(MESSAGE);
# The message is sent through warn(), prefixed with "Error: "
sub Error
{
	my ($message) = @_;
	warn 'Error: '.$message."\n";
}
# Purpose: Output a fatal error message (causes die())
# Usage: FatalError(MESSAGE);
# Never returns. The message is prefixed with "Error: "
sub FatalError
{
	my ($message) = @_;
	die 'Error: '.$message."\n";
}
# Purpose: Check if a directory is empty
# Usage: DirIsEmpty(PATH);
# Returns 1 if it is empty, 0 if it isn't.
# A PATH that is false, or that can't be opened as a directory, is never
# reported as empty - we can't know what is inside.
sub DirIsEmpty
{
	my $dir = shift;
	return 0 if not $dir;
	opendir(my $dirHandle, $dir) or return 0;
	# Everything besides the . and .. entries counts as content
	my @entries = grep { $_ ne '.' and $_ ne '..' } readdir($dirHandle);
	closedir($dirHandle);
	return @entries ? 0 : 1;
}
# Purpose: Get information from cache if caching is enabled
# Usage: $info = getInfoFromCache(FILE);
# Returns the cached information (as it was handed to addToCache) if
# reading from the cache is enabled and the file has not been modified
# since the entry was made. Returns nothing otherwise.
sub getInfoFromCache
{
	my ($file) = @_;
	return if not $readFromCache;
	return if not defined $cacheFile;
	my $cachedMtime = $cachedInfo{mtime}->{$file};
	return if not defined $cachedMtime;
	# The file is newer than the cache entry, so the entry is stale
	return if mtime($file) > $cachedMtime;
	return $cachedInfo{info}->{$file};
}
# Purpose: Add information to the in-memory cache
# Usage: addToCache($file,\%info);
# Nothing is added if:
#  - there already is an entry with more than one key that is at least as
#    new as the file
#  - \%info has fewer than two keys
#  - the mtime of the file can't be read
# The cache is written to disk after every 200-odd additions.
sub addToCache
{
	my $file = shift;
	my $info = shift;
	if(
		defined $cachedInfo{mtime} &&
		defined $cachedInfo{mtime}->{$file} &&
		defined $cachedInfo{info} &&
		defined $cachedInfo{info}->{$file} &&
		(keys %{$cachedInfo{info}->{$file}} > 1)
	)
	{
		if ($cachedInfo{mtime}->{$file} >= mtime($file))
		{
			return;
		}
		else
		{
			printv(M_DEBUG,'Re-evaluating cached information for '.$file."\n");
		}
	}
	if(not (keys %{$info} > 1))
	{
		printv(M_DEBUG,'Refusing to add cached data for '.$file.': too little information'."\n");
		return;
	}
	if(not defined $cachedInfo{info})
	{
		$cachedInfo{info} = {};
	}
	if(not defined $cachedInfo{mtime})
	{
		$cachedInfo{mtime} = {};
	}
	# mtime() returns 0 if the file can't be stat()ed
	my $fileinfo = mtime($file);
	if(not $fileinfo)
	{
		printv(M_VERYVERBOSE,'Failed to stat() '.$file.' - skipping adding it to the cache'."\n");
		# Drop both halves of any old entry so that they stay in sync
		delete($cachedInfo{info}->{$file});
		delete($cachedInfo{mtime}->{$file});
	}
	else
	{
		$cachedInfo{info}->{$file} = $info;
		$cachedInfo{mtime}->{$file} = $fileinfo;
		if ($cacheAdditions > 200)
		{
			# Best effort, a failure to write here is deliberately ignored
			eval
			{
				writeCache();
			};
			$cacheAdditions = 0;
		}
		else
		{
			$cacheAdditions++;
		}
	}
}
# Purpose: Remove cached information about a file
# Usage: removeFromCache($file);
# Drops both the information and the mtime entry for the file
sub removeFromCache
{
	my ($file) = @_;
	delete $cachedInfo{info}->{$file};
	return delete $cachedInfo{mtime}->{$file};
}
# Purpose: Rename a file in the cache
# Usage: cacheRename($oldName,$newName);
# Does nothing if there is no cached information for $oldName. Otherwise
# the information is handed to addToCache() under $newName and the entry
# for $oldName is removed.
sub cacheRename
{
	my ($oldFile,$newFile) = @_;
	my $isCached = (defined $cachedInfo{info} and defined $cachedInfo{info}->{$oldFile});
	return if not $isCached;
	addToCache($newFile,$cachedInfo{info}->{$oldFile});
	removeFromCache($oldFile);
}
# Purpose: Load cached information
# Usage: loadCache(CLEANING);
#  CLEANING is an optional bool, when true the notices that suggest
#  running --cleancache are not displayed.
# Returns true if a cache was read from disk, false otherwise.
# Sets $cacheFile to $HOME/.mussort-cache and fills %cachedInfo. Dies if
# HOME is not set, or if the cache can't be read or is incompatible.
sub loadCache
{
	my $cleaning = shift;
	printv(M_DEBUG,'Caching is enabled'."\n");
	if(not defined $ENV{HOME})
	{
		die('The environment variable HOME is not set. Unable to continue. Set HOME or use --no-cache'."\n");
	}
	$cacheFile = $ENV{HOME}.'/.mussort-cache';
	if (-e $cacheFile)
	{
		if ($readFromCache)
		{
			printv(M_STANDARD,'Using cache from '.$cacheFile."\n");
		}
		else
		{
			printv(M_STANDARD,'Refreshing cache at '.$cacheFile."\n");
		}
		my $info = retrieve($cacheFile) or FatalError('Failed to retrieve() cache from '.$cacheFile.': '.$!);
		if ($info && ref($info) eq 'HASH')
		{
			printv(M_DEBUG,"Read cached information\n");
			%cachedInfo = %{$info};
			# Data in these older caches are in many cases invalid, so to be on the
			# safe side we ignore it.
			if(not defined $cachedInfo{cacheCompat})
			{
				printv(M_STANDARD,'Note: Cache format outdated, ignoring cached data'."\n");
				%cachedInfo = ();
			}
			else
			{
				if ( (not defined($cachedInfo{lastMussortVersion}) or $cachedInfo{lastMussortVersion} ne $VERSION)
					and
					(not $cleaning) )
				{
					printv(M_STANDARD,'NOTICE: You appear to have recently upgraded mussort, you may want to run "mussort --cleancache"'."\n");
					printv(M_STANDARD,' This message will only be displayed once.'."\n");
				}
				# If the cache file is over 25 MiB, display a warning
				my $cacheSize = -s $cacheFile;
				if(not $cleaning and defined $cacheSize and $cacheSize > 26214400)
				{
					# The size is fetched into a variable first. Written as
					# "-s $cacheFile / 1024" the division would be applied
					# to the file name, because -s binds looser than /.
					my $size = sprintf('%.1f',$cacheSize / 1024 / 1024);
					printv(M_STANDARD,'NOTICE: Your cache file is unreasonably large ('.$size.' MiB).'."\n");
					printv(M_STANDARD,' You may want to run "mussort --cleancache" to reduce the size'."\n");
				}
				$cachedInfo{cacheCompat} ||= 1;
				# Anything above 1 was written by a mussort we can't understand
				if ($cachedInfo{cacheCompat} > 1)
				{
					FatalError('The mussort cache file is incompatible with this version of mussort'."\n".
						'Upgrade to '.$cachedInfo{lastMussortVersion}.' or use --no-cache (or remove the cache file)'."\n");
				}
			}
			return true;
		}
		elsif(ref($info))
		{
			FatalError('retrieve() returned a useless reference of type: '.ref($info).' - try --no-cache or remove '.$cacheFile."\n");
		}
		return false;
	}
	else
	{
		printv(M_STANDARD,'Will create cache file at '.$cacheFile."\n");
		return false;
	}
}
# Purpose: Write the cache if there is data in it
# Usage: writeCache(IGNORE_ERROR);
#  IGNORE_ERROR is an optional bool. When true a failure to write is only
#  reported, when false (the default) it is fatal.
# Does nothing if the cache holds no file information.
sub writeCache
{
	my $ignoreError = shift;
	if(defined($cachedInfo{info}) && keys(%{$cachedInfo{info}}))
	{
		my $doneLevel = M_VERBOSE;
		my $err;
		if (-e $cacheFile)
		{
			printv(M_VERBOSE,'Writing updated cache to '.$cacheFile.'...');
		}
		else
		{
			$doneLevel = M_STANDARD;
			printv(M_STANDARD,'Writing cache file '.$cacheFile.'...');
		}
		$cachedInfo{lastMussortVersion} = $VERSION;
		$cachedInfo{cacheFormat} = 1;
		$cachedInfo{cacheCompat} = 1;
		# store() returns undef on I/O errors and croaks on more serious
		# ones. The eval catches the latter so that IGNORE_ERROR is
		# honoured for both, and so that the real reason can be reported.
		my $stored = eval { store(\%cachedInfo,$cacheFile) };
		my ($exception,$osError) = ($@,"$!");
		if (not $stored)
		{
			$err = length($exception) ? $exception : $osError;
			$err = 'unknown error' if not length($err);
			$err =~ s/\s+$//;
		}
		if ($err)
		{
			if(not $ignoreError)
			{
				printv($doneLevel,'failed. ');
				FatalError('Failed to store() cache in '.$cacheFile.': '.$err);
			}
			else
			{
				printv($doneLevel,'failed ('.$err.')'."\n");
			}
		}
		else
		{
			printv($doneLevel,'done'."\n");
		}
	}
}
# Purpose: Retrieve a generated filename or dirname of at most 254 characters
# Usage: name = GetNameOfLength(\@names, JOINCHAR,EXT);
#  \@names is an arrayref containing the name components
#  JOINCHAR is the character to join the contents of \@names with
#  EXT is the extension (sans the .) to add, can be left undef for no extension
# Returns: the joined name. If it is too long the longest component is cut
#  and given a trailing "...", repeatedly, until the name fits. \@names
#  itself is not modified. Dies (FatalError) if the name can't be made
#  to fit.
# Note: with an extension the limit used is 253 minus the length of the
#  extension, and that limit is compared against the complete name
#  including the extension.
sub GetNameOfLength
{
	# The names arrayref
	my $names = shift;
	# The join character
	my $joiner = shift;
	# The extension, if any
	my $extension = shift;
	# Work on a copy so that the caller's array is left alone
	my @parts = @{$names};
	# Builds the name from the current state of @parts
	my $buildName = sub {
		my $name = join($joiner,@parts);
		if ($extension)
		{
			$name .= '.'.$extension;
		}
		return $name;
	};
	# Generate a name
	my $NewName = $buildName->();
	# Keep track of the number of times through the loop
	my $loopC = 0;
	# The maximum length of the name
	my $maxBareLength;
	if ($extension)
	{
		# 253 because the . in the Ext takes one character (254-1)
		$maxBareLength = 253-length($extension);
	}
	else
	{
		$maxBareLength = 254;
	}
	# Generate a string of the proper length
	while(length($NewName) > $maxBareLength)
	{
		# Figure out how many characters we need to get
		my $need = length($NewName)-$maxBareLength;
		# The index of the largest component
		my $largestI;
		# The length of the above component string
		my $largestLen = 0;
		# Loop through each component to figure out which is the largest
		for my $i (0..$#parts)
		{
			next if ! defined($parts[$i]);
			if (length($parts[$i]) > $largestLen)
			{
				$largestI = $i;
				$largestLen = length($parts[$i]);
			}
		}
		# No component has any characters to give up
		if(not defined $largestI)
		{
			FatalError('Failed to generate filename. Ended up with "'.$NewName.'", but there is nothing left that can be shortened.');
		}
		# Figure out how many characters we need to remove, the +3 is for the three dots
		# we add when the component is too long
		my $reclaim = $need+3;
		# During the first 10 loops reclaim at most 100 characters from a single string
		if ($reclaim > 100 && $loopC < 10)
		{
			printv(M_DEBUG,'GetNameOfLength: Need '.$need.' characters, setting current value to 100'."\n");
			$reclaim = 100;
		}
		printv(M_DEBUG,'GetNameOfLength: Need to reclaim '.$reclaim.' characters from string of '.$largestLen." characters\n");
		# Find the index to cut at. It must not go below zero, as a negative
		# length makes substr() count from the end of the string instead.
		my $index = $largestLen-$reclaim;
		if ($index < 0)
		{
			$index = 0;
		}
		printv(M_DEBUG,'GetNameOfLength: Reclaiming at index '.$index."\n");
		# Cut the string
		$parts[$largestI] = substr($parts[$largestI],0,$index).'...';
		# Generate a new filename
		$NewName = $buildName->();
		# Bump the loop counter
		$loopC++;
		# Abort if we've tried 50 times
		if ($loopC > 50)
		{
			FatalError('Failed to generate filename. Ended up with "'.$NewName.'", but kept looping to generate a better name.');
		}
	}
	return $NewName;
}
# Purpose: Get the new filename for the file
# Usage: my $NewFileName = GetNewFileName(InfoHash);
#  InfoHash is a hashref as returned by GetInfo()
#  A second parameter is accepted, but is not used for anything here.
# Returns: [CD<n>-][<track>-]<band>-<title>.<ext>, or nothing at all if
#  the title or the band is unusable.
# Note: a single digit track number is zero-padded in InfoHash itself.
sub GetNewFileName
{
	my $Info = shift;
	my $Compilation = shift;
	if (IsInvalidTagValue($Info->{Title}) or IsInvalidTagValue($Info->{Band}))
	{
		return;
	}
	my @NameComponents;
	push(@NameComponents, $Info->{Band},$Info->{Title});
	# The track number is optional and can be missing altogether, so it
	# has to be checked for definedness before it is looked at
	if(defined $Info->{Track} and not $Info->{Track} eq 'Unknown' and not $Info->{Track} =~ /\D/)
	{
		if(not $Info->{Track} =~ /\d\d/)
		{
			$Info->{Track} = "0$Info->{Track}";
		}
		# Disallow 00/0
		if($Info->{Track} > 0 )
		{
			unshift(@NameComponents,$Info->{Track});
		}
	}
	# Prefix with the disc number when the file is part of a multi-disc set
	if (
		ref($Info->{SetNo}) &&
		defined $Info->{SetNo}->{total} &&
		$Info->{SetNo}->{total} > 1 &&
		$Info->{SetNo}->{number} != 0
	)
	{
		unshift(@NameComponents,'CD'.$Info->{SetNo}->{number});
	}
	my $NewName = GetNameOfLength(\@NameComponents,'-',$Info->{Ext});
	return($NewName);
}
# Purpose: Get a list of directories in a directory - possibly cached
# Usage: array = getDirectoriesIn(directory,type);
#  type is DIR_TYPE_PRIMARY or DIR_TYPE_SECONDARY and selects which cache
#  to use. The primary cache keeps every listing, any other cache is
#  emptied whenever a directory that is not in it is requested.
# Returns: the sorted names (not paths) of the directories inside the
#  directory, including the . and .. entries. Returns an empty list,
#  without caching it, if the directory can't be opened.
sub getDirectoriesIn
{
	my $dir = shift;
	my $type = shift;
	if(not defined $dir or not defined $type)
	{
		die("getDirectoriesIn: both a directory and a type are required\n");
	}
	if(not defined $dirInfoCache[$type])
	{
		$dirInfoCache[$type] = {};
	}
	if(defined $dirInfoCache[$type]->{$dir})
	{
		return @{$dirInfoCache[$type]->{$dir}};
	}
	if(not $type eq DIR_TYPE_PRIMARY)
	{
		$dirInfoCache[$type] = {};
	}
	opendir(my $dirHandle,$dir) or return;
	my @dirs;
	foreach my $entry (readdir($dirHandle))
	{
		# readdir() returns bare names, so the test has to be done on the
		# name relative to $dir and not to the current working directory
		next if not -d $dir.'/'.$entry;
		push(@dirs,$entry);
	}
	closedir($dirHandle);
	@dirs = sort(@dirs);
	$dirInfoCache[$type]->{$dir} = \@dirs;
	return @dirs;
}
# Purpose: Get the new directory path for the file
# Usage: my $NewDirName = GetNewDirName(InfoHash, BaseDir, Compilation?);
#
# Normal files end up in BaseDir/BAND/ALBUM/, compilation members in
# BaseDir/ALBUM/. Returns nothing if the band tag (or, for compilations, the
# album tag) is unusable. The album level is left out when it is empty or
# 'Unknown'.
sub GetNewDirName
{
	my($Info,$BaseDir,$Compilation) = @_;
	return if IsInvalidTagValue($Info->{Band});
	return if $Compilation and IsInvalidTagValue($Info->{Album});

	# Compilations are sorted by album only
	my($FirstDir,$SecondDir) = $Compilation
		? ($Info->{Album},'')
		: ($Info->{Band},$Info->{Album});

	# Limit the length of each component that is present
	foreach my $component ($FirstDir,$SecondDir)
	{
		next if not (defined($component) && length($component));
		$component = GetNameOfLength([ $component ],'',undef);
	}

	# Apply the requested casing policy, if any
	if($DirInsensitive)
	{
		($FirstDir,$SecondDir) = GetNewDirName_insensitive($FirstDir,$SecondDir,$BaseDir);
	}
	elsif($DirConsistent)
	{
		($FirstDir,$SecondDir) = GetNewDirName_consistent($FirstDir,$SecondDir);
	}

	my $useSecond = (defined $SecondDir and length $SecondDir and not $SecondDir eq 'Unknown');
	return $useSecond ? "$BaseDir/$FirstDir/$SecondDir/" : "$BaseDir/$FirstDir/";
}
# Purpose: Retrieves case-insensitive directory names
# Usage: ($first,$second) = GetNewDirName_insensitive($first,$second,$base);
#
# $first is the top level directory name, $second the (optional) directory
# below it and $base the directory that $first lives in. Each name is replaced
# by the spelling previously chosen for it (remembered in %DirPreferance) or,
# failing that, by a matching directory that already exists on disk.
#
# The second level name is resolved and remembered even when the first level
# name was already known; without that only the first album seen for an artist
# would ever be matched against the existing directories.
sub GetNewDirName_insensitive
{
	my($FirstDir,$SecondDir,$BaseDir) = @_;
	my $hasSecond = (defined $SecondDir and length $SecondDir);
	# Lookup keys: lowercased and with some punctuation removed
	my $lowerFirst = lc($FirstDir);
	$lowerFirst =~ s/[\.\,\&]//g;
	my $lowerSecond = $hasSecond ? lc($SecondDir) : '';
	$lowerSecond =~ s/[\.\,\&]//g;

	my $firstWasKnown = $DirPreferance{$lowerFirst};
	if($firstWasKnown)
	{
		$FirstDir = $DirPreferance{$lowerFirst};
	}
	else
	{
		$FirstDir = getSingleInsensitiveDirName($FirstDir,$BaseDir,DIR_TYPE_PRIMARY);
		$DirPreferance{$lowerFirst} = $FirstDir;
	}

	if($hasSecond)
	{
		if($firstWasKnown and $DirPreferance{$lowerSecond})
		{
			$SecondDir = $DirPreferance{$lowerSecond};
		}
		else
		{
			# Prefer the spelling of an existing directory, if there is one
			my $parent = $BaseDir.'/'.$FirstDir;
			if(-d $parent)
			{
				$SecondDir = getSingleInsensitiveDirName($SecondDir,$parent,DIR_TYPE_SECONDARY);
			}
			# Never store anything under the empty key
			if(length $lowerSecond)
			{
				$DirPreferance{$lowerSecond} = $SecondDir;
			}
		}
	}
	return ($FirstDir,$SecondDir);
}
# Purpose: Find the existing spelling of a directory name, ignoring case
# Usage: $dir = getSingleInsensitiveDirName($dir,$baseDir,$dirType);
#
# Looks through the directories in $baseDir (as listed by getDirectoriesIn for
# $dirType). A directory whose name equals $dir when case is ignored wins
# outright. Otherwise the first directory that equals $dir when case, '-' and
# '_' are all ignored is used. If nothing matches, or $baseDir is not a
# directory, $dir is returned unchanged.
sub getSingleInsensitiveDirName
{
	my ($dir,$base,$type) = @_;
	return $dir if not -d $base;

	my $wanted = lc($dir);
	(my $wantedLoose = $wanted) =~ s/(-|_)+//g;
	my $looseMatch;

	foreach my $candidate (getDirectoriesIn($base,$type))
	{
		my $lcCandidate = lc($candidate);
		# An exact (case-insensitive) hit ends the search at once
		if ($lcCandidate eq $wanted)
		{
			return $candidate ? $candidate : $dir;
		}
		# Only the first loose match is remembered
		next if $looseMatch;
		(my $candidateLoose = $lcCandidate) =~ s/(-|_)+//g;
		if ($candidateLoose eq $wantedLoose)
		{
			$looseMatch = $candidate;
		}
	}

	return $looseMatch if $looseMatch;
	return $dir;
}
# Purpose: Retrieves consistent directory names
# Usage: ($first,$second) = GetNewDirName_consistent($first,$second);
#
# Both names are passed through getSingleConsistentDirName independently.
sub GetNewDirName_consistent
{
	my @consistent = map { scalar getSingleConsistentDirName($_) } @_[0,1];
	return($consistent[0],$consistent[1]);
}
# Purpose: Retrieves a single consistent directory name
# Usage: ($dir) = getSingleConsistentDirName($dir);
#
# The name is lowercased, then re-capitalised piece by piece: first split on
# whitespace/underscores (rejoined with '_'), then on '.', and finally on each
# of '-', ':' and ','. undef is passed through untouched.
sub getSingleConsistentDirName
{
	my $dir = shift;
	# Don't process undef
	return $dir if not defined $dir;

	$dir = getSingleConsistentEntity(lc($dir),qr/[\s_]/o,'_',2);
	$dir = getSingleConsistentEntity($dir,qr/[\.]/o,'.',0) if $dir =~ /\./;

	# A plain list is used here rather than qw(), as perl warns about commas
	# inside of qw()
	foreach my $separator ('-',':',',')
	{
		next if $dir !~ /$separator/;
		$dir = getSingleConsistentEntity($dir,qr/$separator/,$separator,2);
	}

	# Finally, uppercase the first char and return
	return ucfirst($dir);
}
# Purpose: Get a single consistently named entity according to supplied rules
# Usage: entity = getSingleConsistentEntity(name, regex, joinwith, ucthreshold);
#
# name is split on regex, every piece longer than ucthreshold characters gets
# its first character uppercased (except for 'and' and 'the'), and the pieces
# are then glued back together with joinwith.
sub getSingleConsistentEntity
{
	my($name,$splitRegex,$joinWith,$ucThreshold) = @_;
	# This is far from an exhaustive list, but having these as lowercase
	# usually makes a lot of sense, and they are very common in names.
	my %keepLower = map { $_ => 1 } qw(and the);
	my @pieces = map {
		(length($_) > $ucThreshold and not $keepLower{$_}) ? ucfirst($_) : $_
	} split($splitRegex,$name);
	return join($joinWith,@pieces);
}
# Purpose: Do the actual renaming
# Usage: RenameFile(Original name, New Name, BaseDir);
#
# Moves Original name to New Name, creating the target directory when needed.
# Nothing is done if the two names are the same, if the target "directory"
# exists but is not a directory, or if the new basename is shorter than 7
# characters. If the target already exists, $dupeAction decides what happens:
#   DUPE_REPLACE  - the original is deleted if the existing target is larger,
#                   otherwise the move goes ahead
#   DUPE_NOREMOVE - the file is left alone
#   DUPE_KEEP     - a -N suffix is added to the new name
# When $DryRunMove is set nothing is moved or deleted, the actions are only
# displayed.
sub RenameFile
{
	my $OrigFile = shift;
	my $NewFile = shift;
	my $BaseDir = shift;
	# Builds a prettier name to display: relative to the base directory and
	# with repeated slashes collapsed
	my $displayName = sub {
		my $shown = shift;
		$shown =~ s/^\Q$BaseDir\E\/*//;
		$shown =~ s#/+#/#g;
		return $shown;
	};
	my $OutputOldName = $displayName->($OrigFile);
	my $OutputNewName = $displayName->($NewFile);
	my $Directory = dirname($NewFile);
	if(($OutputOldName eq $OutputNewName) or ($NewFile eq $OrigFile))
	{
		return;
	}
	# If it exists and isn't a directory then just return
	if(-e $Directory and not -d $Directory)
	{
		return;
	}
	# If the length of the filename is less than 7 just return
	if(length(basename($NewFile)) < 7)
	{
		return;
	}
	# Create the dir
	eval {
		intMkpath($Directory);
		1;
	}
	or do
	{
		FatalError('Failed to create path ' . $Directory . ": $@\nWas processing $OrigFile");
	};
	# Check if the file already exists
	if(-e $NewFile)
	{
		if ($dupeAction == DUPE_REPLACE)
		{
			# Check which file is the largest
			if((-s realpath($NewFile)) > (-s realpath($OrigFile)))
			{
				printv(M_STANDARD,"Remove: $OrigFile\n");
				# A dry run must never delete anything
				unless(defined $DryRunMove)
				{
					unlink($OrigFile) or Error("Unable to remove $OrigFile: $!");
					$cleanTree{dirname($OrigFile)} = true;
				}
				return;
			}
		}
		elsif($dupeAction == DUPE_NOREMOVE)
		{
			return;
		}
		elsif($dupeAction == DUPE_KEEP)
		{
			my $ext = $NewFile;
			my $name = $NewFile;
			$ext =~ s/.*\.([^\.]+)$/$1/;
			$name =~ s/^(.*)\.[^\.]+$/$1/;
			# Find the first free NAME-N.EXT
			my $no = 0;
			my $nextName;
			while((not defined $nextName) or (-e $nextName))
			{
				$no++;
				$nextName = $name.'-'.$no.'.'.$ext;
			}
			$NewFile = $nextName;
			# Make sure the name that is displayed is the one actually used
			$OutputNewName = $displayName->($NewFile);
		}
		else
		{
			die('Unknown dupeAction: '.$dupeAction);
		}
	}
	# Output info and do the actual renaming
	printv(M_STANDARD,"Rename: $OutputOldName -> $OutputNewName\n");
	unless(defined $DryRunMove)
	{
		move($OrigFile,$NewFile) or FatalError("Unable to move $OrigFile to $NewFile: $!");
		cacheRename($OrigFile,$NewFile);
		$cleanTree{dirname($OrigFile)} = true;
	}
}
# Purpose: Queue a file for renaming
# Usage: QueueRename(Original name, New Name, BaseDir);
#
# The request is stored in the fileMap of $CompilationDetection, keyed on the
# original name. Nothing is queued when the name would not change.
sub QueueRename
{
	my($OrigFile,$NewFile,$BaseDir) = @_;
	if ($OrigFile eq $NewFile)
	{
		return;
	}
	my %pending = (
		newFile => $NewFile,
		baseDir => $BaseDir,
	);
	$CompilationDetection->{fileMap}{$OrigFile} = \%pending;
}
# Purpose: Remove the supplied directory and up to two of its parent
#          directories, for as long as each of them is empty.
# Usage: RemoveIfEmpty(DIR);
#
# Does nothing at all when $dupeAction is DUPE_NOREMOVE. The walk upwards stops
# at the first directory that is missing, not empty, or can not be removed.
sub RemoveIfEmpty
{
	return if $dupeAction == DUPE_NOREMOVE;
	my $Dir = shift;
	my $SecondDir = dirname($Dir);
	my $ThirdDir = dirname($SecondDir);
	# The topmost directory that was actually removed
	my $Removed;
	foreach my $candidate ($Dir,$SecondDir,$ThirdDir)
	{
		if(not -d $candidate or not DirIsEmpty($candidate))
		{
			last;
		}
		if(not rmdir($candidate))
		{
			# Report the directory that failed, and don't try its parents:
			# they can not be empty when this one is still there.
			Error("Unable to remove $candidate: $!");
			last;
		}
		$Removed = $candidate;
	}
	if($Removed)
	{
		printv(M_STANDARD,"Removed empty directory: $Removed\n");
	}
}
# Purpose: Process a file
# Usage: ProcessFile(FILE, BaseDir);
#
# Directories are only noted (and added to the cleanup list when
# $addAllCleanTree is set). Files ending in .ogg, .mp3 or .flac are either
# renamed straight away or, when compilation detection is enabled, queued for
# renaming later.
sub ProcessFile
{
	my($File,$BaseDir) = @_;
	if (-d $File)
	{
		printv(M_VERBOSE,'Searching through directory '.$File."\n");
		$cleanTree{$File} = true if $addAllCleanTree;
		return;
	}
	# Only music files are of interest
	return unless $File =~ /\.(ogg|mp3|flac)$/gi;
	# It might be a dangling symlink, if it is then just silently skip it
	return unless -e $File;

	my($NewName,$NewDir) = getNewFilePath($File,$BaseDir);
	return if not defined $NewName or not defined $NewDir;

	my $target = "$NewDir/$NewName";
	if ($CompilationDetection)
	{
		QueueRename($File,$target,$BaseDir);
	}
	else
	{
		RenameFile($File,$target,$BaseDir);
	}
}
# Purpose: Get the new dir path and filenames for a file
# Usage: my($NewName,$NewDir) = getNewFilePath($File,$BaseDir,Compilation?);
#
# Returns nothing (after printing a "Skipped" message, unless $SilentSkip is
# set) when no filename could be generated for the file. When no directory
# name could be generated the file's current directory is used.
sub getNewFilePath
{
	my($File,$BaseDir,$Compilation) = @_;
	# Get info
	my $FileInfo = GetInfo($File);
	# Build a new filename and directory for the music file
	my $NewName = GetNewFileName($FileInfo,$Compilation);
	my $NewDir = GetNewDirName($FileInfo,$BaseDir,$Compilation);
	if(not defined($NewName))
	{
		if(not $SilentSkip)
		{
			printv(M_STANDARD,"Skipped: $File\n");
		}
		return;
	}
	# Fall back to leaving the file in the directory it is already in
	$NewDir = dirname($File) if not $NewDir;
	return($NewName,$NewDir);
}
# Purpose: Process pending compilation actions in $CompilationDetection
# Usage: ProcessCompilation(BaseDir);
#
# An album is treated as a compilation when it has more than two artists and
# none of those artists has more than two files on it. Files on compilation
# albums are renamed using the compilation layout, all other queued files are
# renamed as originally queued. $CompilationDetection is reset afterwards.
sub ProcessCompilation
{
	my $BaseDir = shift;
	my @compilationRename;
	printv(M_VERBOSE,'Detecting compilation albums...');
	ALBUM: foreach my $album (keys(%{$CompilationDetection->{album}}))
	{
		next ALBUM if IsInvalidTagValue($album);
		my $thisAlbum = $CompilationDetection->{album}->{$album};
		# Too few artists to be a compilation
		if (not keys(%{$thisAlbum}) > 2)
		{
			next ALBUM;
		}
		# An artist with many tracks means this is most likely a normal album.
		# This has to skip the album, not just move on to the next artist.
		foreach my $artist (keys(%{$thisAlbum}))
		{
			if(@{$thisAlbum->{$artist}} > 2)
			{
				next ALBUM;
			}
		}
		# Gather the files, taking them out of the normal rename queue
		my @files;
		foreach my $artist (keys %{$thisAlbum})
		{
			foreach my $file (@{$thisAlbum->{$artist}})
			{
				push(@files,$file);
				if ($CompilationDetection->{fileMap} and $CompilationDetection->{fileMap}->{$file})
				{
					delete($CompilationDetection->{fileMap}->{$file});
				}
			}
		}
		push(@compilationRename,{
				files => \@files,
				album => $album,
			});
	}
	printv(M_VERBOSE,"done\n");
	foreach my $compilationRef (@compilationRename)
	{
		my $compilation = $compilationRef->{files};
		printv(M_STANDARD,"Detected compilation album: $compilationRef->{album}\n");
		foreach my $File (@{$compilation})
		{
			# Build a new filename for the music file
			my($NewName,$NewDir) = getNewFilePath($File,$BaseDir,true);
			next if not defined $NewDir or not defined $NewName;
			RenameFile($File,$NewDir.'/'.$NewName,$BaseDir);
		}
	}
	# Finally, rename those left in fileMap
	if ($CompilationDetection->{fileMap})
	{
		foreach my $file (sort keys %{$CompilationDetection->{fileMap}})
		{
			RenameFile($file,
				$CompilationDetection->{fileMap}->{$file}->{newFile},
				$CompilationDetection->{fileMap}->{$file}->{baseDir},
			);
		}
	}
	# Reset it for the next dir
	$CompilationDetection = {};
}
# Purpose: Sort files
# Usage: sort sortFiles @_
#
# Comparison routine for sort(): anything that is not a directory sorts before
# directories, and within each group names are compared case-insensitively.
sub sortFiles
{
	my $aIsDir = -d $a;
	my $bIsDir = -d $b;
	return 1 if $aIsDir and not $bIsDir;
	return -1 if $bIsDir and not $aIsDir;
	return lc($a) cmp lc($b);
}
# Purpose: Traverse a tree
# Usage: ProcessTree(DIR);
#
# Processes every file below DIR (skipping anything named .git), runs the
# compilation handling when it is enabled, and finally removes the directories
# listed in %cleanTree that are empty. Returns 1.
sub ProcessTree
{
	my $Requested = shift;
	setTermTitle('mussort ['.$Requested.']');
	# realpath() might not return anything for a path that doesn't exist, so
	# hold on to the name we were given for use in the error message
	my $Dir = realpath($Requested);
	if(not defined $Dir or not -e $Dir)
	{
		FatalError((defined $Dir ? $Dir : $Requested).': does not exist');
	}
	elsif(not -d $Dir or not -r $Dir)
	{
		FatalError($Dir.': is not a directory or is not readable');
	}
	printv(M_STANDARD,"Processing: $Dir\n");
	find({
			wanted => sub { ProcessFile($File::Find::name,$Dir); },
			# Files are visited before directories, and .git is never entered
			preprocess => sub {
				my @files;
				foreach my $f (sort sortFiles @_)
				{
					if ($f eq '.git')
					{
						next;
					}
					push(@files,$f);
				}
				return @files;
			},
		},
		$Dir
	);
	if ($CompilationDetection)
	{
		ProcessCompilation($Dir);
	}
	printv(M_VERBOSE,'Looking for empty directories that should be removed...'."\n");
	foreach my $cleanDir (sort keys %cleanTree)
	{
		RemoveIfEmpty($cleanDir);
	}
	return(1);
}
# Purpose: Output information useful for debugging
# Usage: debugInfo();
#
# Prints the mussort version, the MD5 sum of the running script, the versions
# of the helper programs and modules that were found, and which formats can be
# read as a result. Never returns, it always exits with status 0.
sub debugInfo
{
	eval('use Digest::MD5; use IPC::Open3;');
	my $format = '%-30s: %s'."\n";
	printf($format,'mussort version',$VERSION);
	# Checksum of the script itself
	my $loc = realpath($0);
	my $digest;
	if(open(my $f,'<',$loc))
	{
		binmode($f);
		my $md5 = Digest::MD5->new();
		$md5->addfile($f);
		$digest = $md5->hexdigest;
		close($f);
	}
	else
	{
		$digest = '(unable to read '.$0.': '.$!.')';
	}
	printf($format,'MD5:',$digest);
	print "\nDependencies/library versions:\n";

	# Runs a command and returns a version string extracted from its output
	# (STDOUT and STDERR combined), or 'unknown' if there was no output.
	# Parameters: slurp (true to use the whole output instead of just the
	# first line), an arrayref of patterns to remove from the output, and
	# then the command with its arguments.
	my $probeVersion = sub {
		my($slurp,$strip,@command) = @_;
		my($in,$out);
		# A false third parameter sends STDERR to the same handle as STDOUT
		my $pid = eval { open3($in,$out,undef,@command) };
		return 'unknown' if not $pid;
		my $output = do { local $/ = $slurp ? undef : $/; <$out> };
		close($in);
		close($out);
		# open3() does not reap the child for us
		waitpid($pid,0);
		return 'unknown' if not defined $output;
		foreach my $pattern (@{$strip})
		{
			$output =~ s/$pattern//g;
		}
		chomp($output);
		return $output;
	};

	# Get ogginfo/id3info versions
	my($id3ver,$id3v2ver,$oggver,$oggPerlVer,$oggPurePerlVer,$audioFileVer,$metaflacver) = ('missing','missing','missing','missing','missing','missing','missing');
	if(InPath('id3info'))
	{
		$id3ver = $probeVersion->(0,[ qr/^\D+\d?\D+/ ],'id3info','-V');
	}
	if(InPath('ogginfo'))
	{
		# --meh isn't a valid command-line parameter, but older versions doesn't have -V,
		# but to an invalid parameter (--meh) it will give us a version number we can regex
		# out
		$oggver = $probeVersion->(1,[ qr/^\D+/, qr/\n.*/ ],'ogginfo','--meh');
	}
	if(InPath('id3v2'))
	{
		$id3v2ver = $probeVersion->(0,[ qr/^\s*id3v2\s*/ ],'id3v2','--version');
	}
	if(InPath('metaflac'))
	{
		$metaflacver = $probeVersion->(0,[ qr/^\s*metaflac\s*/ ],'metaflac','--version');
	}
	if(tryuse('Ogg::Vorbis::Header::PurePerl'))
	{
		$oggPurePerlVer = $Ogg::Vorbis::Header::PurePerl::VERSION;
	}
	if(tryuse('Ogg::Vorbis::Header'))
	{
		$oggPerlVer = $Ogg::Vorbis::Header::VERSION;
		# ::Header whines if we don't call Inline::init()
		eval { Inline::init(); };
	}
	if(tryuse('Audio::File'))
	{
		$audioFileVer = $Audio::File::VERSION;
	}
	printf($format,'id3info',$id3ver);
	printf($format,'id3v2',$id3v2ver);
	printf($format,'ogginfo',$oggver);
	printf($format,'metaflac',$metaflacver);
	printf($format,'Ogg::Vorbis::Header',$oggPerlVer);
	printf($format,'Ogg::Vorbis::Header::PurePerl',$oggPurePerlVer);
	printf($format,'Audio::File',$audioFileVer);
	if ($audioFileVer eq 'missing' && $metaflacver eq 'missing')
	{
		print "Both Audio::File and metaflac are missing, FLAC support will be unavailable\n";
	}
	print "\nFormat support:\n";
	my($mp3,$ogg,$flac) = ('no','no','no');
	if ($audioFileVer ne 'missing')
	{
		($mp3,$ogg,$flac) = ('yes','yes','yes');
	}
	if ($id3ver ne 'missing' || $id3v2ver ne 'missing')
	{
		$mp3 = 'yes';
	}
	if ($oggver ne 'missing' || $oggPerlVer ne 'missing' || $oggPurePerlVer ne 'missing')
	{
		$ogg = 'yes';
	}
	if ($metaflacver ne 'missing')
	{
		$flac = 'yes';
	}
	printf($format,'MP3',$mp3);
	printf($format,'OGG Vorbis',$ogg);
	printf($format,'FLAC',$flac);
	if ($mp3 eq 'no' || $ogg eq 'no' || $flac eq 'no')
	{
		print "Install missing dependencies to enable additional format support.\n";
	}
	exit(0);
}
# Purpose: Print formatted --help output
# Usage: PrintHelp('-shortoption', '--longoption', 'description');
#  Description will be reformatted to fit within a normal terminal
#
# The options are printed on the first line only, continuation lines of a
# wrapped description get empty option columns. Dies if the description ends
# up empty. Returns true.
sub PrintHelp
{
	my($short,$long,$desc) = @_;
	# The maximum length any line can be
	my $maxlen = 80;
	# The short option column is at least four characters wide
	my $shortlen = length($short) > 4 ? length($short) : 4;
	# The space set aside for the options: at least 20 characters, more when
	# the options themselves are longer. The +2 here is because we always add
	# some space between them, no matter what
	my $optionlen = $shortlen + length($long) + 2;
	$optionlen = 20 if $optionlen < 20;

	# Wrap the description, word by word
	my @lines;
	foreach my $part (split(/ /,$desc))
	{
		if(not @lines)
		{
			push(@lines,$part);
		}
		elsif((length($lines[-1]) + length($part) + 1 + $optionlen) > $maxlen)
		{
			push(@lines,$part);
		}
		else
		{
			$lines[-1] .= ' '.$part;
		}
	}
	my $GeneratedDesc = join("\n",@lines);
	# Something went wrong
	die('Option mismatch') if not $GeneratedDesc;

	# Print it all
	my($shortColumn,$longColumn) = ($short,$long);
	foreach my $description (split(/\n/,$GeneratedDesc))
	{
		printf "%-4s %-15s %s\n", $shortColumn,$longColumn,$description;
		# Blank the columns to ensure we don't print the options twice
		($shortColumn,$longColumn) = ('','');
	}
	# Succeed
	return true;
}
# Purpose: Print a message if verbosity level matches
# Usage: printv(VERBOSITY_LEVEL,message);
#
# The message is printed to STDOUT when VERBOSITY_LEVEL does not exceed the
# current $verbosity. M_DEBUG messages are prefixed with 'Debug: '.
sub printv
{
	my($level,$message) = @_;
	return if $level > $verbosity;
	print 'Debug: ' if $level == M_DEBUG;
	# Collapse the first run of newlines into a single one
	(my $string = $message) =~ s/\n+/\n/;
	print $string;
	# Flush STDOUT: enabling autoflush flushes at once, then turn it back off
	$| = true;
	$| = false;
}
# Purpose: Check for a file in path
# Usage: InPath(FILE)
#
# Returns 1 if an executable non-directory named FILE exists in one of the
# directories listed in $ENV{PATH}, and 0 if not (or if PATH isn't set).
sub InPath
{
	# Joins the parameters the same way interpolating @_ into a string does
	my $file = "@_";
	return 0 if not defined $ENV{PATH};
	foreach my $dir (split(/:/,$ENV{PATH}))
	{
		my $candidate = "$dir/$file";
		return 1 if -x $candidate and not -d $candidate;
	}
	return 0;
}
# Purpose: Clean up the cache
# Usage: cleanCache()
#
# Walks through every entry in the cache and removes those that refer to
# missing files, lack a modification time or data, are older than the file
# they describe, lack a usable title or band, or have a bogus track number.
# Prints a summary, writes the cache back and exits - this never returns.
sub cleanCache
{
	if(not loadCache(true))
	{
		print "No cached data found - nothing to clean\n";
		exit(0);
	}
	$| = true;
	print "Cleaning and validating the cache...";
	print "\n" if $verbosity == M_DEBUG;
	my($removedDangling,$removedObsolete,$removedIncomplete) = (0,0,0);
	my($existing,$checked,$last) = (0,0,0);
	foreach my $file (keys %{$cachedInfo{info}})
	{
		$checked++;
		# Progress indicator, roughly one dot per hundred entries
		if ($last + 100 < $checked)
		{
			$last = $checked;
			print '.' if $verbosity < M_DEBUG;
		}
		# Work out if, and why, the entry has to go. $counter points to the
		# statistic to bump, $reason is the text to show in debug mode.
		my($counter,$reason);
		if(not -e $file)
		{
			($counter,$reason) = (\$removedDangling,': was dangling - removed');
		}
		elsif(not defined $cachedInfo{mtime}->{$file})
		{
			($counter,$reason) = (\$removedDangling,': was semi-dangling (no mtime reference) - removed');
		}
		elsif(not defined $cachedInfo{info}->{$file})
		{
			($counter,$reason) = (\$removedDangling,': had no actual data - removed');
		}
		elsif(mtime($file) > $cachedInfo{mtime}->{$file})
		{
			($counter,$reason) = (\$removedObsolete,': has expired from the cache - removed');
		}
		else
		{
			# Both the title and the band must hold something useful
			my $hadInfo = false;
			foreach my $ent (qw(Title Band))
			{
				my $t = $cachedInfo{info}->{$file}->{$ent};
				$hadInfo++ if IsValidTagValue($t) && $t =~ /(\w|\d)/;
			}
			if($hadInfo <= 1)
			{
				($counter,$reason) = (\$removedIncomplete,': had incomplete information - removed');
			}
			elsif(IsValidTagValue($cachedInfo{info}->{$file}->{Track}) and
				(
					($cachedInfo{info}->{$file}->{Track} =~ /\D/) or
					($cachedInfo{info}->{$file}->{Track} > 999)
				) )
			{
				($counter,$reason) = (\$removedIncomplete,': had an invalid track number - removed');
			}
		}
		if(defined $reason)
		{
			${$counter}++;
			removeFromCache($file);
			printv(M_DEBUG,$file.$reason."\n");
			next;
		}
		$existing++;
	}
	print '.' if $verbosity < M_DEBUG;
	# Modification times without any matching data are of no use either
	foreach my $file (keys %{$cachedInfo{mtime}})
	{
		next if defined $cachedInfo{info}->{$file};
		$removedDangling++;
		$checked++;
		printv(M_DEBUG,$file.': only had an mtime reference but no data - removed'."\n");
		delete($cachedInfo{mtime}->{$file});
	}
	print "done\n" if $verbosity < M_DEBUG;
	print "The cache had $checked entries\n";
	my $f = "%-25s: %s\n";
	printf($f,'Dangling references',$removedDangling);
	printf($f,'Obsolete references',$removedObsolete);
	printf($f,'Incomplete references',$removedIncomplete);
	print "Removed a total of ".($removedDangling+$removedObsolete+$removedIncomplete)." references\n";
	print "$existing valid references left\n";
	writeCache();
	exit(0);
}
# Purpose: Initialize dependency lists
# Usage: initDeps();
#
# Fills in %Has with the modules and helper programs that are available, and
# calls depError() for MP3 and OGG Vorbis when nothing that can read them was
# found.
sub initDeps
{
	# Perl modules
	foreach my $module ('Ogg::Vorbis::Header::PurePerl','Ogg::Vorbis::Header','Audio::File')
	{
		next if not tryuse($module);
		$Has{$module} = true;
		if($module eq 'Ogg::Vorbis::Header')
		{
			# ::Header won't work properly if we don't call Inline::init()
			eval { Inline::init(); };
		}
	}
	# Helper programs
	foreach my $cmd (qw(id3v2 id3info ogginfo metaflac))
	{
		$Has{$cmd} = true if InPath($cmd);
	}
	# No further checks are made when Audio::File is available
	return if $Has{'Audio::File'};
	unless($Has{'id3v2'} or $Has{'id3info'})
	{
		depError('MP3',[ 'id3v2','id3info','Audio::File'],'id3v2');
	}
	unless($Has{'ogginfo'} or $Has{'Ogg::Vorbis::Header::PurePerl'})
	{
		depError('OGG Vorbis',[ 'ogginfo', 'Ogg::Vorbis::Header::PurePerl' ], 'Ogg::Vorbis::Header::PurePerl');
	}
}
# Purpose: Main application
# Usage: main();
#
# Checks dependencies, loads the cache when caching is enabled, processes each
# directory named on the command-line, writes the cache back and exits.
sub main
{
	initDeps();
	# This only gets printed if debug output actually IS enabled.
	printv(M_DEBUG,"Debug output is enabled\n");
	FatalError('Requires one option: the directory to process. See --help for more information') if not (@ARGV);
	if($cacheFile)
	{
		loadCache();
	}
	else
	{
		printv(M_DEBUG,"Cache file did not exist - not loading\n");
	}
	ProcessTree($_) foreach @ARGV;
	writeCache() if $cacheFile;
	exit(0);
}
# Command-line parsing. Options that perform an action of their own (--help,
# --version, --debuginfo, --cleancache, --fileinfo) exit when they are done,
# everything else just sets up state for main().
Getopt::Long::Configure ('bundling');
GetOptions (
	'help|h' => sub {
		print "mussort version $VERSION\n\n";
		print 'Usage: ' . basename($0) . " [OPTIONS] dir1 dir2 ..\n";
		PrintHelp('-h','--help','Display this help screen');
		PrintHelp('','--version','Display version information');
		PrintHelp('-k','--keepdupes','Keep and sort duplicates (default)');
		PrintHelp('-l','--largest','When hitting two files with identical names leave the largest file in place and remove the smallest (overrides --keepdupes)');
		PrintHelp('-n','--noremove','Never remove any files (overrides --keepdupes)');
		PrintHelp('-a','--allowspecial','Allow special letters (accents and other non-ASCII letters)');
		PrintHelp('-i','--insensitive','Do case insensitive searches for directory names. This will avoid directory names with different casing but is also slightly slower');
		PrintHelp('-s','--consistent','Similarly to -i, this will avoid directory names with different casing by enforcing a consistent casing for all names');
		PrintHelp('','--prefer-audiofile','Prefer to use Audio::File for reading tags');
		PrintHelp('-c','--compilation','Enable compilation detection (see the manpage)');
		PrintHelp('','--silentskip','Supress messages about files skipped');
		PrintHelp('-v','--verbose','Increase verbosity. Supply it twice to increase it further');
		PrintHelp('','--no-cache','Disable file information caching');
		PrintHelp('','--cleancache','Go through the cache and clean up dangling or obsolete data');
		PrintHelp('','--refresh-cache','Refresh the cache (use the cache in a write-only mode)');
		PrintHelp('','--quiet','Suppress all non-error messages');
		PrintHelp('','--cleantree','Attempt to clean up the directory tree. This has a significant performance impact. See the manpage for more information');
		PrintHelp('','--fileinfo','Display parsed file information for the supplied file.');
		PrintHelp('','--debuginfo','Print various technical information related to this installation of mussort');
		exit(0);
	},
	'prefer-audiofile|preferaudiofile' => \$PreferAudioFile,
	'n|noremove' => sub { $dupeAction = DUPE_NOREMOVE; },
	'l|largest' => sub { $dupeAction = DUPE_REPLACE; },
	'k|keepdupes' => sub { $dupeAction = DUPE_KEEP; },
	'i|insensitive' => sub {
		$DirInsensitive = true;
		die("--insensitive can not be combined with --consistent\n") if $DirConsistent;
	},
	's|consistent' => sub {
		$DirConsistent = true;
		die("--consistent can not be combined with --insensitive\n") if $DirInsensitive;
	},
	'a|allowspecial' => \$AllowSpecialChars,
	'debuginfo' => \&debugInfo,
	'c|compilation' => sub { $CompilationDetection = {} },
	'e|cache' => sub {
		warn('--cache is deprecated, caching is now enabled by default'."\n");
		$cacheFile = true;
	},
	'nocache|no-cache' => sub { $cacheFile = false },
	'refresh-cache' => sub { $readFromCache = false },
	'v|verbose+' => \$verbosity,
	'dryrun' => sub { $DryRunMove = true; $cacheFile = false },
	'dryrunmove' => sub { $DryRunMove = true },
	'q|quiet' => sub { $verbosity = 0 },
	# The destination has to be a reference. Without the backslash Getopt::Long
	# treats the value as yet another option specification, and --cleantree
	# never sets the variable.
	'cleantree' => \$addAllCleanTree,
	'cleancache' => \&cleanCache,
	'fileinfo=s' => sub {
		shift; my $file = shift;
		$file = realpath(dirname($file)).'/'.basename($file);
		eval('use Data::Dumper qw(Dumper); $Data::Dumper::Indent = 0;');
		initDeps();
		if($cacheFile)
		{
			loadCache();
		}
		print "File information for $file:\n";
		my $info = GetInfo($file);
		foreach my $entry (keys(%{$info}))
		{
			my $c = $info->{$entry};
			# Nested structures are shown in their Data::Dumper form
			if(defined($c) and ref($c))
			{
				$c = Dumper($c);
				$c =~ s/;\n$//;
				$c =~ s/^\$VAR\d*\s*=\s*//;
			}
			if(not defined $c or not length($c))
			{
				$c = '(unknown)';
			}
			print "$entry: $c\n";
		}
		exit(0);
	},
	'version' => sub {
		print "mussort version $VERSION\n";
		exit(0);
	},
	'silentskip' => \$SilentSkip,
) or die('See ' . basename($0) . " --help for more information\n");
main();
__END__
=head1 NAME
mussort - A simple music file sorting program
=head1 SYNOPSIS
mussort I<[OPTIONS]> I<dir1 dir2 ..>
=head1 DESCRIPTION
This is a simple tool that sorts a music collection.
It recursively searches a directory for MP3, OGG Vorbis and FLAC files, processing
them one by one. It then reads the information from the file and puts them into
a newly sorted directory tree, in the form
I<base_directory/ARTIST/ALBUM/FILENAME>. The files are renamed to
I<CDn-TRACKNUMBER-ARTIST-TRACKNAME.EXT>. TRACKNUMBER is omitted if it isn't found, and
CDn is only included if it detects multiple discs.
If you use --compilation then the sorting will differ for those albums that are
detected to be compilation albums, those will be sorted as
I<base_directory/ALBUM/FILENAME> instead. mussort will remove directories that
end up empty after sorting.
By default mussort will cache file information in ~/.mussort-cache, which speeds
up subsequent invocations significantly.
=head1 OPTIONS
=over
=item B<-h, --help>
Display a short help screen and exit
=item B<--version>
Display version information and exit
=item B<-k, --keepdupes>
When mussort reaches two files with identical names, it will keep both,
naming the one it finds last with a number at the end (ie.
I<TRACKNUMBER-ARTIST-TRACKNAME-DUPENUMBER.EXT>). This is the default.
Use I<--largest> and I<--noremove> to alter this behaviour.
=item B<-l, --largest>
When mussort reaches two files with identical names, it will remove (delete) the smallest
file and leave the largest in place.
=item B<-n, --noremove>
When mussort reaches two files with identical names it will ignore one of the
files (the last one it sees) and not sort it.
=item B<-i, --insensitive>
Makes mussort do case-insensitive searches for directory names. This is useful
when the case in the name of an artist differs and makes mussort place all the
music correctly into one directory. This I<might> be a bit slower than normal
operation, but not much. When using this option there is a chance that
directory names will differ slightly between different collections (see also
I<--consistent>).
You can combine this option with I<--compilation> if you want to. You can not
combine I<--insensitive> with I<--consistent>.
=item B<-s, --consistent>
Makes mussort generate consistent directory names. This is useful to ensure that
two collections on different machines have the exact same directory layout, so
that they can be rsynced easily. It avoids directory names that are identical
except for the casing, and is also consistent between runs, and across
different machines. Unlike I<-i>, the performance penalty is almost
nonexistent.
You can combine this option with I<--compilation> if you want to. Note that
I<--compilation> changes the directory layout, so a collection sorted without
I<--compilation> but with I<--consistent> will differ from a collection sorted
with both I<--compilation> B<and> I<--consistent>.
You can not combine I<--consistent> with I<--insensitive>.
=item B<-a, --allowspecial>
This forces mussort to not strip special characters from file and directory
names.
=item B<-c, --compilation>
Enables compilation detection. In this mode mussort will go through all files
without moving them first, sorting them in an internal data structure, then it
will attempt to detect compilation albums and re-sort members of the
compilation albums before finally moving files on-disk. Compilation albums
are placed into a directory tree matching I<ALBUM/FILENAME> instead of
I<ARTIST/ALBUM/FILENAME>.
Note that this mode is a bit slower than normal mode, and it will take a bit
longer before any actual changes are done. This is because it needs to search
through all files before it can start to rename them.
=item B<--silentskip>
Makes mussort not output messages about files that it skips (those that are skipped
are generally badly tagged, or not tagged at all).
=item B<-v, --verbose>
Increases the verbosity of mussort. Supply twice to further increase verbosity.
If supplied once, extra information about what mussort is doing, such as which
directory is being processed, will be output (this can be useful in --compilation
mode, as --compilation won't output anything until after all files have been read).
If supplied twice, it will also output information about which file is currently
being read.
=item B<--quiet>
Suppresses all status messages (and the terminal title). Errors will still get
printed.
=item B<--no-cache>
Disables caching of file information. By default mussort will cache file information
(such as tags) between runs as that significantly improves speed in subsequent
invocations. If you supply this, mussort will neither write nor read any cache.
Usually there is very little gain in using this, collections as large as 13 000+
music files only have a cache file of ~3.2MB. If you want to get back a little of
the space the cache file takes, see --cleancache.
=item B<--refresh-cache>
This will enable caching in a 'write-only' mode. That is it will write new
information to the cache, but it will not use existing information. This can
be used as an alternative to deleting the entire cache when you have different
directories that you sort, and only want to re-sort (refresh) one of them.
=item B<--cleancache>
This cleans up the cache. It will go through all entries in the cache and remove
dangling references, expired data and incomplete data. Incomplete data is added
when mussort finds a file that has invalid/empty tags. Those files are still
added to the cache so that mussort does not have to re-evaluate them each time,
but their entries contain no valid data.
This can be useful in a few cases:
=over
=item - If you have deleted or moved a large amount of music files that mussort has previously sorted
=item - If you want to save a tiny bit of HD space
=item - If you want mussort to re-read information from files that previously contained invalid data
=back
mussort will automatically re-evaluate any expired cache data during runtime,
so you are never B<required> to run I<--cleancache>.
=item B<--cleantree>
When this parameter is in effect, mussort will attempt to clean up the
directory tree by removing all empty directories that it hits (in addition to
performing its usual sorting). Note that this has a rather significant
performance impact because mussort needs to do a lot of extra readdir()
operations, and should only be done if you notice a lot of empty directories in
your music tree and want them cleaned up.
Normally mussort will automatically clean directories that are empty as a
result of sorting, but this option will make it check all directories it finds.
=back
=head1 DEPENDENCIES
mussort can use several libraries and utilities to retrieve tag information.
It requires only one for each format, however if multiple methods are available
mussort will try each in turn if one of them fails.
It needs:
=over
=item For MP3: id3v2 (best), id3info from id3lib, or Audio::File
=item For Vorbis: Ogg::Vorbis::Header::PurePerl (best), ogginfo from vorbis-tools, or Audio::File
=item For FLAC: Audio::File (best) or metaflac
=back
=head1 FILES
=over
=item ~/.mussort-cache
The mussort file information cache. Not created if --no-cache is supplied.
You can safely remove this file any time if you want to (see also --cleancache).
=back
=head1 BUGS AND LIMITATIONS
If you find a bug, please report it at L<http://random.zerodogg.org/mussort/bugs>.
=head1 INCOMPATIBILITIES
None known.
=head1 AUTHOR
B<mussort> is written by Eskild Hustvedt I<<eskild -at- zerodogg d.ot org>>
=head1 LICENSE AND COPYRIGHT
Copyright (C) Eskild Hustvedt 2007, 2008, 2009, 2010, 2011
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.