#!/usr/bin/perl -w # -*- cperl -*- #
#
#  gnump3d-index - Create a database of all tag information for audio files.
#
#  GNU MP3D - A portable(ish) MP3 server.
#
# Homepage:
#   http://www.gnump3d.org/
#
# Author:
#  Steve Kemp <steve@steve.org.uk>
# Altered: Gordon Haverland <perl@materialisations.com> 2003/11/11
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
#
#  Steve Kemp
#  ---
#  http://www.steve.org.uk/
#
#
# GH: The format of a song tag entry was
#  /path/to/file\tLENGTH=.*\tCHANNELS=.*\tBLOCKSIZE_0=.*\tARTIST=.*\t\
#  BITRATE_WINDOW=.*\tCOMMENT=.*\tBITRATE_LOWER=.*\tTRACK=.*\t\
#  FRAMING_FLAG=.*\tGENRE=.*\tRATE=.*\tFILENAME=.*\tBITRATE_UPPER=.*\t\
#  ALBUM=.*\tTITLE=.*\tVERSION=.*\tBITRATE_NOMINAL=.*\tBLOCKSIZE_1=.*
# On a Dual Athlon (1.6 GHz each) and 600+ songs in 60 directories, this
# indexing was taking 20 minutes!?!  Maybe make the indexing a bit smarter,
# look to see if we have the data already.  Look first for pathname, then
# look at file modification time (like Make).  So now tag is:
#  /path/to/file\tmtime\tLENGTH=.*\t...
# Also, I suppose we can force user to store files with proper "extensions"
# (.ogg, .mp3, ...), but really we want the type of data in the file (as per
# the 'file' command), not something from the name of the file.

use strict;
use Getopt::Long;
use File::Find;

use gnump3d::config;               # For reading our configuration file.
use gnump3d::files;                # For testing if a file is audio
use gnump3d::ogginfo;              # Pure Perl OGG Vorbis tag parsing.
use gnump3d::oggtagreader;         # Local vorbis code
use gnump3d::mp3info;	           # Local copy of MP3::Info.



# CVS revision marker for this script; showVersion() extracts the number.
my $VERSION_NUMBER = '$Revision: 1.11 $';

#
# Work out which configuration file to read: the first candidate that
# exists wins, and the name stays empty when none of them do.
#
my $CONFIG_FILE = "";
{
    my @candidates;

    # The per-user file is only a candidate when HOME is set.
    push( @candidates, $ENV{"HOME"} . "/.gnump3drc" ) if ( $ENV{"HOME"} );
    push( @candidates, "/etc/gnump3d/gnump3d.conf" );

    # This is mainly here for Windows users.
    push( @candidates, "gnump3d.conf" );

    foreach my $candidate ( @candidates )
    {
        next unless ( -e $candidate );

        $CONFIG_FILE = $candidate;
        last;
    }
}


#
# Command line flags - all of them start switched off.
#
my ( $SHOW_HELP, $SHOW_VERSION, $SHOW_STATS, $VERBOSE, $DEBUG ) = ( 0 ) x 5;

#
# Global variables: the archive root, the lockfile path, the cache file
# path, and the parsed contents of any existing cache file.
#
my ( $root, $lockfile, $cache ) = ( "", "", "" );
my %cache;

# The filenames of the audio files we locate.
my @FOUND;


#
#  Turn the usual fatal signals into die() so that our END segment runs
# and the lockfile is removed on abnormal termination.
#
use sigtrap qw(die normal-signals error-signals);


#
#  Parse the command line arguments.
#
#  NOTE: the END block at the bottom of this script unlinks whatever
# $lockfile names on *every* exit.  Until we have created the lock
# ourselves that file can only belong to another running indexer, so each
# exit path below that is taken before createLock() first forgets the
# path ( $lockfile = "" ) - unlinking "" is a harmless no-op.
#
&parseArguments();


#
#  Help and version output need neither a configuration file nor a lock,
# so handle them before anything that could fail.
#
if ( $SHOW_HELP )
{
    $lockfile = "";
    &showHelp();
    exit;
}
if ( $SHOW_VERSION )
{
    $lockfile = "";
    &showVersion();
    exit;
}


#
#  Make sure that we can read a configuration file.
#
if ( ! -e $CONFIG_FILE )
{
    print "The configuration file which I've tried to read doesn't exist:\n";
    print "'$CONFIG_FILE'\n";
    print "Aborting.\n";
    $lockfile = "";
    exit;
}

#
# Initialize ourself from the configuration file.
#
&readConfig( $CONFIG_FILE );


#
# Read various options from the configuration file - unless they have been
# specified upon the command line.
#
if ( !length( $root ) )
{
    $root = &getConfig( "root", "/home/mp3" );
}
my $lockpath = length( $lockfile ) ? $lockfile
                                   : &getConfig( "lockfile", "/tmp/index.lok" );
if ( !length( $cache ) )
{
    $cache = &getConfig( "tag_cache", "/tmp/tags.cache" );
}


#
# Displaying statistics only reads the cache; it never takes the lock, so
# $lockfile is still empty (or cleared) while showStats runs - it may call
# exit itself.
#
if ( $SHOW_STATS )
{
    $lockfile = "";
    &showStats( $cache );
    exit;
}


#
# From here on we really do work with the lockfile.
#
$lockfile = $lockpath;


#
# Test for a lockfile indicating that we're already running.
#
if ( &lockPresent() )
{
    print "gnump3d-index appears to be already running.\n";
    print "if this is in error remove the lockfile $lockfile\n";

    # The lock is not ours - leave it alone when the END block runs.
    $lockfile = "";
    exit;
}


#
# Create our lock file.  From this point the END block is allowed to
# remove it.
#
&createLock();


#
# Sanity check the archive root.
#
if ( ! -e $root )
{
    print "The server root directory you are trying to index '$root' doesn't exist.\n";
    &removeLock();
    exit;
}


# Read in the existing cache, if present, so unchanged files can be reused.
&readCache( $cache ) if ( -e $cache );


#
# Do the indexing - following symlinks on non-Windows platforms.
#
if ( &isWindows() )
{
    find( { wanted => \&findAudio }, $root );
}
else
{
    find( { wanted => \&findAudio, follow => 1 }, $root );
}


#
# Process the list of found files.
#
&indexFiles( );

#
# Tidy up.
&removeLock();

#
# Finished
exit;

#
#  File::Find callback: called once for every entry beneath our archive
# root.  Entries which look like non-empty audio files are remembered in
# @FOUND (by their full name) for indexFiles() to process later.
#
#  File::Find has chdir()'d into the directory being scanned when it calls
# us, and $_ holds the entry's name within that directory.  File tests
# must therefore use $_ : $File::Find::name is only a valid path from here
# when the root we were given was absolute.
#
sub findAudio( )
{
    # Grab the leaf name first, before any helper gets a chance to
    # clobber $_.
    my $leaf     = $_;
    my ( $file ) = $File::Find::name;

    if ( $DEBUG )
    {
        print $file . "\n";
    }

    # isAudio() comes from gnump3d::files.
    return if ( ! isAudio( $file ) );

    # Ignore zero-byte files.
    return if ( -z $leaf );

    push @FOUND, $file;
}


#
#  Show the number of files we have indexed, their total size, and
# the total playlength.
#
#  Argument: the path of the tag cache file to summarise.  Each line of
# that file is "path<TAB>KEY=value<TAB>KEY=value...".
#
#  Prints the summary to the currently selected output handle.  Exits the
# script if the cache file is missing or cannot be opened.
#
sub showStats( $ )
{
  my ( $file ) = (@_);
  my $COUNT = 0;
  my $SIZE  = 0;
  my $TIME  = 0;

  my %DETAILS = ( );

  if ( ! -e $file )
  {
      print "Stats could not be displayed as the cache file doesnt exist\n";
      print "The cache file we expect is $file\n";
      exit;
  }

  my $fh;
  if ( ! open( $fh, "<", $file ) )
  {
      print "Error opening the cache file '$file' - $!\n";
      exit;
  }

  #
  #  Read the cache a line at a time.  Entries are keyed by path, so a
  # path which appears twice is only counted once (the last one wins).
  #
  while ( my $line = <$fh> )
  {
      chomp( $line );
      my ( $path, @pairs ) = split( /\t/, $line );

      # A blank line is not a song.
      next unless ( defined( $path ) && length( $path ) );

      $DETAILS{$path} = \@pairs;
  }
  close( $fh );

  #
  #  Process the hash.
  #    Add up song times.
  #    Add up total song size.
  #
  foreach my $details ( values %DETAILS )
  {
      # One entry == one file in the archive.
      $COUNT++;

      foreach my $pair ( @$details )
      {
          # Only non-empty upper-case KEY=value pairs are of interest.
          next unless ( ( $pair =~ /([A-Z]+)=(.*)/ ) && ( length( $2 ) ) );

          my ( $key, $val ) = ( $1, $2 );

          if ( $key eq "SIZE" )
          {
              $SIZE += $val;
          }
          elsif ( $key eq "LENGTH" )
          {
              # Lengths are stored as h:mm:ss or m:ss.
              if ( $val =~ /([0-9]+):([0-9]+):([0-9]+)/ )
              {
                  $TIME += $3 + ( $2 * 60 ) + ( $1 * 60 * 60 );
              }
              elsif ( $val =~ /([0-9]+):([0-9]+)/ )
              {
                  $TIME += $2 + ( $1 * 60 );
              }
          }
      }
  }

  #
  #  Fudge the size into the largest sensible unit, to one decimal place.
  #
  my $sizeTotal;
  if ( $SIZE < 1024 )
  {
      $sizeTotal = $SIZE . " bytes";
  }
  elsif ( $SIZE < ( 1024 ** 2 ) )
  {
      $sizeTotal = ( int( 10 * $SIZE / 1024 ) / 10 ) . "K";
  }
  elsif ( $SIZE < ( 1024 ** 3 ) )
  {
      $sizeTotal = ( int( 10 * $SIZE / ( 1024 ** 2 ) ) / 10 ) . "Mb";
  }
  else
  {
      $sizeTotal = ( int( 10 * $SIZE / ( 1024 ** 3 ) ) / 10 ) . "Gb";
  }

  #
  #  Fudge the time.  The % operator works on integers, so the fractional
  # part of each division is discarded.
  #
  my $playlength = int( $TIME / ( 24 * 60 * 60 ) )   . " days, "
                 . ( ( $TIME / ( 60 * 60 ) ) % 24 )  . " hours, "
                 . ( ( $TIME / 60 ) % 60 )           . " mins "
                 . ( $TIME % 60 )                    . " seconds";

  $TIME = $playlength;

  #
  #  Print the results
  #
  print <<E_O_INFO;
Total number of songs: $COUNT
Total size of archive: $sizeTotal ($SIZE bytes)
Total playlength     : $TIME
E_O_INFO

}


#
#  Read the tags of every file in @FOUND and write them to the cache file
# named by $cache, one line per file:
#
#    /path/to/file<TAB>mtime=N<TAB>KEY=value<TAB>KEY=value...
#
#  If an entry for the file was loaded from the previous cache (%cache)
# and its recorded mtime matches the file's current mtime, the cached
# tags are written back out instead of re-reading the audio file.
#
#  The lockfile is refreshed with a progress report every ten tracks.
# If the cache cannot be opened for writing the lock is removed and the
# script exits.
#
sub indexFiles( )
{
    my $count = 0;
    my $total = scalar( @FOUND );

    my $out;
    if ( ! open( $out, ">", $cache ) )
    {
        print "Error opening the cache file '$cache' - $!\n";
        &removeLock();
        exit;
    }

    foreach my $file ( sort @FOUND )
    {
        #
        # Update our progress count every ten tracks.
        if ( ( $count % 10 ) == 0 )
        {
            &updateLock( $count, $total );
        }

        #
        # Skip the file if it's 0-bytes, or if it can no longer be
        # stat()'d at all (it vanished, or is a dangling link).
        #
        my $size = ( stat( $file ) )[7];
        next if ( !defined( $size ) || $size < 1 );

        #
        # The modification time used to validate the cache.  For a
        # symlink this is the time of the link itself.
        # NOTE(review): that means a changed link *target* is not noticed;
        # kept as-is, confirm whether that is intended.
        #
        my @fstat = ( -l $file ) ? lstat( $file ) : stat( $file );
        my $mtime = ( @fstat == 13 ) ? $fstat[9] : undef;

        # Trust the old cache entry only if its mtime matches the file.
        my $entry    = exists( $cache{$file} ) ? $cache{$file} : undef;
        my $reusable = ( defined( $entry ) &&
                         defined( $mtime ) &&
                         defined( $entry->{mtime} ) &&
                         ( $entry->{mtime} == $mtime ) );

        if ( $reusable )
        {
            # Write the previously cached tags straight back out.
            my %cached = %$entry;
            delete( $cached{mtime} );
            _writeCacheEntry( $out, $file, $mtime, \%cached );
        }
        else
        {
            # Holder for the tags within the file.
            my %TAGS;

            # Populate the tag list.
            if ( $file =~ /mp3$/i )
            {
                %TAGS = &getMP3Display( $file );
            }
            elsif ( $file =~ /ogg$/i )
            {
                %TAGS = &getOGGDisplay( $file );
                $TAGS{'SIZE'} = $size unless $TAGS{'SIZE'};
            }

            #
            # Make sure filename is defined: the leaf name without its
            # directory and without its suffix.
            #
            my $base = $file;
            if ( $base =~ /(.*)\/(.*)/ )
            {
                $base = $2;
            }
            if ( $base =~ /(.*)\.(.*)/ )
            {
                $base = $1;
            }
            $TAGS{ 'FILENAME' } = $base;

            #
            #  Show the tags we've found if the user wanted verbosity.
            if ( $VERBOSE )
            {
                print $file . "\n";
                foreach my $k ( keys %TAGS )
                {
                    my $shown = defined( $TAGS{ $k } ) ? $TAGS{ $k } : '';
                    print "\t$k\t" . $shown . "\n";
                }
            }

            _writeCacheEntry( $out, $file, $mtime, \%TAGS );
        }

        # Update our processed count.
        $count += 1;
    }

    #
    # Close the output file - buffered write errors only show up here.
    if ( ! close( $out ) )
    {
        print "Error closing the cache file '$cache' - $!\n";
    }
}


#
#  Write one cache line to the handle $fh: the filename, the mtime (when
# known) and every tag as tab separated KEY=value pairs.
#
#  The cache is one line per file with tabs between fields, so tabs and
# line breaks inside a tag value are replaced with spaces - otherwise the
# reading code would see extra fields or extra (bogus) entries.
#
sub _writeCacheEntry
{
    my ( $fh, $file, $mtime, $tags ) = @_;

    my $line = $file;
    $line .= "\tmtime=" . $mtime if ( defined( $mtime ) );

    # Sorted so that the same tags always produce the same line.
    foreach my $k ( sort keys %$tags )
    {
        my $value = defined( $tags->{ $k } ) ? $tags->{ $k } : '';

        $value =~ s/\t/     /g;
        $value =~ s/[\r\n]+/ /g;

        $line .= "\t" . $k . "=" . $value;
    }

    print $fh $line . "\n";
}



#
#  Collect the details of an MP3 file into a hash, which is returned as a
# flat list.
#
#  LENGTH, BITRATE and SIZE come from get_mp3info() and are always
# present.  ARTIST, TITLE, ALBUM, YEAR, COMMENT, TRACK and GENRE are only
# added when get_mp3tag() returns something.  Any missing or false value
# is stored as the empty string.
#
sub getMP3Display($)
{
    my ( $file ) = (@_);

    my %TAGS;

    #
    # MPEG file information - we want this regardless of whether the
    # file carries an ID3 tag.  Maps our key to the get_mp3info() field.
    #
    my %info_field_for = (
                          'LENGTH'  => 'TIME',
                          'BITRATE' => 'BITRATE',
                          'SIZE'    => 'SIZE',
                         );

    my $inf = &get_mp3info( $file );
    foreach my $name ( keys %info_field_for )
    {
        $TAGS{ $name } = $inf->{ $info_field_for{ $name } } || "";
    }

    #
    # Now the tag information itself.  Maps our key to the get_mp3tag()
    # field; only TRACK is named differently.
    #
    my %tag_field_for = (
                         'ARTIST'  => 'ARTIST',
                         'TITLE'   => 'TITLE',
                         'ALBUM'   => 'ALBUM',
                         'YEAR'    => 'YEAR',
                         'COMMENT' => 'COMMENT',
                         'TRACK'   => 'TRACKNUM',
                         'GENRE'   => 'GENRE',
                        );

    my $tag = &get_mp3tag( $file );
    if ( defined $tag )
    {
        foreach my $name ( keys %tag_field_for )
        {
            $TAGS{ $name } = $tag->{ $tag_field_for{ $name } } || "";
        }
    }

    return( %TAGS );
}


#
#  Collect the details of an OGG Vorbis file into a hash, which is
# returned as a flat list.
#
#  Every key reported by gnump3d::ogginfo's info() is stored upper-cased.
# When gnump3d::oggtagreader finds comments, ARTIST, COMMENT, GENRE,
# TRACK, ALBUM, TITLE, YEAR and SIZE are set from them (empty string when
# missing or false).
#
#  LENGTH is always converted from seconds to m:ss - also for files with
# no comments at all - so that every cache entry uses the colon-separated
# form which showStats() parses.
#
sub getOGGDisplay($)
{
    my ($file) = (@_);

    my $reader = gnump3d::ogginfo->new($file);
    my %TAGS;

    # Stream information.  Fetch the hash once rather than calling
    # info() again on every pass of the loop.
    my $info = $reader->info;
    foreach my $key ( keys %$info )
    {
        $TAGS{uc($key)} = $info->{$key};
    }

    # Comment tags.
    my $comment = gnump3d::oggtagreader->new( );
    my %tags = $comment->getTags($file);

    if ( keys( %tags ) )
    {
        $TAGS{'ARTIST'} = $tags{'artist'}  || "";
        $TAGS{'COMMENT'}= $tags{'comment'} || "";
        $TAGS{'GENRE'}  = $tags{'genre'}   || "";
        $TAGS{'TRACK'}  = $tags{'track'}   || "";
        $TAGS{'ALBUM'}  = $tags{'album'}   || "";
        $TAGS{'TITLE'}  = $tags{'title'}   || "";
        $TAGS{'YEAR'}   = $tags{'year'}    || "";
        $TAGS{'SIZE'}   = $tags{'size'}    || "";
    }

    # Ogg returns the length in sss format vice mm:ss.  This does not
    # depend on the comments, so it is done whether or not any were found.
    if ( $TAGS{'LENGTH'} )
    {
        my $s = $TAGS{'LENGTH'} % 60;
        my $m = ($TAGS{'LENGTH'} - $s) / 60;
        $TAGS{'LENGTH'} = sprintf("%d:%02d", $m, $s);
    }

    return( %TAGS );
}


#
#  Report whether the file named by $lockfile currently exists.
sub lockPresent( )
{
    my $present = -e $lockfile;

    return( $present );
}

#
#  Delete the file named by $lockfile, whether or not it is there.
#  Returns whatever unlink reports (the number of files removed).
sub removeLock( )
{
    return unlink( $lockfile );
}

#
#  Update our lockfile with the current progress count.
#
#  Arguments: the number of files processed so far, and the total number
# of files to process.  The lockfile is rewritten to hold one line giving
# both numbers and the whole-number percentage still remaining.
#
#  This is only a progress report, so failing to write it produces a
# warning rather than stopping the indexing.
#
sub updateLock
{
    my ( $cur, $total ) = ( @_ );

    # An empty archive has nothing remaining - and nothing to divide by.
    my $PER = $total ? ( ( $total - $cur ) / $total ) * 100.0 : 0;

    # Drop any fractional part.
    if ( $PER =~ /([0-9]+)\.([0-9]+)/ )
    {
        $PER = $1;
    }

    my $lok;
    if ( ! open( $lok, ">", $lockfile ) )
    {
        warn "Unable to update the lockfile '$lockfile' - $!\n";
        return;
    }
    print $lok "Processing file $cur of $total ( \%$PER remaining )\n";
    close( $lok );
}

#
# Create our lockfile (the file named by $lockfile) if it doesn't already
# exist; an existing file is left untouched because we open for append.
#
# A failure is reported with a warning but does not stop the script.
sub createLock(  )
{
    my $lok;
    if ( ! open( $lok, ">>", $lockfile ) )
    {
        warn "Unable to create the lockfile '$lockfile' - $!\n";
        return;
    }
    close( $lok );
}



#
#  Parse the command line options, storing each one directly into the
# matching script-level variable.  Returns whatever GetOptions returns.
sub parseArguments()
{
    # Option specification => where its value is stored.
    my @option_spec = (
                       "config=s" => \$CONFIG_FILE,
                       "debug"    => \$DEBUG,
                       "help"     => \$SHOW_HELP,
                       "lock=s"   => \$lockfile,
                       "output=s" => \$cache,
                       "root=s"   => \$root,
                       "stats"    => \$SHOW_STATS,
                       "verbose"  => \$VERBOSE,
                       "version"  => \$SHOW_VERSION,
                      );

    return GetOptions( @option_spec );
}

#
#  Show help for this script: the version banner followed by the usage
# text listing every command line option.
sub showHelp()
{
    # showVersion is defined (with a prototype) further down the file, so
    # call it with '&' - a plain call here is compiled before Perl has
    # seen the prototype and draws a warning under -w.
    &showVersion();
    print <<END_OF_USAGE;

Usage: gnump3d-index [options]

  gnump3d-index is a simple script to index the tag information located
 within the audio files of your archive.

  The script will index the files found beneath your root, as defined
 in the gnump3d.conf file - and write out a cache for speedy access.

  (See also: 'man gnump3d.conf', 'man gnump3d', and man 'gnump3d-index')

Options:
    --config file      The configuration file to read.
    --debug            Display the name of every file examined.
    --help             Show this help.
    --lock file        Use the given lockfile rather than the default.
    --output file      Write the output to the given file.
    --root directory   Start the indexing at the given directory.
    --stats            Don't update the cache file, just display audio stats.
    --verbose          Display all the tag values read.
    --version          Show the version number.
END_OF_USAGE
}


#
#  Print a one line banner giving the version number of this script.
sub showVersion()
{
    #
    # The version lives inside a CVS revision marker.  Pull the number
    # out of it, falling back to the whole marker if it doesn't have the
    # expected shape.  (The marker keyword itself must never be spelt out
    # in this sub, or CVS would expand it here too.)
    #
    my $revision = ( $VERSION_NUMBER =~ /\$([a-zA-Z:]+) ([0-9\.]+) \$/ )
                   ? $2
                   : $VERSION_NUMBER;

    print "gnump3d-index - version $revision - http://www.gnump3d.org/\n";

}

# GH: subroutine to read the cache.
#
# Loads the cache file named by the first argument into %cache, keyed by
# pathname.  Each value is a hash reference of the entry's KEY => value
# pairs plus 'mtime', which is -1 for entries written in the old format
# that carried no mtime.
#
# Dies if the file cannot be opened.  A pathname that appears more than
# once is reported and the last entry wins.
sub readCache {
  my $fname = shift;

  # Cache is 1 line per entry as /absolute/pathname\tKEY=VALUE...
  open( my $fh, "<", $fname )
    or die "Can't open cache ($fname) for read. $!\n";

  while( my $line = <$fh> ) {
    chomp( $line );
    my( $pathname, @fields ) = split( /\t/, $line );

    # Ignore blank lines rather than caching an entry with no name.
    next unless( defined( $pathname ) && length( $pathname ) );

    if( exists( $cache{$pathname} ) ) {
      print "Error: duplicate entries for ($pathname)\n";
    }

    # Old dbase format has no mtime; -1 can never match a real file.
    my $entry = { mtime => -1 };
    if( @fields && $fields[0] =~ /^mtime=(\d+)$/ ) {
      $entry->{mtime} = $1;
      shift( @fields );
    }

    foreach my $field (@fields) {
      # Split on the first '=' only: a tag value may itself contain '='
      # and must not be cut short.
      my( $key, $value ) = split( /=/, $field, 2 );
      next unless( defined( $key ) && length( $key ) );

      # Use empty string for undef.
      $entry->{$key} = defined( $value ) ? $value : '';
    }
    $cache{$pathname} = $entry;
  }
  close( $fh );
}


#
#  Perl runs this whenever the script terminates - normal exit, die, or a
# trapped signal - so the lockfile never outlives us.
#
END
{
    removeLock();
}
