[Koha] A script for converting MARC records from Sagebrush Athena
Jeffrey LePage
jeffrey_lepage at yahoo.com
Tue Dec 23 06:02:25 NZDT 2008
I've been trying to convert a MARC export generated by Sagebrush Athena into a format acceptable to Koha.
I believe I have a working script that does the conversion.
I did the final conversion this morning, and the import with bulkmarcimport.pl. I haven't had the chance to look at the catalog in depth, but at first glance everything seems OK. I will now unleash the librarians with instructions to examine the new catalog and make sure everything is good. If I find problems I'll post the new information.
One recent change: Apparently Koha likes UTF-8, so I explicitly encode any strings in the MARC records as UTF-8.
For now, here's my script. Please note, the script optionally sets the current and permanent branch for the books (Koha 952$a and 952$b). If you don't want the branch set then don't use this option. If you use this option, then the branch_code should correspond to branches.branchcode in the Koha DB. Is setting the branch a bad idea?
Use with care, no warranty is expressed or implied, blah blah blah.
For those of you familiar with Koha/Perl, please comment. Especially let me know if I'm doing something horribly horribly wrong.
PERL CODE FOLLOWS:
********************************
use strict;
use warnings;

use Encode;
use MARC::Batch;
# marcconvert.pl - convert a MARC export produced by Sagebrush Athena into a
# file suitable for import into Koha.
#
# Usage:
#   perl marcconvert.pl originalmarcfile convertedmarcfile [branch_code]
#
# What the conversion does:
#   * every control field (tags 001-009) and every data field is copied into
#     a fresh MARC::Record, with each string passed through encode("utf8");
#   * each 852 field is rewritten as a 952 field (see %koha_code_for and
#     @call_number_codes below for the subfield mapping);
#   * when branch_code is given, it is written to 952 $a and 952 $b of every
#     converted 852 field.
#
# NOTE(review): the new record keeps the default leader generated by
# MARC::Record->new(); the leader of the source record is not copied.
# Confirm that this is acceptable for the Koha import.

# Athena 852 subfield codes that are renamed one-to-one in Koha 952.
# Codes not listed here (and not handled specially below) are copied as-is.
my %koha_code_for = (
    't' => 't',
    '6' => 'y',
    'z' => 'z',
    '9' => 'g',
    '5' => 'e',
    '8' => 'd',
    'p' => 'p',
);

# Athena 852 subfields merged, in this order, into the single Koha 952 $o.
my @call_number_codes = qw(k h i m);

# Athena 852 subfields that are discarded; the branch is set from the
# optional branch_code argument instead.
my %is_dropped = map { $_ => 1 } qw(a b);

# --- Command line ---------------------------------------------------------

# Test the argument count before looking at $ARGV[0], so that running the
# script with no arguments never compares an undefined value.
if ( @ARGV && ( $ARGV[0] eq '-h' || $ARGV[0] eq '--help' ) ) {
    print usage_text();
    exit 0;
}
if ( @ARGV < 2 ) {
    print {*STDERR} usage_text();
    exit 1;
}

my ( $input_file, $output_file, $branch_code ) = @ARGV;

# The input must exist and be a plain file (not a directory or something else).
-f $input_file
    or die "The input file '$input_file' does not exist\n";

# Optional branch code; when given it should correspond to
# branches.branchcode in the Koha DB.
my $location;
if ( defined $branch_code && length $branch_code ) {
    $location = encode( "utf8", $branch_code );
}

# --- Conversion -----------------------------------------------------------

my $batch = MARC::Batch->new( 'USMARC', $input_file );

# Three-argument open on a lexical handle: the output name is used exactly
# as given (absolute paths work) and can never be read as an open mode.
open my $out_fh, '>', $output_file
    or die "Cannot open output file '$output_file' for writing: $!\n";

# MARC transmission format is binary; disable any newline translation.
binmode $out_fh;

while ( my $record = $batch->next() ) {
    my $newrecord = MARC::Record->new();

    for my $field ( $record->fields() ) {
        $newrecord->append_fields( convert_field( $field, $location ) );
    }

    print {$out_fh} $newrecord->as_usmarc()
        or die "Cannot write to '$output_file': $!\n";
}

# Buffered write errors only surface at close, so check it.
close $out_fh
    or die "Cannot close output file '$output_file': $!\n";

# --- Helpers --------------------------------------------------------------

# Return the help text shown for -h/--help or when too few arguments are given.
sub usage_text {
    return
          "This converts a MARC file generated by Sagebrush Athena to a file appropriate for Koha\n\n"
        . "Usage: perl marcconvert.pl originalmarcfile convertedmarcfile\n"
        . "\tor \n"
        . "Usage: perl marcconvert.pl originalmarcfile convertedmarcfile branch_code \n\n";
}

# Build the replacement for one field of a source record.
#   $field    - MARC::Field taken from the source record
#   $location - encoded branch code, or undef when none was requested
# Returns a new MARC::Field.
sub convert_field {
    my ( $field, $location ) = @_;

    # Control fields carry data only: no indicators and no subfields.
    if ( $field->is_control_field() ) {
        return MARC::Field->new( $field->tag(),
            encode( "utf8", $field->data() ) );
    }

    # Sagebrush Athena puts item data in tag 852 but Koha wants it in 952.
    if ( $field->tag() eq '852' ) {
        return convert_holdings_field( $field, $location );
    }

    return copy_data_field($field);
}

# Copy a data field unchanged apart from encoding each subfield value.
#   $field - MARC::Field with indicators and subfields
# Returns a new MARC::Field with the same tag, indicators and subfield codes.
sub copy_data_field {
    my ($field) = @_;

    my @subfields;
    for my $sub ( $field->subfields() ) {
        my ( $code, $value ) = @{$sub};
        push @subfields, $code, encode( "utf8", $value );
    }

    return MARC::Field->new( $field->tag(), $field->indicator(1),
        $field->indicator(2), @subfields );
}

# Turn an Athena 852 field into a Koha 952 field.
#   $field    - the 852 MARC::Field
#   $location - encoded branch code for 952 $a/$b, or undef to leave them out
# Returns a new MARC::Field tagged 952 that keeps the 852 indicators.
sub convert_holdings_field {
    my ( $field, $location ) = @_;

    my @subfields;
    my %call_number_part = map { $_ => '' } @call_number_codes;

    for my $sub ( $field->subfields() ) {
        my ( $code, $value ) = @{$sub};
        my $data = encode( "utf8", $value );

        if ( exists $call_number_part{$code} ) {
            # If a code repeats, the last occurrence wins.
            $call_number_part{$code} = $data;
        }
        elsif ( $is_dropped{$code} ) {
            next;
        }
        else {
            my $koha_code
                = exists $koha_code_for{$code} ? $koha_code_for{$code} : $code;
            push @subfields, $koha_code, $data;
        }
    }

    # Koha 952 $o is composed of 852 $k $h $i $m, separated by single spaces.
    # The parts are already encoded above; encoding the joined string again
    # would double-encode every non-ASCII character.
    my $koha_o = join ' ', @call_number_part{@call_number_codes};
    $koha_o =~ s/^\s+//;
    $koha_o =~ s/\s+$//;
    $koha_o =~ s/\s{2,}/ /g;

    # NOTE(review): $o is emitted even when it is empty, which also ensures
    # the new field always has at least one subfield.
    push @subfields, 'o', $koha_o;

    # Set the current and permanent branch only when one was requested.
    if ( defined $location ) {
        push @subfields, 'a', $location;
        push @subfields, 'b', $location;
    }

    return MARC::Field->new( '952', $field->indicator(1),
        $field->indicator(2), @subfields );
}
More information about the Koha
mailing list