[Koha] marc_word table and koha 2.2 performance on updates

Thu Sep 14 02:54:01 NZST 2006

Hello,

I am trying to use 2.2.6RC2 with MySQL 4.1 on Debian Stable to store catalog with mix of russian and english books in UTF-8.

I wrote a perl script to import books from CSV file. Performance of
import slows down after importing 500-600 books, because size of
marc_word becomes huge. Currently I have:

mysql> select count(*) from biblio;
+----------+
| count(*) |
+----------+
|      745 |
+----------+
1 row in set (0.00 sec)

mysql> select count(*) from marc_word;
+----------+
| count(*) |
+----------+
|  6747764 |
+----------+
1 row in set (0.00 sec)

There are a lot of words in marc_word table that looks empty. Search is
working only for english names and does not works for Russian, although
I can see russian letters in correct UTF-8 inside mysql and I have set
up mysql and apache to work with UTF-8;

CSV file I use does not have a lot of empty fields. Why so many rows in
marc_word although I use koha-2.2.6RC2? Please help.
--
Regards,
Victor Ashik

P.S.: Here is the script:

#!/usr/bin/perl

use MARC::Record;
use C4::Context;
use C4::Biblio;
use C4::AuthoritiesMarc;

my $dbh = C4::Context->dbh;

my $newRecord = MARC::Record->new();

# FIXME: I do not know how to calculate record size now so use fixed one
$newRecord->leader('00903pam   2200265 a 4500');
#$record->set_leader_lengths();

# parse CSV fields
while (<>) {
	my $text = $_;
	my @new  = ();
	push(@new, $+) while $text =~ m{
		"([^\"\\]*(?:\\.[^\"\\]*)*)",?
		|([^,]+),?
		|,
	}gx;
	push (@new, undef) if substr($text,-1,1) eq ',';

	# System Control Number
	my $newField = MARC::Field->new(
	  '035','','',
	  'a' => $new[0],
	);
	$newRecord->insert_fields_ordered($newField);

	# Author
	$newField = MARC::Field->new(
	  '100','','',
	  'a' => $new[1],
	);
	$newRecord->insert_fields_ordered($newField);

	# Organization
	$newField = MARC::Field->new(
	  '110','','',
	  'a' => $new[2],
	);
	$newRecord->insert_fields_ordered($newField);
	#my $newField = MARC::Field->new(
	#  '440a','','',
	#  'a' => $new[3],
	#);
	#$newRecord->insert_fields_ordered($newField);
	#my $newField = MARC::Field->new(
	#  '440n','','',
	#  'a' => $new[4],
	#);
	# I found that next line was not commented out while writing this post
	#$newRecord->insert_fields_ordered($newField);

	# Title
	$newField = MARC::Field->new(
	  '245','','',
	  'a' => $new[5],
	);
	$newRecord->insert_fields_ordered($newField);

	# Place of publication
	$newField = MARC::Field->new(
	  '260','','',
	  'a' => $new[7],
	);
	$newRecord->insert_fields_ordered($newField);

	# Name of publisher 
	$newField = MARC::Field->new(
	  '260','','',
	  'b' => $new[8],
	);
	$newRecord->insert_fields_ordered($newField);

	# Date of publication
	$newField = MARC::Field->new(
	  '260','','',
	  'c' => $new[9],
	);
	$newRecord->insert_fields_ordered($newField);

	# General subdivision
	$newField = MARC::Field->new(
	  '655','','',
	  'x' => $new[10],
	);
	$newRecord->insert_fields_ordered($newField);

	print $newRecord->as_formatted();

	my ($bibid,$oldbibnum,$oldbibitemnum) = NEWnewbiblio($dbh,$newRecord,'');
}