#!/usr/bin/perl
# GPL
# author: Rohan Baxter, Ultimode Inc. rohan@ultimode.com
#
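# CGI-script args:
# q= query string
# np= page index (default: 0)
# ps= number of results to show on a page (default: 20)
# mode= all or any? (not read from the request; $mode is hard-coded to 'all' below)
# t= url status constraint for search
# ul= url filter for search default: 0
# ps_next= (not read from the request; computed below from the hit count)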
#
#
use strict;
#use diagnostics;
use udmDB;
use udmTemplate;
use udmParser;
use DBI();
use CGI();
# Emit the HTTP header straight away; everything printed later is the body.
print "Content-type: text/html\r\n\r\n";

# Set some defaults (can be overwritten in template file).
# They live in %ENV because reading the template sets $ENV{} variables
# as a side effect (see below).
$ENV{MySQLDB}   = 'udmsearch';
$ENV{MySQLHost} = 'localhost';
$ENV{MySQLUser} = 'root';
$ENV{MySQLPass} = '';

my $self          = 'search.pl';
my $template_file = "/hsphere/local/home/phantomp/mnogosearch/reel2reeltexas.com/search.htm";
my $search_page   = "";    # accumulates the complete HTML response

# Read in Template (will set $ENV{} variables as side-effect)!
my $template_hashref = udmTemplate::read_template($template_file);
my %template         = %$template_hashref;

# Template environment variables handed to every udmTemplate call.
# This must be initialised from an empty LIST: the former "= {}" assigned
# a single hash reference to the hash, which created one bogus key of the
# form "HASH(0x...)" with an undef value.
my %template_env = ();
$template_env{self} = $self;
# Query Processing
# Query string is taken from command line
#my $query = $ARGV[0];
#my $np = 0;
#my $ps = 20; # number of results to display on a page
# Query string is taken from CGI param
my $cgi   = CGI->new;
my $query = $cgi->param("q");

# Page index.  It is multiplied into the result offset further down, so
# anything that is not a plain non-negative integer falls back to page 0.
# The length cap keeps the value inside exact integer range.
my $np = $cgi->param("np");
if (!defined($np) || $np !~ /\A[0-9]{1,9}\z/) {
    $np = 0;
}
$np += 0;    # normalise e.g. "007" to 7

# Page size.  It is echoed verbatim into the navigation HTML, so it must
# be a positive integer; anything else falls back to the default of 20.
my $ps = $cgi->param("ps");
if (!defined($ps) || $ps !~ /\A[0-9]{1,9}\z/ || $ps < 1) {
    $ps = 20;
}
$ps += 0;

my $ul  = $cgi->param("ul");    # a filter for urls to be searched...
my $tag = $cgi->param("t");     # a constraint on status of URL being searched i.e. 404 ,etc

# $tag is interpolated straight into a SQL fragment, so it is accepted
# only when it consists solely of digits.  As before, an absent, empty or
# zero tag produces no constraint at all.
my $tagstr = "";
if ($tag && $tag =~ /\A[0-9]+\z/) {
    $tagstr = "AND url.tag = $tag";
}

# Parse the raw query; a non-empty $error aborts with an error page.
my ($qu, $words, $error) = udmParser::parse($query);
if ($error ne "") {
    $search_page .= udmTemplate::print_error($error, \%template, \%template_env);
    print $search_page;
    exit(0);
}

if ($words eq "") {    # query is empty: show just the page frame
    $search_page .= udmTemplate::print_template(\%template, "top",    \%template_env);
    $search_page .= udmTemplate::print_template(\%template, "bottom", \%template_env);
    print $search_page;
    exit(0);
}
# Expose the raw query to the templates.
# NOTE(review): the value is stored unescaped.  If a template echoes it,
# HTML special characters supplied by the user (& " < >) are not
# neutralised here -- confirm whether udmTemplate escapes it.
$template_env{query} = $query;

# Connect to Database using the (possibly template-overridden) settings.
my $dbh = udmDB::dbconnect(
    $ENV{MySQLDB},
    $ENV{MySQLHost},
    $ENV{MySQLUser},
    $ENV{MySQLPass},
);
$template_env{dbh} = $dbh;

# Look for stopwords in words, delete them;
# words_info is set up to explain what is going on.
$template_env{words_info} = "";
($words, $template_env{words_info}) = udmDB::remove_stopwords($dbh, $words);
#open FILE,">/tmp/tmp.arg";
#print FILE "words: $words \n";
#close(FILE);

# A query was given, but nothing is left of it: the only words in the
# query are stopwords.
if ($qu ne "" && $words eq "") {
    $error = "all query words are too common to search on.";
    $search_page .= udmTemplate::print_error($error, \%template, \%template_env);
    $dbh->disconnect();
    print $search_page;
    exit(0);
}
# Now collect word count stats for remaining words
$template_env{words_info} = udmDB::get_word_stats(
    $dbh, $words, $template_env{words_info}, $ul, $tagstr
);

# Database Query: total number of matching documents.
# I'm puzzled why ul and tagstr work in here???
my $num_hits = udmDB::get_num_of_hits($dbh, $qu, $words, $ul, $tagstr);
$template_env{found} = $num_hits;

# Do Page Calculations (NB: It would be good to tidy this up)
my $from = $np * $ps;    # used in query
my $ps1  = $ps + 1;      # used in next query

# Number of the last doc on this page, clamped to the number of hits.
my $last_on_page = ($np + 1) * $ps;
$last_on_page = $num_hits if $last_on_page > $num_hits;

$template_env{'from1'} = $from + 1;       # num of first doc on page
$template_env{'to'}    = $last_on_page;   # num of last doc on page

# Hits remaining after this page, replaced by a full page size when out
# of the 0..$ps range.
my $ps_next = $num_hits - $template_env{'to'};
$ps_next = $ps if ($ps_next < 0) || ($ps_next > $ps);

# Start outputting Template
$search_page .= udmTemplate::print_template(\%template, "top", \%template_env);

# Finish if words do not occur in the db
unless ($num_hits > 0) {
    for my $section ("notfound", "bottom") {
        $search_page .= udmTemplate::print_template(\%template, $section, \%template_env);
    }
    $dbh->disconnect();
    print $search_page;
    exit(0);
}
$search_page .= udmTemplate::print_template(\%template, "restop", \%template_env);
# Database Query: get urls with word counts.
# One row more than a page holds ($ps1) is requested so that the presence
# of a following page can be detected.
#
# The filter variant is used only when a filter is actually present.
# $tagstr is always defined (it starts out as ""), so the former
# defined($tagstr) test was always true and the unfiltered branch could
# never run; test for a non-empty string instead.
my $sth;
if ( defined($ul) || $tagstr ne "" ) {
    $sth = udmDB::get_url_ids_using_filter( $dbh, $qu, $words, $from, $ps1, $ul, $tagstr );
} else {
    $sth = udmDB::get_url_ids( $dbh, $qu, $words, $from, $ps1 );
}
my $rows = $sth->rows();

# Determine if there are more results than can fit on a page
my $isnext = 0;
my $i      = 0;
if ($rows > $ps) {
    $rows   = $ps;
    $isnext = 1;
}

# Collect the url ids of this page into a comma-separated list.
# "-1" is the sentinel meaning "no ids collected".
my $url_in = "-1";
my $url_id = "";
my @url_ids;
while ($i < $rows) {
    $i++;
    my @row = $sth->fetchrow_array;
    last unless @row;    # handle ran dry before the reported row count
    $url_id = $row[0];
    push @url_ids, $url_id;
}
$url_in = join(",", @url_ids) if @url_ids;
# Output each of the results found: fetch the documents for the collected
# url ids and render one "res" template section per row.
if ($url_in ne "-1") {
    my $sth2 = udmDB::get_docs($dbh, $qu, $words, $url_in);

    # Running number of the document being shown.
    $template_env{'ndoc'} = $from + 1;

    # Template variable names, in the column order of the result rows.
    # Note that the last column replaces the 'query' entry.
    my @columns = (
        'url',     'title',   'text', 'contype',
        'docsize', 'lastmod', 'keyw', 'desc',
        'crc',     'rec_id',  'rating', 'query',
    );

    while (my @data = $sth2->fetchrow_array) {
        @template_env{@columns} = @data[0 .. $#columns];
        $search_page .= udmTemplate::print_template(\%template, "res", \%template_env);
        #print "title:$template_env{title} :url=$template_env{url} \n";
        $template_env{'ndoc'}++;
    }
    $sth2->finish();
}
# Navigation (prev/next) and page footer.
# NOTE(review): $prevp, $nextp, $mode and the re-encoded $query are not
# referenced again in the code visible here -- presumably they were meant
# for prev/next link URLs; confirm before removing.
my ($prevp, $nextp) = (0, 0);
my $mode = 'all';    # Don't understand this variable ???

# putting in html special chars
$query =~ s/ /\+/g;
$query =~ s/&/%26/g;

# Build the navigation text: a "previous" part on every page but the
# first, and a "next" part when more rows were found than fit on a page.
my $nav_text = " ";
if ($np > 0) {
    $prevp    = $np - 1;
    $nav_text = "[< < Prev $ps] ";
}
if ($isnext == 1) {
    $nextp = $np + 1;
    $nav_text .= "[Next $ps_next > >]";
}
$template_env{nav} = $nav_text;

# Close the page with the result footer and the general footer.
for my $section ("resbot", "bottom") {
    $search_page .= udmTemplate::print_template(\%template, $section, \%template_env);
}

# Release database resources before sending the response.
$sth->finish();
$dbh->disconnect();

# It might be better to "dribble" output so that results can be seen
# straightaway
print $search_page;