#!/usr/bin/perl
# GPL
# author: Rohan Baxter, Ultimode Inc. rohan@ultimode.com
#
# CGI search front-end.  Parses the query with udmParser, looks the words up
# through udmDB and renders the result page with udmTemplate.  The complete
# page is accumulated in $search_page and printed in one go.
#
# CGI-script args:
#   q=       query string
#   np=      page index (default: 0)
#   ps=      num of results to show on a page (default: 20)
#   mode=    ??all or any?   (not read by this script)
#   t=       url status constraint for search (integer, e.g. 404)
#   ul=      url filter for search   default: 0
#   ps_next= (not read by this script)
#

use strict;
use warnings;
#use diagnostics;
use udmDB;
use udmTemplate;
use udmParser;
use DBI();
use CGI();

# print out header straight-away
print "Content-type: text/html\r\n\r\n";

# Set some defaults (can be overwritten in template file)
$ENV{MySQLDB}   = 'udmsearch';
$ENV{MySQLHost} = 'localhost';
$ENV{MySQLUser} = 'root';
$ENV{MySQLPass} = '';

my $self          = 'search.pl';
my $template_file = "/hsphere/local/home/phantomp/mnogosearch/reel2reeltexas.com/search.htm";
my $search_page   = "";

# Read in Template (will set $ENV{} variables as side-effect)!
my $template_hashref = udmTemplate::read_template($template_file);
my %template         = %$template_hashref;

# Template environment variables.  Must be initialised with an empty list:
# assigning {} would store a stringified hash reference as a bogus key.
my %template_env = ();
$template_env{self} = $self;

# ---------------------------------------------------------------------------
# Query Processing: everything below comes from the request and is untrusted.
# ---------------------------------------------------------------------------
my $cgi   = CGI->new;
my $query = $cgi->param("q");
$query = "" unless defined $query;

# Page index and page size are used for arithmetic and as query offsets, so
# only plain integers are accepted; anything else falls back to the default.
my $np = _uint_param($cgi, "np", 0,  0);
my $ps = _uint_param($cgi, "ps", 20, 1);

# A filter for urls to be searched.
# NOTE(review): $ul is handed to udmDB unchanged; confirm udmDB quotes it
# before it reaches SQL.
my $ul = $cgi->param("ul");

# A constraint on the status of the URL being searched, i.e. 404, etc.
# The value is interpolated into SQL, so it is only used when it is an
# integer; any other value is ignored instead of being passed through.
my $tag    = $cgi->param("t");
my $tagstr = "";
if ($tag && $tag =~ /\A-?\d{1,9}\z/) {
    $tagstr = "AND url.tag = $tag";
}

my ($qu, $words, $error) = udmParser::parse($query);

if (defined $error && $error ne "") {
    $search_page .= udmTemplate::print_error($error, \%template, \%template_env);
    _finish_page($search_page);
}

if (!defined $words || $words eq "") {
    # query is empty
    $search_page .= udmTemplate::print_template(\%template, "top",    \%template_env);
    $search_page .= udmTemplate::print_template(\%template, "bottom", \%template_env);
    _finish_page($search_page);
}

# The query is echoed back into the HTML page, so escape &, <, > and quotes.
$template_env{query} = $cgi->escapeHTML($query);

# Connect to Database
my $dbh = udmDB::dbconnect($ENV{MySQLDB}, $ENV{MySQLHost},
                           $ENV{MySQLUser}, $ENV{MySQLPass});
if (!$dbh) {
    $error = "could not connect to the search database.";
    $search_page .= udmTemplate::print_error($error, \%template, \%template_env);
    _finish_page($search_page);
}
$template_env{dbh} = $dbh;

# Look for stopwords in words, delete them
# set up words_info to explain what is going on
$template_env{words_info} = "";
($words, $template_env{words_info}) = udmDB::remove_stopwords($dbh, $words);

if ($words eq "" && $qu ne "") {
    # the only words in query are stopwords
    $error = "all query words are too common to search on.";
    $search_page .= udmTemplate::print_error($error, \%template, \%template_env);
    _finish_page($search_page, $dbh);
}

# Now collect word count stats for remaining words
$template_env{words_info} =
    udmDB::get_word_stats($dbh, $words, $template_env{words_info}, $ul, $tagstr);

# Database Query
# I'm puzzled why ul and tagstr work in here???
my $num_hits = udmDB::get_num_of_hits($dbh, $qu, $words, $ul, $tagstr);
$template_env{found} = $num_hits;

# Do Page Calculations (NB: It would be good to tidy this up)
my $from = $np * $ps;                       # used in query
$template_env{'from1'} = $from + 1;         # num of first doc on page
$template_env{'to'}    = ($np + 1) * $ps;   # num of last doc on page
my $ps1 = $ps + 1;                          # one extra row reveals a next page
if ($template_env{'to'} > $num_hits) {
    $template_env{'to'} = $num_hits;
}
# Number of results the next page would show, clamped to one page.
my $ps_next = $num_hits - $template_env{'to'};
if ($ps_next > $ps || $ps_next < 0) {
    $ps_next = $ps;
}

# Start outputting Template
$search_page .= udmTemplate::print_template(\%template, "top", \%template_env);
if ($num_hits > 0) {
    $search_page .= udmTemplate::print_template(\%template, "restop", \%template_env);
}
else {
    # Finish if words do not occur in the db
    $search_page .= udmTemplate::print_template(\%template, "notfound", \%template_env);
    $search_page .= udmTemplate::print_template(\%template, "bottom",   \%template_env);
    _finish_page($search_page, $dbh);
}

# Database Query: get urls with word counts.
# $tagstr is always defined, so it has to be compared with the empty string
# to find out whether a tag filter was actually requested.
my $sth;
if (defined($ul) || $tagstr ne "") {
    $sth = udmDB::get_url_ids_using_filter($dbh, $qu, $words, $from, $ps1, $ul, $tagstr);
}
else {
    $sth = udmDB::get_url_ids($dbh, $qu, $words, $from, $ps1);
}

# Collect at most $ps url ids.  The query asked for $ps + 1 rows, so a row
# beyond the page size means there are more results than fit on this page.
# Rows are counted while fetching because $sth->rows() is not dependable for
# SELECT statements.
my @url_ids;
my $isnext = 0;
while (my @row = $sth->fetchrow_array) {
    if (@url_ids >= $ps) {
        $isnext = 1;
        last;
    }
    push @url_ids, $row[0];
}
$sth->finish();

# Output each of the results found
if (@url_ids) {
    my $sth2 = udmDB::get_docs($dbh, $qu, $words, join(",", @url_ids));
    $template_env{'ndoc'} = $from + 1;

    # Template variable names, in the column order returned by get_docs.
    # NOTE(review): the last column replaces the user's query in the template
    # environment for the rest of the page - confirm this is intended.
    my @fields = qw(url title text contype docsize lastmod
                    keyw desc crc rec_id rating query);

    while (my @data = $sth2->fetchrow_array) {
        @template_env{@fields} = @data[0 .. $#fields];
        $search_page .= udmTemplate::print_template(\%template, "res", \%template_env);
        $template_env{'ndoc'}++;
    }
    $sth2->finish();
}

# Navigation.
# NOTE(review): the entries are plain text without links; the previous and
# next page indices would be $np - 1 and $np + 1 if links are added.
$template_env{nav} = " ";
if ($np > 0) {
    $template_env{nav} = "[< < Prev $ps]   ";
}
if ($isnext) {
    $template_env{nav} .= "[Next $ps_next > >]";
}

$search_page .= udmTemplate::print_template(\%template, "resbot", \%template_env);
$search_page .= udmTemplate::print_template(\%template, "bottom", \%template_env);

# It might be better to "dribble" output so that results can be seen
# straightaway
_finish_page($search_page, $dbh);

# Return CGI parameter $name as a number when it consists only of digits and
# is at least $min; otherwise return $default.
sub _uint_param {
    my ($request, $name, $default, $min) = @_;
    my $value = $request->param($name);
    return $default unless defined $value && $value =~ /\A\d{1,9}\z/;
    return $default if $value < $min;
    return $value + 0;
}

# Disconnect the database handle (when one is given), print the assembled
# page and end the script.
sub _finish_page {
    my ($page, $handle) = @_;
    $handle->disconnect() if $handle;
    print $page;
    exit(0);
}