--- /usr/local/opt/twiki/lib/TWiki/Search.pm 2003-01-04 17:36:46.000000000 -0800 +++ Search.pm 2005-06-24 21:46:20.000000000 -0700 @@ -204,27 +204,8 @@ } } - # Construct command line with 'ls' and 'grep. Note that 'ls' does not - # need to be locale-aware as long as it does not transform filenames - - # all results are sorted by Perl 'sort'. However, 'grep' must use - # locales if needed, for case-insensitive searching. - my $cmd = ""; - if( $theScope eq "topic" ) { - $cmd = "$TWiki::lsCmd %FILES% | %GREP% %SWITCHES% -- $TWiki::cmdQuote%TOKEN%$TWiki::cmdQuote"; - } else { - $cmd = "%GREP% %SWITCHES% -l -- $TWiki::cmdQuote%TOKEN%$TWiki::cmdQuote %FILES%"; - } - - if( $caseSensitive ) { - $tempVal = ""; - } else { - $tempVal = "-i"; - } - $cmd =~ s/%SWITCHES%/$tempVal/go; - my @tokens; if( $theRegex ) { - $tempVal = $TWiki::egrepCmd; @tokens = split( /;/, $theSearchVal ); if( $theScope eq "topic" ) { # Fix for Codev.CantAnchorSearchREToEnd @@ -232,10 +213,8 @@ } } else { - $tempVal = $TWiki::fgrepCmd; @tokens = $theSearchVal; } - $cmd =~ s/%GREP%/$tempVal/go; # write log entry if( ( $TWiki::doLogTopicSearch ) && ( ! $doInline ) ) { @@ -275,34 +254,12 @@ # 0501 kjk : vvv New var for accessing web dirs. my $sDir = "$TWiki::dataDir/$thisWebName"; - my @topicList = ""; + my @topicList = (); if( $theSearchVal ) { # do grep search chdir( "$sDir" ); _traceExec( "chdir to $sDir", "" ); - @topicList = ( "*.txt" ); - foreach my $token ( @tokens ) { - my $acmd = $cmd; - $acmd =~ s/%TOKEN%/$token/o; - $acmd =~ s/%FILES%/@topicList/; - $acmd =~ /(.*)/; - $acmd = "$1"; # untaint variable (NOTE: Needs a better check!) - $tempVal = `$acmd`; - _traceExec( $acmd, $tempVal ); - @topicList = split( /\n/, $tempVal ); - last if( ! 
@topicList ); - } - # cut .txt extension - my @tmpList = map { /(.*)\.txt$/; $_ = $1; } @topicList; - @topicList = (); - my $lastTopic = ""; - foreach( @tmpList ) { - $tempVal = $_; - # make topic unique - if( $tempVal ne $lastTopic ) { - push @topicList, $tempVal; - } - } + @topicList = &getMatchingTopics( $theScope, $caseSensitive, $theRegex, \@tokens ); } next if ( $noEmpty && ! @topicList ); # Nothing to show for this topic @@ -756,6 +713,79 @@ } #========================= +sub getMatchingTopics # Returns the topic names found under the current directory that match every token; a topic name is the matching path minus its "./" prefix and ".txt" suffix. +{ + my $theScope = $_[0]; # "topic": tokens are matched against the file paths printed by find; otherwise against file contents + my $caseSensitive = $_[1]; # false adds the -i switch to the grep command + my $theRegex = $_[2]; # true selects $TWiki::egrepCmd, false selects $TWiki::fgrepCmd + my $tokenRef = $_[3]; # reference to the array of search tokens + + my @tokens = @$tokenRef; + my $tokenCount = @tokens; # only referenced by the commented-out writeDebug call below + my $tokenStrings = join( ',', @tokens ); # only referenced by the commented-out writeDebug call below + + # Construct the command line: 'find' piped into grep for "topic" scope, a recursive 'grep -l' otherwise. Note that the file listing does not + # need to be locale-aware as long as it does not transform filenames - + # all results are sorted by Perl 'sort' (NOTE(review): this sub returns hash keys, i.e. in unspecified order; the sort presumably happens in the caller, confirm). However, 'grep' must use + # locales if needed, for case-insensitive searching. + my $cmd = ""; + my $fileGlob = "*.txt"; + my $switches = ( $caseSensitive ) ? "" : "-i"; + my $grepCmd = ( $theRegex ) ? $TWiki::egrepCmd : $TWiki::fgrepCmd; + if( $theScope eq "topic" ) { + $cmd = "find . -name '$fileGlob' -print | $grepCmd $switches -- $TWiki::cmdQuote%TOKEN%$TWiki::cmdQuote"; + } else { + $cmd = "$grepCmd -r --include='$fileGlob' $switches -l -- $TWiki::cmdQuote%TOKEN%$TWiki::cmdQuote . "; + } + ##TWiki::writeDebug( "getMatchingTopics(): theScope=$theScope; caseSensitive=$caseSensitive; theRegex=$theRegex; tokenCount=$tokenCount; tokens=$tokenStrings; cmd=$cmd" ); + + # If the number of tokens is greater + # than one, this routine repeatedly searches over all documents and + # successively finds the intersection of documents from + # each iteration. Most of the time, the number of tokens is 1. 
+ + # Get the list of pages that meet all the criteria in @tokens + my %fileList = (); + my $iterationCounter = 0; + foreach my $token ( @tokens ) { + my $acmd = $cmd; + $acmd =~ s/%TOKEN%/$token/o; + $acmd =~ /(.*)/; + $acmd = "$1"; # untaint variable (NOTE: Needs a better check!); /(.*)/ accepts any text up to the first newline, so $token reaches the shell unvalidated + my $tempVal = `$acmd`; + _traceExec( $acmd, $tempVal ); + my @fileList = split( /\n/, $tempVal ); + + # Find successive intersections. + if ($iterationCounter == 0) { + # Initialize the hash map with every path returned for the first token. + foreach my $e (@fileList) { + $fileList{$e} = 1; + } + } else { + # Find the intersection of successive lists: keep only paths already present in %fileList. + my %common = (); + foreach my $e (@fileList) { + if ( $fileList{$e} ) { + $common{$e} = 1; + } + } + %fileList = %common; + } + $iterationCounter++; + last if( ! %fileList ); # nothing left to intersect, so the remaining tokens are skipped + } + my @fileList = keys %fileList; # hash keys: each path appears once, order unspecified + my $fileListCount = scalar @fileList; # only referenced by the commented-out writeDebug call below + ##TWiki::writeDebug( "getMatchingTopics(): iterationCounter=$iterationCounter; count=$fileListCount" ); + + # cut "./" prefix and the ".txt" extension (NOTE(review): assumes every entry looks like "./NAME.txt"; a failed match does not set $1, verify) + my @topicList = map { /^\.\/(.*)\.txt$/; $_ = $1; } @fileList; + + return @topicList; +} + +#========================= sub getTextPattern { my( $theText, $thePattern ) = @_;