曾经 N 兄想知道某人(这事王晨, 沈建泽, 王彬都知道)的信息, 托我做一个搜索未名全站的程序. 我用SHELL/Perl/Java分别写个—给他的是Java版本. 我现在只剩下Perl版还在.
后来当作练习, 用Perl写了个多线程的, 第二天, 我的 IP 就被禁了, 我还以为未名挂了.
前几天, 还有不认识的人发邮件给我说, 这个有BUG, 让修正, 结果只是线程数设置过高.
现在, 未名有 Google 全站搜索, 这个也没什么用了, 放出来做为一个纪念.
#!/usr/bin/perl -w # Copyright (C) 2007 Changsheng Jiang # Time-stamp: <Changsheng Jiang 2008-06-11 01:07:24> # Author: Changsheng Jiang<jiangzuoyan@gmail.com> # Date: 2007/10/18 # Version: 2007/10/18 # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA. # search articles by author or title from bdwm, use the original www # interface. # any change, please do change the email in the help message! use strict; use IO::Handle; use Text::Iconv; use LWP::UserAgent; use threads; use threads::shared; use Getopt::Long; use URI; my $author:shared=""; my $title:shared=""; my $otitle:shared=""; my $savefile:shared=""; my $verbose:shared = 4; my $savefilelock:shared = 0; my @bbsboards:shared; my $searchtype:shared=""; my $help=0; my $nths=1; sbbs_help(1) unless GetOptions( "help|h"=>\$help, "verbose=i"=>\$verbose, "savefile=s"=>\$savefile, "author=s"=>\$author, "title=s"=>\$title, "otitle=s"=>\$otitle, "nths=i" =>\$nths ); sub sbbs_help { print STDERR "searchbbs options, ...\n", " search articles by author or title at bdwm\n", "options:\n", " --author author: name to search\n", " --title title: search by title\n", " --otitle otitle: search use go=O\n", " --savefile filename: save file name\n", " --nths num: # of threads, 1-40, default $nths\n", "No warranty!\n", "Contact: Sheos <jiangzuoyan\@gmail.com>, -:)\n"; exit $_[0] if $_[0] > 0; } if ($help) { sbbs_help(1); } if(length($savefile)){ open(SAVEFILE, ">".$savefile) or die "cann't open file $savefile\n"; select(SAVEFILE); SAVEFILE->autoflush(1); } # bs_content(board, conent) sub bs_content { my $board = shift; my $cont = shift; if ($cont =~ m{(<tr><td class=body1>.*</a>)</table>}s) { my $cnt = $1; $cnt =~ s/[\t\r]//sg; $cnt =~ s/\n\n+/\n/sg; print STDERR "==found $board:", length($cnt), "\n" unless $verbose < 2; { lock($savefilelock); print "<tr bgcolor=lightgreen><td colspan=5>Board ", "<a href=\"http://bbs.pku.edu.cn/bbs/bbsdoc.php?board=$board\">$board</a>\n", $cnt, "\n"; } } } sub getsearch { my $c = shift; my $board; my $ua = LWP::UserAgent->new; my $url = URI->new("http://162.105.204.150/bbs/bbssearch.php"); my $go = $searchtype eq 'author' ? 'W' : $searchtype eq 'title' ? 'T' : 'O'; my $to = "$author$title$otitle"; while (1) { { lock(@bbsboards); $board = shift @bbsboards; } return unless $board; $url->query_form('board'=>$board, "go"=>$go, "to"=>$to); my $req=HTTP::Request->new('GET', $url); my $res=$ua->request($req); bs_content($board, $res->content) if $res->is_success; # select(undef, undef, undef, 0.001);# sleep print STDERR "[$c]$board\n" unless $verbose < 4; } } sub get_boards { my $ua = LWP::UserAgent->new; my $req = HTTP::Request->new(GET => "http://162.105.204.150/bbs/bbsall.php"); my $res = $ua->request($req); die $res->status_line unless $res->is_success; @bbsboards = $res->content=~ m{href="/bbs/bbsdoc.php\?board=([^"]+)"}g; @bbsboards = sort(@bbsboards); } ## main $searchtype = 'author' if length($author); $searchtype = 'title' if length($title); $searchtype = 'otitle' if length($otitle); my $LANG=$ENV{'LANG'}; $LANG =~ s/^.*\.//; if ($LANG) { my $cnv = Text::Iconv->new($LANG, "gbk"); $title = $cnv->convert($title); $otitle = $cnv->convert($otitle); } sbbs_help(1) unless $searchtype; print <<EOF; <html><head> <meta http-equiv="Content-Type" content="text/html; charset=gbk"> <title>Search all $searchtype:$author$title$otitle</title> <base href="http://bbs.pku.edu.cn/" /> <link rel="stylesheet" type="text/css" href="/bbstyle.php"> </head><body bgproperties="fixed"> <center>All $searchtype:$author$title$otitle<hr /> <table class=body> <tr><th class=body>\#<th class=body>S<th class=body>Author<th class=body>Date<th class=body>Title EOF get_boards; my @sthrs; $nths = 1 if $nths < 1; $nths = 40 if $nths > 40; for (my $i=0; $i<$nths; ++$i) { push @sthrs, threads->new(\&getsearch, $i); } foreach my $thr(@sthrs) { $thr->join; } print <<EOF; </table> </center><hr />style, :-) </body></html> EOF
No comments:
Post a Comment