曾经 N 兄想知道某人(这事王晨, 沈建泽, 王彬都知道)的信息, 托我做一个搜索未名全站的程序. 我用SHELL/Perl/Java分别写个—给他的是Java版本. 我现在只剩下Perl版还在.
后来当作练习, 用Perl写了个多线程的, 第二天, 我的 IP 就被禁了, 我还以为未名挂了.
前几天, 还有不认识的人发邮件给我说, 这个有BUG, 让修正, 结果只是线程数设置过高.
现在, 未名有 Google 全站搜索, 这个也没什么用了, 放出来做为一个纪念.
#!/usr/bin/perl -w
# Copyright (C) 2007 Changsheng Jiang
# Time-stamp: <Changsheng Jiang 2008-06-11 01:07:24>
# Author: Changsheng Jiang<jiangzuoyan@gmail.com>
# Date: 2007/10/18
# Version: 2007/10/18
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# search articles by author or title from bdwm, use the original www
# interface.
# any change, please do change the email in the help message!
use strict;
use IO::Handle;
use Text::Iconv;
use LWP::UserAgent;
use threads;
use threads::shared;
use Getopt::Long;
use URI;
my $author:shared="";
my $title:shared="";
my $otitle:shared="";
my $savefile:shared="";
my $verbose:shared = 4;
my $savefilelock:shared = 0;
my @bbsboards:shared;
my $searchtype:shared="";
my $help=0;
my $nths=1;
sbbs_help(1) unless GetOptions(
"help|h"=>\$help,
"verbose=i"=>\$verbose,
"savefile=s"=>\$savefile,
"author=s"=>\$author,
"title=s"=>\$title,
"otitle=s"=>\$otitle,
"nths=i" =>\$nths
);
sub sbbs_help
{
print STDERR "searchbbs options, ...\n",
" search articles by author or title at bdwm\n",
"options:\n",
" --author author: name to search\n",
" --title title: search by title\n",
" --otitle otitle: search use go=O\n",
" --savefile filename: save file name\n",
" --nths num: # of threads, 1-40, default $nths\n",
"No warranty!\n",
"Contact: Sheos <jiangzuoyan\@gmail.com>, -:)\n";
exit $_[0] if $_[0] > 0;
}
if ($help) {
sbbs_help(1);
}
if(length($savefile)){
open(SAVEFILE, ">".$savefile) or die "cann't open file $savefile\n";
select(SAVEFILE);
SAVEFILE->autoflush(1);
}
# bs_content(board, conent)
sub bs_content
{
my $board = shift;
my $cont = shift;
if ($cont =~ m{(<tr><td class=body1>.*</a>)</table>}s) {
my $cnt = $1;
$cnt =~ s/[\t\r]//sg;
$cnt =~ s/\n\n+/\n/sg;
print STDERR "==found $board:", length($cnt), "\n" unless $verbose < 2;
{
lock($savefilelock);
print "<tr bgcolor=lightgreen><td colspan=5>Board ",
"<a href=\"http://bbs.pku.edu.cn/bbs/bbsdoc.php?board=$board\">$board</a>\n",
$cnt,
"\n";
}
}
}
sub getsearch
{
my $c = shift;
my $board;
my $ua = LWP::UserAgent->new;
my $url = URI->new("http://162.105.204.150/bbs/bbssearch.php");
my $go = $searchtype eq 'author' ? 'W' :
$searchtype eq 'title' ? 'T' : 'O';
my $to = "$author$title$otitle";
while (1) {
{
lock(@bbsboards);
$board = shift @bbsboards;
}
return unless $board;
$url->query_form('board'=>$board, "go"=>$go, "to"=>$to);
my $req=HTTP::Request->new('GET', $url);
my $res=$ua->request($req);
bs_content($board, $res->content) if $res->is_success;
# select(undef, undef, undef, 0.001);# sleep
print STDERR "[$c]$board\n" unless $verbose < 4;
}
}
sub get_boards
{
my $ua = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => "http://162.105.204.150/bbs/bbsall.php");
my $res = $ua->request($req);
die $res->status_line unless $res->is_success;
@bbsboards = $res->content=~ m{href="/bbs/bbsdoc.php\?board=([^"]+)"}g;
@bbsboards = sort(@bbsboards);
}
## main
$searchtype = 'author' if length($author);
$searchtype = 'title' if length($title);
$searchtype = 'otitle' if length($otitle);
my $LANG=$ENV{'LANG'};
$LANG =~ s/^.*\.//;
if ($LANG) {
my $cnv = Text::Iconv->new($LANG, "gbk");
$title = $cnv->convert($title);
$otitle = $cnv->convert($otitle);
}
sbbs_help(1) unless $searchtype;
print <<EOF;
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=gbk">
<title>Search all $searchtype:$author$title$otitle</title>
<base href="http://bbs.pku.edu.cn/" />
<link rel="stylesheet" type="text/css" href="/bbstyle.php">
</head><body bgproperties="fixed">
<center>All $searchtype:$author$title$otitle<hr />
<table class=body>
<tr><th class=body>\#<th class=body>S<th class=body>Author<th class=body>Date<th class=body>Title
EOF
get_boards;
my @sthrs;
$nths = 1 if $nths < 1;
$nths = 40 if $nths > 40;
for (my $i=0; $i<$nths; ++$i) {
push @sthrs, threads->new(\&getsearch, $i);
}
foreach my $thr(@sthrs) {
$thr->join;
}
print <<EOF;
</table>
</center><hr />style, :-)
</body></html>
EOF
No comments:
Post a Comment