挖金子---小爬虫

sunhj000java

浏览: 124747 次
性别:
来自: 北京

最近访客更多访客>>

zjy_369

ecjtubaowp

sgamin1001

suncong1024

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Java基础

Java PHP IE 多线程 quartz

马萨玛索（http://www.masamaso.com/index.shtml）每天10点都会推出一折商品5件，就是秒购。男装质量还不错，所以就经常去抢，感觉手动太慢了，就写了一个小爬虫程序，让自己去爬，如果是金子页面（免费商品）就会自动打开，我就可以抢到了。和大家分享一下。这个应该不算广告吧，之所以给链接和网站名是想着便于各位感兴趣看官测试，如果管理员觉得不妥，请通知哈，我再修改，不要直接封我哈，谢了。

思路：
1. 把所有想要的商品的链接读到程序中。
2. 分别打开每一个链接读取源代码
3. 验证是否是金子商品（源代码中含有free_msg字符串）
4. 如果是金子就把该链接用IE打开

源代码：
读链接文件：

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

/**
 * Reads a text file line by line. The crawler uses it to load the list of
 * product links, one link per line.
 *
 * @author Administrator
 */
public class FileReader {
	private String fileName;
	
	/**
	 * Creates a reader with no file name set; {@link #getLines()} then returns an empty list.
	 */
	public FileReader() {
	}
	
	/**
	 * @param fileName path of the file to read
	 */
	public FileReader(String fileName) {
		this.fileName = fileName;
	}
	
	/**
	 * Reads every line of the file into a list, in file order.
	 * <p>
	 * Failures are reported on stderr rather than thrown: a missing file (or no
	 * file name at all) yields an empty list, and an I/O error in the middle of
	 * the file yields the lines read so far.
	 *
	 * @return the lines of the file; never {@code null}
	 */
	public List<String> getLines() {
		List<String> lines = new LinkedList<String>();
		if (this.fileName == null) {
			// Nothing to open; previously this crashed inside FileInputStream.
			return lines;
		}
		
		BufferedReader reader = null;
		try {
			// Opening and reading share one try block so that a failed open can
			// never lead to a read or close on a null reader.
			reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
			String line;
			while ((line = reader.readLine()) != null) {
				lines.add(line);
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		
		return lines;
	}
}

URL类：

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Wraps one page address and provides helpers to open it and scan its
 * content for a marker string.
 *
 * @author Administrator
 */
public class Url {
	private String url;
	
	public Url() {
	}
	
	/**
	 * @param url the address to open, e.g. {@code http://host/page}
	 */
	public Url(String url) {
		this.url = url;
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}
	
	/**
	 * Creates a connection object for the stored address.
	 *
	 * @return the connection, or {@code null} when the address is malformed or
	 *         the connection object cannot be created (the error is printed to stderr)
	 */
	public URLConnection getConnection() {
		try {
			return new URL(url).openConnection();
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return null;
	}
	
	/**
	 * Connects to the address and wraps the response body (the connection's
	 * input stream) in a buffered reader.
	 *
	 * @return a reader over the page content, or {@code null} when the
	 *         connection cannot be created or established
	 */
	public BufferedReader getReader() {
		URLConnection conn = getConnection();
		if (conn == null) {
			return null;
		}
		conn.setConnectTimeout(9000);
		try {
			conn.connect();
			return new BufferedReader(new InputStreamReader(conn.getInputStream()));
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		}
	}
	
	/**
	 * Reads the page line by line and reports whether any line contains {@code str}.
	 * Reading stops at the first match.
	 *
	 * @param str the marker text to look for
	 * @return {@code true} if some line contains {@code str}; {@code false} if
	 *         no line does, if the page cannot be opened, or if reading fails
	 */
	public boolean isExist(String str) {
		BufferedReader reader = getReader();
		if (reader == null) {
			// The page could not be opened; treat it as "marker not found"
			// instead of failing with a NullPointerException.
			return false;
		}
		
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				if (line.contains(str)) {
					return true;
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				reader.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
		return false;
	}
}

Digger类:

import java.io.IOException;
import java.util.List;

/**
 * Worker thread that checks one product page and opens it in Internet
 * Explorer when the page carries the "free_msg" marker.
 *
 * @author Administrator
 */
public class Digger extends Thread{
	/** Link file used when no path is given on the command line. */
	private static final String DEFAULT_URL_FILE = "F:/myworkspace/workspace/diggold/src/url.txt";
	
	/** Location of the Internet Explorer executable. */
	private static final String IE_PATH = "C:/Program Files/Internet Explorer/iexplore.exe";
	
	private Url url;
	
	public Digger() {
		super();
	}
	
	/**
	 * @param url the page this thread will check
	 */
	public Digger(Url url) {
		this.url = url;
	}
	
	/**
	 * Loads the link list and starts one thread per link.
	 *
	 * @param args optional; {@code args[0]} is the path of the link file
	 *             (one link per line). Without it the built-in default path is used.
	 * @throws IOException declared for compatibility with existing callers
	 */
	public static void main(String[] args) throws IOException {
		String urlFile = (args != null && args.length > 0) ? args[0] : DEFAULT_URL_FILE;
		FileReader reader = new FileReader(urlFile);
		List<String> urls = reader.getLines();
		
		for (String string : urls) {
			String link = string.trim();
			if (link.length() == 0) {
				// Blank lines in the link file are not links; do not start a thread for them.
				continue;
			}
			Digger digger = new Digger(new Url(link));
			digger.start();
		}
	}

	/**
	 * Checks whether the page contains the "free_msg" marker and, if so,
	 * opens the link in Internet Explorer.
	 */
	@Override
	public void run() {
		if (url == null) {
			// Created through the no-arg constructor: there is nothing to check.
			return;
		}
		if(url.isExist("free_msg")) {
			try {
				// Pass the executable and the link as separate array elements.
				// Runtime.exec(String) splits its argument on whitespace, which
				// breaks an executable path that contains spaces.
				Runtime.getRuntime().exec(new String[] { IE_PATH, url.getUrl() });
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
		System.out.println(url.getUrl() + "END!");
	}
}

url.txt配置文件

http://www.masamaso.com/goods.php?id=3128
http://www.masamaso.com/goods.php?id=3132
http://www.masamaso.com/goods.php?id=3120

写的比较简单，但是挺实用，各位看官莫笑话哈。

diggold.rar (7.5 KB)
下载次数: 299

分享到：

封装json ajax函数 | 妙用javascript:void(0)

2010-01-16 10:38
浏览 784
评论(13)
论坛回复 / 浏览 (13 / 7771)
分类:编程语言
查看更多

13 楼 james112496 2010-01-19

我运行了一下IE也没打开？

12 楼 gelnyang 2010-01-19

我之前也写了一个类似的，一直想找时间优化一下，但没来得及弄。楼主可以参考一下，欢迎提出意见：http://code.google.com/p/seawind

11 楼 ajonjun 2010-01-19

好东西，拿来研究哈，谢。

10 楼 sunhj000java 2010-01-19

CZy5168 写道

我运行了你的代码，虽然把商品的链接读到程序中，但是为什么IE没有打开链接地址呢。。。
而且链接加多了一条件：
http://www.masamaso.com/goods.php?id=3128&free_msg=1
http://www.masamaso.com/goods.php?id=3132
http://www.masamaso.com/goods.php?id=3120
菜鸟恳请赐教！！！

有可能就是没有哦，你可以把链接打开看看。10点以后再运行，10点前肯定没有的

9 楼 CZy5168 2010-01-18

8 楼 joohnnie 2010-01-18

用quartz来定时任务

7 楼 sunhj000java 2010-01-18

li445970924 写道

是不是要每天 10点之前把程序跑起来

呵呵，你掐点撒

6 楼 li445970924 2010-01-18

是不是要每天 10点之前把程序跑起来

5 楼 sunhj000java 2010-01-18

mp_juan 写道

建议楼主采用多线程机制实现和url自动分析机制，这样的话可以大大提高效率

现在就是多线程的，每个链接就是一个线程。

4 楼 mp_juan 2010-01-18

建议楼主采用多线程机制实现和url自动分析机制，这样的话可以大大提高效率

3 楼 sunhj000java 2010-01-18

gundumw100 写道

这里的配置文件里的链接，比如：http://www.masamaso.com/goods.php?id=3132
难道一个一个写进去？你怎么知道id=3132？

就是一个一个拷进去的，喜欢哪个就把那个给拷进去。

2 楼 gundumw100 2010-01-18

这里的配置文件里的链接，比如：http://www.masamaso.com/goods.php?id=3132
难道一个一个写进去？你怎么知道id=3132？

1 楼 whaosoft 2010-01-18

真惭愧我对这个一点没有研究唉

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论