码迷,mamicode.com
首页 > Web开发 > 详细

socket抓取网页

时间:2014-04-29 13:21:20      阅读:506      评论:0      收藏:0      [点我收藏+]

标签:blog   http   os   文件   io   2014   

#include <iostream>
#include <string>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
using namespace std;

// Builds the HTTP/1.1 request for "GET /" addressed to `host`.
//
// "Connection: close" is sent on purpose: the receive loop in func() uses the
// peer closing the socket as its end-of-response signal, so a keep-alive
// connection would leave it blocked in recv() after the last byte arrived.
// The header block is terminated by exactly one empty line.
static std::string build_http_request(const std::string &host)
{
	std::string request;
	request += "GET / HTTP/1.1\r\n";
	request += "Host: " + host + "\r\n";
	request += "Accept: */*\r\n";
	request += "User-Agent: Mozilla/4.0(compatible)\r\n";
	request += "Connection: close\r\n";
	request += "\r\n";
	return request;
}

// Sends all `len` bytes of `data` on `sockfd`, retrying after short sends.
// Returns the number of bytes sent (== len) on success, or -1 if send() failed
// (errno is left as set by send()).
static ssize_t send_all(int sockfd, const char *data, size_t len)
{
	size_t total = 0;
	while (total < len)
	{
		const ssize_t n = send(sockfd, data + total, len - total, 0);
		if (n == -1)
		{
			return -1;
		}
		total += static_cast<size_t>(n);
	}
	return static_cast<ssize_t>(total);
}

// Writes all `len` bytes of `data` to `fd`, retrying after short writes.
// Returns true on success, false if write() failed (errno is left as set).
static bool write_all(int fd, const char *data, size_t len)
{
	size_t total = 0;
	while (total < len)
	{
		const ssize_t n = write(fd, data + total, len - total);
		if (n == -1)
		{
			return false;
		}
		total += static_cast<size_t>(n);
	}
	return true;
}

// Interactively fetches the root page of a web site over plain HTTP.
//
// Reads a host name from standard input, resolves it with gethostbyname(),
// connects to TCP port 80, sends a "GET /" request and copies everything the
// server returns (status line, headers and body) both to standard output and
// to the file "recv.html" in the current directory, which is created or
// truncated. Returns once the server closes the connection.
//
// Any failure (resolution, socket, connect, send, open, recv, write) prints a
// diagnostic and terminates the process with exit(-1).
void func()
{
	std::string url;
	std::cout << "输入网址:" << std::endl;
	if (!(std::cin >> url))
	{
		std::cerr << "failed to read host name from input" << std::endl;
		exit(-1);
	}

	// Resolve the host name to an IPv4 address.
	struct hostent *website_host = gethostbyname(url.c_str());
	if (website_host == NULL)
	{
		// gethostbyname() reports failures through h_errno, not errno.
		herror("gethostbyname error");
		exit(-1);
	}
	std::cout << "主机名称:";
	std::cout << website_host->h_name << std::endl;
	std::cout << "地址类型:";
	std::cout << website_host->h_addrtype << std::endl;
	std::cout << "地址长度:";
	std::cout << website_host->h_length << std::endl;

	// The address is copied into a sockaddr_in below, so it must be IPv4.
	if (website_host->h_addrtype != AF_INET ||
	    website_host->h_length != static_cast<int>(sizeof(struct in_addr)) ||
	    website_host->h_addr_list[0] == NULL)
	{
		std::cerr << "gethostbyname error: no IPv4 address for host" << std::endl;
		exit(-1);
	}

	// Create the TCP socket.
	const int sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd == -1)
	{
		perror("socket error");
		exit(-1);
	}
	std::cout << "建立socket完成" << std::endl;

	// Fill in the server address (port 80, first resolved address).
	struct sockaddr_in website_addr;
	memset(&website_addr, 0, sizeof(website_addr));
	website_addr.sin_family = AF_INET;
	website_addr.sin_port = htons(80);
	// memcpy avoids dereferencing the char* entry through a cast pointer.
	memcpy(&website_addr.sin_addr, website_host->h_addr_list[0], sizeof(website_addr.sin_addr));
	std::cout << "地址初始化完成" << std::endl;

	// Connect.
	if (connect(sockfd, reinterpret_cast<struct sockaddr *>(&website_addr), sizeof(website_addr)) == -1)
	{
		perror("connect error");
		close(sockfd);
		exit(-1);
	}
	std::cout << "连接完成" << std::endl;

	// Send the HTTP request to port 80.
	const std::string request = build_http_request(url);
	std::cout << "请求头构造完成" << std::endl;
	std::cout << request << std::endl;
	const ssize_t sent = send_all(sockfd, request.data(), request.size());
	if (sent == -1)
	{
		perror("send error");
		close(sockfd);
		exit(-1);
	}
	std::cout << "发送完成" << std::endl;
	std::cout << "send:\n" << sent << std::endl;

	// Open the output file; create it if missing and drop any previous content.
	const int fd = open("recv.html", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1)
	{
		perror("open error");
		close(sockfd);
		exit(-1);
	}

	// Receive until the server closes the connection.
	char buf[10 * 1024];
	for (;;)
	{
		const ssize_t received = recv(sockfd, buf, sizeof(buf), 0);
		if (received == 0)
		{
			// Orderly shutdown by the peer: the response is complete.
			std::cout << "对端关闭" << std::endl;
			break;
		}
		if (received == -1)
		{
			perror("recv error");
			close(fd);
			close(sockfd);
			exit(-1);
		}
		std::cout << "recv:" << received << std::endl;
		// Use the byte count rather than a NUL terminator: the data may fill
		// the whole buffer and may itself contain NUL bytes.
		std::cout.write(buf, received);
		std::cout << std::endl;
		if (!write_all(fd, buf, static_cast<size_t>(received)))
		{
			perror("write error");
			close(fd);
			close(sockfd);
			exit(-1);
		}
	}

	close(fd);
	close(sockfd);
}

// Program entry point: runs the interactive page fetch implemented by func()
// and reports success to the caller once it returns.
int main(void)
{
	func();

	return EXIT_SUCCESS;
}


mamicode.com,码迷


mamicode.com,码迷


mamicode.com,码迷



socket抓取网页,码迷,mamicode.com

socket抓取网页

标签:blog   http   os   文件   io   2014   

原文地址:http://blog.csdn.net/aspnet_lyc/article/details/24700059

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!