JAVA之爬虫demo

简介:

HttpClient:相当于 Python 中的 requests

1.使用方法:(调用jar包的方式)

下载地址:https://hc.apache.org/downloads.cgi

ide 下载地址:https://download-cdn.jetbrains.com/idea/ideaIC-2024.1.3.exe

在项目目录下新建一个 lib 文件夹,将下载好的 jar 包解压到该文件夹中

java_1

一 :GET请求

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package src;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.impl.client.HttpClients;
import java.io.IOException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * GET request examples built on Apache HttpClient.
 *
 * <p>Both examples fetch {@code https://baidu.com} through the shared {@link #httpClient}
 * and print the response body to standard output.
 */
class Demos {
    /** Shared HttpClient instance used by every example in this class. */
    public static CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Sends a GET request and handles the response by hand: prints the response object,
     * the status code and, for a 2xx status, the response body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request cannot be executed or the body cannot be read
     */
    public static void getTest1() throws IOException {
        HttpGet httpGet = new HttpGet("https://baidu.com");
        // try-with-resources closes the response on both the success and the
        // exception path, so the underlying connection is always released.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            System.out.println(response);
            int status = response.getStatusLine().getStatusCode();
            System.out.println(status);
            if (status >= 200 && status < 300) {
                HttpEntity entity = response.getEntity();
                System.out.println("解析的数据++++++++++++++");
                // "UTF-8" is passed as the default charset for decoding the body.
                String html = EntityUtils.toString(entity, "UTF-8");
                System.out.println(html);
            } else {
                throw new ClientProtocolException(" 状态码错误" + status);
            }
        }
    }

    /**
     * Sends a GET request and delegates response processing to a {@link ResponseHandler}
     * that returns the body for a 2xx status and throws otherwise.
     *
     * <p>An {@link IOException} raised while executing the request (including the one thrown
     * by the handler for a non-2xx status) is caught here and its stack trace is printed;
     * it is not propagated to the caller.
     *
     * @throws IOException declared for interface compatibility
     */
    public static void getTest() throws IOException {
        HttpGet httpGet = new HttpGet("https://baidu.com");
        ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
            @Override
            public String handleResponse(HttpResponse httpResponse)
                    throws ClientProtocolException, IOException {
                int status = httpResponse.getStatusLine().getStatusCode();
                if (status >= 200 && status < 300) {
                    return EntityUtils.toString(httpResponse.getEntity(), "UTF-8");
                }
                throw new IOException("响应错误" + status);
            }
        };

        try {
            String resBody = httpClient.execute(httpGet, responseHandler);
            System.out.println(resBody);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/** Runner for the GET examples defined in {@code Demos}. */
public class demo {

    /**
     * Program entry point: runs the handler-based GET example first,
     * then the manually handled one.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if one of the examples propagates an I/O failure
     */
    public static void main(final String[] args) throws IOException {
        Demos.getTest();
        Demos.getTest1();
    }
}

POST请求

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package src;

import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClients;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * POST request examples built on Apache HttpClient.
 *
 * <p>All examples share {@link #httpClient}. The examples close only their own response;
 * they deliberately leave the shared client open so that several examples can be run one
 * after another. Code that is completely finished with the client may call
 * {@code httpClient.close()} itself.
 */
class Demos {
    /** Shared HttpClient instance used by every example in this class. */
    public static CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Posts a plain string body to {@code http://httpbin.org/post} and prints the status
     * code and the response body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request cannot be executed or the body cannot be read
     */
    public static void postTest() throws IOException {
        HttpPost httpPost = new HttpPost("http://httpbin.org/post");
        httpPost.setEntity(new StringEntity(" post???"));
        executeAndPrint(httpPost);
    }

    /**
     * Posts URL-encoded form parameters to the cninfo announcement query endpoint and
     * prints the status code and the response body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request cannot be executed or the body cannot be read
     */
    public static void postTest_p() throws IOException {
        List<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair("column", "szse"));
        params.add(new BasicNameValuePair("pageNum", "1"));
        params.add(new BasicNameValuePair("pageSize", "30"));
        params.add(new BasicNameValuePair("sortName", ""));
        params.add(new BasicNameValuePair("sortType", ""));
        params.add(new BasicNameValuePair("plate", ""));
        params.add(new BasicNameValuePair("tabName", "fulltext"));
        params.add(new BasicNameValuePair("stock", ""));
        params.add(new BasicNameValuePair("secid", ""));
        params.add(new BasicNameValuePair("trade", ""));
        params.add(new BasicNameValuePair("seDate", "2023-12-16~2024-06-16"));
        params.add(new BasicNameValuePair("isHLtitle", "true"));

        HttpPost httpPost = new HttpPost("http://www.cninfo.com.cn/new/hisAnnouncement/query");
        httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36");
        httpPost.setEntity(new UrlEncodedFormEntity(params, Consts.UTF_8));
        executeAndPrint(httpPost);
    }

    /**
     * Posts a JSON document to {@code http://httpbin.org/post} and prints the status code
     * and the response body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request cannot be executed or the body cannot be read
     */
    public static void postTest_j() throws IOException {
        // The inner double quotes have to be escaped inside a Java string literal.
        String json = "{\"key\":\"value\"}";
        HttpPost httpPost = new HttpPost("http://httpbin.org/post");
        // Encode the body as UTF-8 and label it as JSON so the server does not
        // treat it as plain text.
        httpPost.setEntity(new StringEntity(json, Consts.UTF_8));
        httpPost.setHeader("Content-Type", "application/json; charset=UTF-8");
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36");
        executeAndPrint(httpPost);
    }

    /**
     * Executes the prepared POST request with the shared client, prints the status code
     * and, for a 2xx status, a marker line followed by the response body.
     *
     * @param httpPost fully configured request to send
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request cannot be executed or the body cannot be read
     */
    private static void executeAndPrint(HttpPost httpPost) throws IOException {
        // try-with-resources releases the connection even when the status check throws.
        try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
            int status = response.getStatusLine().getStatusCode();
            System.out.println(status);
            if (status >= 200 && status < 300) {
                HttpEntity entity = response.getEntity();
                System.out.println("解析的数据++++++++++++++");
                // UTF-8 is passed as the default charset for decoding the body.
                String body = EntityUtils.toString(entity, Consts.UTF_8);
                System.out.println(body);
            } else {
                throw new ClientProtocolException(" 状态码错误" + status);
            }
        }
    }
}

/** Runner for the POST examples defined in {@code Demos}. */
public class demo {

    /**
     * Program entry point: runs the JSON POST example.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the example propagates an I/O failure
     */
    public static void main(final String[] args) throws IOException {
        // The form-parameter example is left disabled:
        // Demos.postTest_p();
        Demos.postTest_j();
    }
}

一个MD5加密的网站

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package src;
import org.apache.http.Consts;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;

/**
 * Example of calling an API whose query string carries an MD5-based signature.
 *
 * <p>The signature ({@code code} parameter) is the MD5 hex digest of
 * {@code timestamp + "9527" + first six characters of timestamp}.
 */
public class RequestDemo {

    /**
     * Entry point: hashes the same input with both MD5 helpers and prints {@code ok}
     * when the digests agree, or both digests when they differ.
     *
     * @param args command-line arguments (not used)
     * @throws IOException declared for the optional request example
     */
    public static void main(String[] args) throws IOException {
        String md = md5("1");
        String md1 = md5_l("1");
        // Strings are compared with equals(); == would only compare object references.
        if (md.equals(md1)) {
            System.out.println("ok");
        } else {
            System.out.println(md);
            System.out.println(md1);
        }

        // Enable the next line to run the signed GET request example.
        // getTest_p();
    }

    /**
     * Sends a GET request with signed query parameters and prints the response body.
     *
     * <p>The request is executed with {@code Demos.httpClient}, which is defined outside
     * this class. An {@link IOException} raised while executing the request (including a
     * non-2xx status reported by the handler) is caught and its stack trace is printed.
     *
     * @throws IOException declared for interface compatibility
     * @throws RuntimeException wrapping a {@link URISyntaxException} if the URI cannot be built
     */
    public static void getTest_p() throws IOException {
        String host = "https://api.mytokenapi.com/ticker/currencyranklist";

        // Current time in milliseconds, as text.
        String timestampStr = Long.toString(System.currentTimeMillis());
        // Signature input: timestamp + "9527" + first six characters of the timestamp.
        String firstSixDigits = timestampStr.substring(0, 6);
        String code = md5(timestampStr + "9527" + firstSixDigits);

        URI url;
        try {
            url = new URIBuilder(host)
                    .setParameter("pages", "2,1")
                    .setParameter("sizes", "100,100")
                    .setParameter("subject", "market_cap")
                    .setParameter("language", "en_US")
                    .setParameter("legal_currency", "USD")
                    .setParameter("timestamp", timestampStr)
                    .setParameter("code", code)
                    .setParameter("platform", "web_pc")
                    .setParameter("v", "0.1.0")
                    .setParameter("international", "1")
                    .build();
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }

        HttpGet httpGet = new HttpGet(url);
        // The header name must not contain the ':' separator itself.
        httpGet.setHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36");

        ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
            @Override
            public String handleResponse(HttpResponse httpResponse)
                    throws ClientProtocolException, IOException {
                int status = httpResponse.getStatusLine().getStatusCode();
                if (status >= 200 && status < 300) {
                    return EntityUtils.toString(httpResponse.getEntity(), Consts.UTF_8);
                }
                throw new IOException("响应错误" + status);
            }
        };

        try {
            String resBody = Demos.httpClient.execute(httpGet, responseHandler);
            System.out.println(resBody);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes the MD5 digest of {@code text} and returns it as 32 lowercase hex characters.
     *
     * <p>The text is encoded as UTF-8 before hashing so the result does not depend on the
     * platform default charset.
     *
     * @param text text to hash; must not be {@code null}
     * @return lowercase hexadecimal MD5 digest
     * @throws RuntimeException wrapping {@link NoSuchAlgorithmException} if MD5 is unavailable
     */
    public static String md5(String text) {
        try {
            MessageDigest digest = MessageDigest.getInstance("MD5");
            byte[] hash = digest.digest(text.getBytes(StandardCharsets.UTF_8));

            StringBuilder hexString = new StringBuilder(hash.length * 2);
            for (byte b : hash) {
                // Emit the high and low nibble separately so every byte yields
                // exactly two hex digits, including a leading zero.
                hexString.append(Character.forDigit((b >> 4) & 0xf, 16));
                hexString.append(Character.forDigit(b & 0xf, 16));
            }
            return hexString.toString();
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Alias of {@link #md5(String)}, kept so existing callers of this name keep working.
     *
     * @param text text to hash; must not be {@code null}
     * @return lowercase hexadecimal MD5 digest
     */
    public static String md5_l(String text) {
        return md5(text);
    }
}