JAVA之爬虫demo
简介:
HttpClient:相当于 Python 中的 requests 库
1.使用方法:(调用jar包的方式)
下载地址:https://hc.apache.org/downloads.cgi
ide 下载地址:https://download-cdn.jetbrains.com/idea/ideaIC-2024.1.3.exe
在项目目录下新建一个 lib 文件夹,将下载好的 jar 包解压到该文件夹中,然后在 IDE 中把这些 jar 包添加为项目依赖(加入 classpath)

一 :GET请求
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
| package src;
import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.ResponseHandler; import org.apache.http.impl.client.HttpClients; import java.io.IOException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils;
class Demos { public static CloseableHttpClient httpClient = HttpClients.createDefault(); public static void getTest1() throws IOException { HttpGet httpGet= new HttpGet("https://baidu.com"); CloseableHttpResponse response = httpClient.execute(httpGet); System.out.println(response); int status = response.getStatusLine().getStatusCode(); System.out.println(status); if (status >= 200 && status <300){ HttpEntity entity =response.getEntity(); System.out.println("解析的数据++++++++++++++"); String html = EntityUtils.toString(entity); System.out.println(html); }else { throw new ClientProtocolException(" 状态码错误"+status); } } public static void getTest() throws IOException{ HttpGet httpGet = new HttpGet("https://baidu.com"); ResponseHandler<String> responseHandler= new ResponseHandler<String>() { @Override public String handleResponse(HttpResponse httpResponse) throws ClientProtocolException, IOException { int status = httpResponse.getStatusLine().getStatusCode(); if (status >= 200 && status <300){ return EntityUtils.toString(httpResponse.getEntity()); }else { throw new IOException("响应错误"+status); }
} };
try{ String resBody=httpClient.execute(httpGet,responseHandler); System.out.println(resBody); }catch (IOException e){ e.printStackTrace(); }
} } public class demo { public static void main(String[] args) throws IOException{ Demos.getTest(); Demos.getTest1();
} }
|
二:POST请求
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
| package src;
import org.apache.http.Consts; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.ResponseHandler; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.utils.URIBuilder; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.HttpClients; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils;
/**
 * POST request examples built on Apache HttpClient 4.x: a plain string body,
 * a URL-encoded form and a JSON body.
 *
 * <p>The examples share one client. None of them closes it, so they can be called
 * in any order and any number of times; {@code demo.main} closes it at the end.
 */
class Demos {
    /** Client shared by all examples; closed once by {@code demo.main}. */
    public static CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Posts a plain string body to http://httpbin.org/post and prints status and body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void postTest() throws IOException {
        HttpPost httpPost = new HttpPost("http://httpbin.org/post");
        httpPost.setEntity(new StringEntity(" post???"));
        executeAndPrint(httpPost);
    }

    /**
     * Posts a URL-encoded form to the cninfo announcement query endpoint and prints
     * status and body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void postTest_p() throws IOException {
        List<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair("column", "szse"));
        params.add(new BasicNameValuePair("pageNum", "1"));
        params.add(new BasicNameValuePair("pageSize", "30"));
        params.add(new BasicNameValuePair("sortName", ""));
        params.add(new BasicNameValuePair("sortType", ""));
        params.add(new BasicNameValuePair("plate", ""));
        params.add(new BasicNameValuePair("tabName", "fulltext"));
        params.add(new BasicNameValuePair("stock", ""));
        params.add(new BasicNameValuePair("secid", ""));
        params.add(new BasicNameValuePair("trade", ""));
        params.add(new BasicNameValuePair("seDate", "2023-12-16~2024-06-16"));
        params.add(new BasicNameValuePair("isHLtitle", "true"));

        HttpPost httpPost = new HttpPost("http://www.cninfo.com.cn/new/hisAnnouncement/query");
        httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36");
        httpPost.setEntity(new UrlEncodedFormEntity(params, Consts.UTF_8));
        executeAndPrint(httpPost);
    }

    /**
     * Posts a JSON body to http://httpbin.org/post and prints status and body.
     *
     * @throws ClientProtocolException if the status code is outside the 2xx range
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void postTest_j() throws IOException {
        String json = "{\"key\":\"value\"}";
        HttpPost httpPost = new HttpPost("http://httpbin.org/post");
        // Encode as UTF-8 and label the body as JSON; StringEntity(String) alone
        // would announce it as text/plain in ISO-8859-1.
        httpPost.setEntity(new StringEntity(json, Consts.UTF_8));
        httpPost.setHeader("Content-Type", "application/json; charset=UTF-8");
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36");
        executeAndPrint(httpPost);
    }

    /**
     * Executes the request, prints the status code and, for a 2xx status, the body.
     * The response is always closed; the shared client is left open.
     */
    private static void executeAndPrint(HttpPost httpPost) throws IOException {
        try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
            int status = response.getStatusLine().getStatusCode();
            System.out.println(status);
            if (status < 200 || status >= 300) {
                throw new ClientProtocolException(" 状态码错误" + status);
            }
            HttpEntity entity = response.getEntity();
            System.out.println("解析的数据++++++++++++++");
            // UTF-8 is only the fallback when the server declares no charset;
            // a response without a body is printed as an empty string.
            String html = entity != null ? EntityUtils.toString(entity, Consts.UTF_8) : "";
            System.out.println(html);
        }
    }
}

/** Entry point: runs the JSON POST example, then shuts the shared client down. */
public class demo {
    public static void main(String[] args) throws IOException {
        try {
            Demos.postTest_j();
        } finally {
            // Close the shared client exactly once, after all requests are done.
            Demos.httpClient.close();
        }
    }
}
|
三:一个MD5加密的网站(请求参数 code 由 MD5 计算得到)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
| package src; import org.apache.http.Consts; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.ResponseHandler; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.utils.URIBuilder; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
public class RequestDemo { public static void main(String[] args)throws IOException { String md =md5("1"); String md1=md5_l("1"); if (md.equals(md1)){ System.out.println("ok"); }else { System.out.println(md); System.out.println(md1); }
}
public static void getTest_p() throws IOException{ String host ="https://api.mytokenapi.com/ticker/currencyranklist"; try { URIBuilder uriBuilder =new URIBuilder(host);
long timestamp = System.currentTimeMillis(); String timestampStr = Long.toString(timestamp);
String firstSixDigits = timestampStr.substring(0, 6); String result = timestampStr+"9527"+firstSixDigits ;
String code =md5(result);
uriBuilder.setParameter("pages","2,1") .setParameter("sizes","100,100") .setParameter("subject","market_cap") .setParameter("language","en_US") .setParameter("legal_currency","USD") .setParameter("timestamp",timestampStr) .setParameter("code",code) .setParameter("platform","web_pc") .setParameter("v","0.1.0") .setParameter("international","1");
URI url =uriBuilder.build();
HttpGet httpGet = new HttpGet(url); httpGet.setHeader("Content-Type:","application/x-www-form-urlencoded;charset=utf-8"); httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"); ResponseHandler<String> responseHandler= new ResponseHandler<String>() { @Override public String handleResponse(HttpResponse httpResponse) throws ClientProtocolException, IOException { int status = httpResponse.getStatusLine().getStatusCode(); if (status >= 200 && status <300){ return EntityUtils.toString(httpResponse.getEntity()); }else { throw new IOException("响应错误"+status); }
} }; try{ String resBody= Demos.httpClient.execute(httpGet,responseHandler); System.out.println(resBody); }catch (IOException e){ e.printStackTrace(); } } catch (URISyntaxException e) { throw new RuntimeException(e); }
}
public static String md5(String text) { try { MessageDigest md5 = MessageDigest.getInstance("MD5");
byte[] messageDigest = md5.digest(text.getBytes());
StringBuilder hexString = new StringBuilder(); for (byte b : messageDigest) { String hex = Integer.toHexString(0xff & b);
if (hex.length() == 1) { hexString.append('0'); }
hexString.append(hex); }
return hexString.toString(); } catch (NoSuchAlgorithmException e) { throw new RuntimeException(e); } } public static String md5_l(String text) { try { MessageDigest md5 =MessageDigest.getInstance("MD5"); byte [] messageDigest =md5.digest(text.getBytes()); StringBuilder hexString = new StringBuilder(); for (byte b: messageDigest){ String hex =Integer.toHexString(0xff & b); if (hex.length() ==1){ hexString.append('0'); } hexString.append(hex); } return hexString.toString(); } catch (NoSuchAlgorithmException e) { throw new RuntimeException(e); }
} }
|