Java 爬取微信公众号文章 - 搜狗微信搜索
需求描述:参照图,1、2、3、4步骤!
HttpTool工具类、Jsoup maven 依赖参见文章
Jsoup 入门参考
1、获取 SNUID,目的是绕过搜狗的验证码页面
// 获取snuid (因为sunid有时间和访问次数限制建议每次自动查询数据时更新一次)
public String getSnuid() { CloseableHttpClient httpClient = null; CookieStore cookieStore = null; String url = "https://www.sogou.com/web?query=333&_asf=www.sogou.com&_ast=1488955851&w=01019900&p=40040100&ie=utf8&from=index-nologin"; int timeout = 30000; String snuid = null; try { cookieStore = new BasicCookieStore(); HttpClientContext context = HttpClientContext.create(); context.setCookieStore(cookieStore); RequestConfig globalConfig = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build(); httpClient = HttpClients.custom().setDefaultRequestConfig(globalConfig).setDefaultCookieStore(cookieStore).build(); HttpGet httpGet = new HttpGet(url); httpGet.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); httpGet.setHeader("Cookie", "ABTEST=0|1488956269|v17;IPLOC=CN3301;SUID=E9DA81B7290B940A0000000058BFAB6D;PHPSESSID=rfrcqafv5v74hbgpt98ah20vf3;SUIR=1488956269"); httpClient.execute(httpGet); for (Cookie c : cookieStore.getCookies()) { if (c.getName().equals("SNUID")) { snuid = c.getValue(); } } } catch (Exception e) { e.printStackTrace(); } return snuid; }