java 爬取微信公众号文章 - 搜狗微信搜索

需求描述:参照图,1、2、3、4步骤!

HttpTool工具类、Jsoup maven 依赖参见文章

Jsoup 入门参考

1、获取 SNUID,目的是绕过搜狗的验证码页面

// 获取 SNUID(由于 SNUID 有时效和访问次数限制,建议每次自动查询数据前重新获取一次)

/**
 * Fetches a fresh {@code SNUID} cookie value from Sogou web search.
 *
 * <p>Sends a single GET request to a fixed Sogou search URL with a hard-coded
 * {@code Cookie} header, then reads the {@code SNUID} cookie that ends up in
 * the client's cookie store. According to the surrounding article, the value
 * is used to get past Sogou's captcha page and should be refreshed before
 * each automated query run.
 *
 * <p>This is best-effort: any exception raised while building or executing
 * the request is printed to standard error and the method returns
 * {@code null}.
 *
 * @return the value of the {@code SNUID} cookie, or {@code null} if the
 *         response did not set one or the request failed
 */
public String getSnuid() {
        String url = "https://www.sogou.com/web?query=333&_asf=www.sogou.com&_ast=1488955851&w=01019900&p=40040100&ie=utf8&from=index-nologin";
        int timeout = 30000; // milliseconds, applied to both connect and socket read
        String snuid = null;

        // Cookies set by the response are collected here by the client.
        CookieStore cookieStore = new BasicCookieStore();
        RequestConfig globalConfig = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build();

        // try-with-resources guarantees the client (and its connection pool) is
        // released even when the request fails.
        try (CloseableHttpClient httpClient = HttpClients.custom()
                .setDefaultRequestConfig(globalConfig)
                .setDefaultCookieStore(cookieStore)
                .build()) {
            HttpGet httpGet = new HttpGet(url);
            httpGet.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
            httpGet.setHeader("Cookie",
                    "ABTEST=0|1488956269|v17;IPLOC=CN3301;SUID=E9DA81B7290B940A0000000058BFAB6D;PHPSESSID=rfrcqafv5v74hbgpt98ah20vf3;SUIR=1488956269");

            // Only the cookies matter, not the body: close the response right
            // away so the underlying connection is released.
            httpClient.execute(httpGet).close();

            for (Cookie c : cookieStore.getCookies()) {
                // Constant-first comparison is safe even if a cookie name is null.
                if ("SNUID".equals(c.getName())) {
                    snuid = c.getValue();
                }
            }
        } catch (Exception e) {
            // Deliberately best-effort: report the failure and fall through to
            // return null so callers can retry.
            e.printStackTrace();
        }
        return snuid;
    }
经验分享 程序员 微信小程序 职场和发展