/* * Copyright (c) 2018-2028, DreamLu All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * Neither the name of the dreamlu.net developer nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * Author: DreamLu 卢春梦 (596392912@qq.com) */ package org.springblade.core.http; import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.parser.Parser; import org.jsoup.select.Elements; import org.springblade.core.tool.utils.Exceptions; import org.springframework.cglib.proxy.Enhancer; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; /** * 爬虫 xml 转 bean 基于 jsoup * * @author L.cm */ public class DomMapper { /** * Returns body to jsoup Document. * * @return Document */ public static Document asDocument(ResponseSpec response) { return readDocument(response.asString()); } /** * 将流读取为 jsoup Document * * @param inputStream InputStream * @return Document */ public static Document readDocument(InputStream inputStream) { try { return DataUtil.load(inputStream, StandardCharsets.UTF_8.name(), ""); } catch (IOException e) { throw Exceptions.unchecked(e); } } /** * 将 html 字符串读取为 jsoup Document * * @param html String * @return Document */ public static Document readDocument(String html) { return Parser.parse(html, ""); } /** * 读取 xml 信息为 java Bean * * @param inputStream InputStream * @param clazz bean Class * @param 泛型 * @return 对象 */ public static T readValue(InputStream inputStream, final Class clazz) { return readValue(readDocument(inputStream), clazz); } /** * 读取 xml 信息为 java Bean * * @param html html String * @param clazz bean Class * @param 泛型 * @return 对象 */ public static T readValue(String html, final Class clazz) { return readValue(readDocument(html), clazz); } /** * 读取 xml 信息为 java Bean * * @param doc xml element * @param clazz bean Class * @param 泛型 * @return 对象 */ @SuppressWarnings("unchecked") public static T readValue(final Element doc, final Class clazz) { Enhancer enhancer = new Enhancer(); enhancer.setSuperclass(clazz); enhancer.setUseCache(true); enhancer.setCallback(new CssQueryMethodInterceptor(clazz, doc)); return (T) enhancer.create(); } /** * 读取 xml 信息为 java Bean * * @param 泛型 * @param inputStream InputStream * @param clazz bean Class * @return 对象 */ public static List readList(InputStream inputStream, final Class clazz) { return readList(readDocument(inputStream), clazz); } /** * 读取 xml 信息为 java Bean * * @param 泛型 * @param html html String * @param clazz bean Class * @return 对象 */ public static List readList(String html, final Class clazz) { return readList(readDocument(html), clazz); } /** * 读取 xml 信息为 java Bean * * @param doc xml element * @param clazz bean Class * @param 泛型 * @return 对象列表 */ public static List readList(Element doc, Class clazz) { CssQuery annotation = clazz.getAnnotation(CssQuery.class); if (annotation == null) { throw new IllegalArgumentException("DomMapper readList " + clazz + " mast has annotation @CssQuery."); } String cssQueryValue = annotation.value(); Elements elements = doc.select(cssQueryValue); List valueList = new ArrayList<>(); for (Element element : elements) { valueList.add(readValue(element, clazz)); } return valueList; } }