词频统计小程序-WordCount.exe

最近顶哥为了完成学历提升学业中的小作业,做了一个词频统计的 .exe 小程序。因为当时网上相关的资料比较少,所以顶哥决定把自己拙劣的作品发出来,给有需要的人提供一种思路,希望各位看官不要 dis 才好。文末附上源码链接,感兴趣的朋友可以继续优化哦。

01

先看效果

双击运行,下拉框选择源文件来源,支持本地和网络资源,如图:

本地源文件

网络源文件

02

主要代码

1.pom文件

<dependencies> <!-- Word segmenter (IK Analyzer) --> <dependency> <groupId>com.janeluo</groupId> <artifactId>ikanalyzer</artifactId> <version>2012_u6</version> </dependency> <!-- Unit testing --> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> <scope>test</scope> </dependency> <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <version>2.18.1</version> <configuration> <skipTests>true</skipTests> </configuration> </plugin> <!-- Packaging plugin --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-assembly-plugin</artifactId> <version>2.4.1</version>
<configuration> <!-- Use the jar-with-dependencies descriptor so all project dependencies are included --> <descriptorRefs> <descriptorRef>jar-with-dependencies</descriptorRef> </descriptorRefs> <!-- The mainClass entry in the manifest makes the jar executable --> <archive> <manifest> <addClasspath>true</addClasspath> <mainClass>cn.dintalk.service.WordCount</mainClass> </manifest> </archive> </configuration>
<executions> <execution> <id>make-assembly</id> <!-- Bind to the package phase --> <phase>package</phase> <goals> <goal>single</goal> </goals> </execution> </executions> </plugin> </plugins> </build>

2.WebUtils

/** * @author Mr.song * @date 2019/10/13 9:26 */public class WebUtils {
/** * 根据url和参数发送get请求 * * @param url * @param param * @return 返回网页内容 */ public static String sendGet(String url, String param) { String result = ""; if (param != null) { url = url + "?" + param; } try { URL realUrl = new URL(url); // 打开和URL之间的连接 HttpURLConnection conn = getHttpURLConnection(realUrl); result = getResponse(conn); } catch (Exception e) { e.printStackTrace(); } return result; }

//根据url 获取连接 private static HttpURLConnection getHttpURLConnection(URL realUrl) { StringBuilder sb = new StringBuilder(); sb.append("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"); sb.append(" AppleWrbKit/537.36(KHTML, like Gecko)"); sb.append(" Chrome/72.0.3626.119 Safari/537.36"); HttpURLConnection conn = null; try { // 打开和URL之间的连接 conn = (HttpURLConnection) realUrl.openConnection(); // 设置通用的请求属性 conn.setRequestProperty("accept", "*/*"); conn.setRequestProperty("connection", "Keep-Alive"); conn.setRequestProperty("user-agent", sb.toString()); } catch (IOException e) { e.printStackTrace(); } return conn; }
// 根据url连接获取响应 private static String getResponse(HttpURLConnection conn) { // 读取URL的响应 String result = ""; try (InputStream is = conn.getInputStream(); InputStreamReader isr = new InputStreamReader(is, "utf-8"); BufferedReader in = new BufferedReader(isr)) { String line; while ((line = in.readLine()) != null) { result += "\n" + line; } } catch (Exception e) { System.out.println("Err:getResponse()"); e.printStackTrace(); } finally { conn.disconnect(); }// System.out.println("getResponse():" + result.length()); return result; }
/** * 解析网页为文本 * * @param html * @return */ public static String parseHtmlToText(String html) { Document document = Jsoup.parse(html); return document.text(); }}

3.IKSUtils

/** * @author Mr.song * @date 2019/10/10 21:12 */public class IKSUtils {
/** * 对文本进行分词 * @param text * @return * @throws Exception */ public static List<String> getStringList(String text) throws Exception{ //独立Lucene实现 StringReader re = new StringReader(text); IKSegmenter ik = new IKSegmenter(re, true); Lexeme lex; List<String> s = new ArrayList<>(); while ((lex = ik.next()) != null) { s.add(lex.getLexemeText()); } return s; }
/** * 统计词频 * @param wordList * @return */ public static Map<String,Integer> wordCount(List<String> wordList){ if (wordList == null) return null; Map<String,Integer> result = new HashMap<>(); for (String s : wordList) { Integer count = result.get(s); if (count == null){ result.put(s,1); }else { result.put(s,++count); } } //按照次数排序 result = result .entrySet() .stream() .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2, LinkedHashMap::new)); return result; }}

03

相关链接

源码地址:https://github.com/MrSonghui/wordCount

打包.exe :https://www.cnblogs.com/xiaoMzjm/p/3879766.html

(0)

相关推荐