码迷,mamicode.com
首页 > 其他好文 > 详细

遍历文件,查找文件下的汉字,并将汉字生成csv文件

时间:2020-07-22 01:38:34      阅读:98      评论:0      收藏:0      [点我收藏+]

标签:spl   linu   分隔符   amp   chinese   byte   content   bytes   ash   

package com.shine.eiuop.utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.itextpdf.text.pdf.PdfStructTreeController.returnType;
import com.shine.framework.commutil.typewrap.EDto;

/**
* title: 清除注释
*
* @author 
* @时间 
*/
public class FileCopyChineseUtils {

/** 根目录 */
public static String rootDir = "C:\\Users\\14423\\Desktop\\亚强\\msp中文翻译\\msp2\\msp\\WebRoot";

public static void main(String args[]) throws Exception {
dofind(rootDir);
}

public static void dofind(String rootDir) throws Exception {
String alltmSr = deepDir(rootDir);

System.out.println(alltmSr);
String[] stringArrStrings = alltmSr.toString().split("\\r\\n");
String file_path = "D:\\SHINE_ROOT\\mspChinese.csv";
String file_name = "mspChinese.csv";
writeDataToCsvFile1(file_path,file_name,stringArrStrings);
}

public static String deepDir(String rootDir) throws Exception {
String string = "";
File folder = new File(rootDir);
StringBuilder alltmSr = new StringBuilder();
if (folder.isDirectory()) {
String[] files = folder.list();

for (int i = 0; i < files.length; i++) {
File file = new File(folder, files[i]);
if (file.isDirectory() && file.isHidden() == false) {
alltmSr.append(deepDir(file.getPath()));
} else if (file.isFile()) {
alltmSr.append(writeComment(file.getPath()));
}
}
} else if (folder.isFile()) {
alltmSr.append(writeComment(folder.getPath()));
}
return alltmSr.toString();
}

/**
* @param currentDir
* 当前目录
* @param currentFileName
* 当前文件名
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
*/
/**
* @param filePathAndName
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
*/
public static String writeComment(String filePathAndName)
throws FileNotFoundException, UnsupportedEncodingException {
StringBuffer buffer = new StringBuffer();
String line = null; // 用来保存每行读取的内容
InputStream is = new FileInputStream(filePathAndName);
BufferedReader reader = new BufferedReader(new InputStreamReader(is,"UTF-8"));
try {
line = reader.readLine();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} // 读取第一行
while (line != null) { // 如果 line 为空说明读完了
buffer.append(line); // 将读到的内容添加到 buffer 中
buffer.append("\r\n"); // 添加换行符
try {
line = reader.readLine();
} catch (IOException e) {
e.printStackTrace();
} // 读取下一行
}
buffer.append("\r\n"); // 添加换行符
String filecontent = buffer.toString();

String regex = "[\u4e00-\u9fa5]";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(filecontent);
StringBuilder tmSr = new StringBuilder();
int tmp = -1;

while (matcher.find()) {
MatchResult result = matcher.toMatchResult();
int start = result.start();
int end = result.end();
if(tmp == start || tmp == -1) {
// 判断连续
tmSr.append(filecontent.substring(start, end));
}else {
// 不连续
tmSr.append("\r\n");
tmSr.append(filecontent.substring(start, end));
}
tmp = end;
}
tmSr.append("\r\n"); // 添加换行符
return tmSr.toString();

}

/**
*
* @Description 写csv文件,
* @param filePath
* @param fields
* @param dtos void
* @param
* @throws @author 
* @date 2019年11月18日 上午9:45:31
* @see
*/
public static void writeDataToCsvFile1(String filePath, String fileName,String[] datas) throws Exception {

File csvFile = null;
BufferedWriter csvFileOutputStream = null;
FileOutputStream fos = null;
String uuidFilePath = "D:\\SHINE_ROOT\\mspChinese.csv";
try {
FileUtils.createNewFile(filePath);
FileUtils.createNewFile(uuidFilePath);
csvFile = new File(filePath);
try {
// 如果文件不存在,则创建新的文件
if (!csvFile.exists()) {
csvFile.createNewFile();
}
} catch (Exception e) {
e.printStackTrace();
}
// 写入bom头
byte[] uft8bom = { (byte) 0xef, (byte) 0xbb, (byte) 0xbf };
fos = new FileOutputStream(csvFile);
//fos.write(uft8bom);

// UTF-8使正确读取分隔符","
// 如果生产文件乱码,windows下用gbk,linux用UTF-8
//csvFileOutputStream = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"), 1024);

//csvFileOutputStream.newLine();
for (String dto : datas) {
if ("".equals(dto)!=true) {
fos.write((dto+"\r\n").getBytes());
}
}
fos.flush();
fos.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}

遍历文件,查找文件下的汉字,并将汉字生成csv文件

标签:spl   linu   分隔符   amp   chinese   byte   content   bytes   ash   

原文地址:https://www.cnblogs.com/lwh-12345/p/13358294.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!