Compare commits
4 Commits
master
...
clesaec/TD
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6627d64228 | ||
|
|
1b7fe995df | ||
|
|
5311f83c49 | ||
|
|
a60f943f6a |
3
main/plugins/org.talend.designer.components.libs/libs_src/talendcsv/.gitignore
vendored
Normal file
3
main/plugins/org.talend.designer.components.libs/libs_src/talendcsv/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
.classpath
|
||||
.project
|
||||
target/
|
||||
@@ -1,564 +0,0 @@
|
||||
package com.talend.csv;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class CSVReader {
|
||||
|
||||
private Reader reader;
|
||||
|
||||
private char separator = ',';
|
||||
|
||||
private char quotechar = '"';
|
||||
|
||||
private char escapechar = '"';
|
||||
|
||||
private String lineEnd;
|
||||
|
||||
private boolean skipEmptyRecords = false;
|
||||
|
||||
private boolean trimWhitespace = true;
|
||||
|
||||
private static final int BUFFER_SIZE = 4 * 1024;
|
||||
|
||||
private static final int FETCH_SIZE = 10 * 50;
|
||||
|
||||
private char[] buffer = new char[FETCH_SIZE];
|
||||
private int currentPosition = 0;
|
||||
private int bufferCount = 0;
|
||||
|
||||
private boolean hasMoreData = true;
|
||||
|
||||
private boolean hasNext = false;
|
||||
|
||||
private boolean inColumn = false;
|
||||
|
||||
private boolean escaping = false;
|
||||
|
||||
private char previousChar = '\0';
|
||||
|
||||
private String[] values = new String[10];
|
||||
|
||||
private HeadersReader headersReader = new HeadersReader();
|
||||
|
||||
private int columnCount = 0;
|
||||
|
||||
private boolean inQuote = false;
|
||||
|
||||
private StringBuilder sb = new StringBuilder(16);
|
||||
|
||||
private boolean storeRawRecord = false;
|
||||
private StringBuilder stringBuilder = new StringBuilder(16 * 10);
|
||||
private String rawRecord = "";
|
||||
|
||||
public CSVReader(String filename,char separator,String charset) throws IOException {
|
||||
this(new FileInputStream(filename), separator, charset);
|
||||
}
|
||||
|
||||
public CSVReader(InputStream inputStream,char separator,String charset) throws IOException {
|
||||
this(new UnicodeReader(inputStream, charset), separator);
|
||||
}
|
||||
|
||||
public CSVReader(Reader reader,char separator) {
|
||||
this.reader = new BufferedReader(reader,BUFFER_SIZE);
|
||||
this.separator = separator;
|
||||
}
|
||||
|
||||
public static CSVReader parse(String content) {
|
||||
if (content == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"Parameter content can not be null.");
|
||||
}
|
||||
|
||||
return new CSVReader(new StringReader(content),',');
|
||||
}
|
||||
|
||||
public CSVReader setLineEnd(String lineEnd) {
|
||||
this.lineEnd = lineEnd;
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setSeparator(char separator) {
|
||||
this.separator = separator;
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setEscapeChar(char escapechar) {
|
||||
this.escapechar = escapechar;
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setQuoteChar(char quotechar) {
|
||||
this.quotechar = quotechar;
|
||||
return this;
|
||||
}
|
||||
|
||||
public char getQuoteChar() {
|
||||
return this.quotechar;
|
||||
}
|
||||
|
||||
public CSVReader setTrimWhitespace(boolean trimWhitespace) {
|
||||
this.trimWhitespace = trimWhitespace;
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setSkipEmptyRecords(boolean skipEmptyRecords) {
|
||||
this.skipEmptyRecords = skipEmptyRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setStoreRawRecord(boolean storeRawRecord) {
|
||||
this.storeRawRecord = storeRawRecord;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getRawRecord() {
|
||||
return rawRecord;
|
||||
}
|
||||
|
||||
public void endRecord() {
|
||||
hasNext = true;
|
||||
}
|
||||
|
||||
public void endColumn() {
|
||||
inColumn = false;
|
||||
|
||||
String currentValue = sb.toString();
|
||||
|
||||
if(trimWhitespace && !inQuote) {
|
||||
currentValue = trimTail(currentValue);
|
||||
}
|
||||
|
||||
if (columnCount == values.length) {
|
||||
int newLength = values.length * 2;
|
||||
|
||||
String[] holder = new String[newLength];
|
||||
|
||||
System.arraycopy(values, 0, holder, 0, values.length);
|
||||
|
||||
values = holder;
|
||||
}
|
||||
|
||||
values[columnCount] = currentValue;
|
||||
|
||||
columnCount++;
|
||||
|
||||
sb.setLength(0);
|
||||
}
|
||||
|
||||
public boolean readNext() throws IOException {
|
||||
columnCount = 0;
|
||||
hasNext = false;
|
||||
|
||||
rawRecord = "";
|
||||
|
||||
if(!hasMoreData) {
|
||||
return false;
|
||||
}
|
||||
|
||||
while(hasMoreData && !hasNext) {
|
||||
if(arriveEnd()) {
|
||||
fill();
|
||||
continue;
|
||||
}
|
||||
|
||||
char currentChar = buffer[currentPosition];
|
||||
|
||||
inQuote = false;
|
||||
|
||||
if(quotechar!='\0' && currentChar == quotechar) {//quote char as start of column
|
||||
inColumn = true;
|
||||
inQuote = true;
|
||||
currentPosition++;
|
||||
escaping = false;
|
||||
|
||||
boolean previousCharAsQuote = false;
|
||||
boolean deleteTrailNoUseChars = false;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
|
||||
while(hasMoreData && inColumn) {
|
||||
if(arriveEnd()) {
|
||||
fill();
|
||||
continue;
|
||||
}
|
||||
|
||||
currentChar = buffer[currentPosition];
|
||||
if(deleteTrailNoUseChars){
|
||||
if(currentChar == separator) {
|
||||
endColumn();
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if((lineEnd == null && (currentChar == '\n' || currentChar == '\r'))
|
||||
|| (lineEnd!=null && currentChar == lineEnd.charAt(0))) {
|
||||
endColumn();
|
||||
endRecord();
|
||||
} else {
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
}
|
||||
} else if(currentChar == quotechar) {
|
||||
if(escaping) {//quote char as text
|
||||
sb.append(currentChar);
|
||||
escaping = false;
|
||||
previousCharAsQuote = false;
|
||||
} else {//quote char as escape or end of column
|
||||
if(escapechar!='\0' && currentChar == escapechar) {
|
||||
escaping = true;
|
||||
}
|
||||
previousCharAsQuote = true;
|
||||
}
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if(escapechar!='\0' && escapechar!=quotechar && escaping) {
|
||||
switch (currentChar) {
|
||||
case 'n':
|
||||
sb.append('\n');
|
||||
break;
|
||||
case 'r':
|
||||
sb.append('\r');
|
||||
break;
|
||||
case 't':
|
||||
sb.append('\t');
|
||||
break;
|
||||
case 'b':
|
||||
sb.append('\b');
|
||||
break;
|
||||
case 'f':
|
||||
sb.append('\f');
|
||||
break;
|
||||
case 'e':
|
||||
sb.append('\u001B');
|
||||
break;
|
||||
case 'v':
|
||||
sb.append('\u000B');
|
||||
break;
|
||||
case 'a':
|
||||
sb.append('\u0007');
|
||||
break;
|
||||
default :
|
||||
sb.append(currentChar);
|
||||
break;
|
||||
}
|
||||
|
||||
escaping = false;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if(escapechar!='\0' && currentChar == escapechar) {
|
||||
escaping = true;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if(previousCharAsQuote) {//quote char as end of column
|
||||
if(currentChar == separator) {
|
||||
endColumn();
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if((lineEnd == null && (currentChar == '\n' || currentChar == '\r'))
|
||||
|| (lineEnd!=null && currentChar == lineEnd.charAt(0))) {
|
||||
endColumn();
|
||||
endRecord();
|
||||
} else {
|
||||
deleteTrailNoUseChars = true;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
}
|
||||
|
||||
previousCharAsQuote = false;
|
||||
} else {
|
||||
sb.append(currentChar);
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
}
|
||||
|
||||
previousChar = currentChar;
|
||||
|
||||
currentPosition++;
|
||||
}
|
||||
} else if(currentChar == separator) {
|
||||
previousChar = currentChar;
|
||||
endColumn();
|
||||
currentPosition++;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if (lineEnd!=null && currentChar == lineEnd.charAt(0)) {
|
||||
if (inColumn || columnCount > 0 || !skipEmptyRecords) {
|
||||
endColumn();
|
||||
endRecord();
|
||||
}
|
||||
|
||||
currentPosition++;
|
||||
previousChar = currentChar;
|
||||
} else if(lineEnd==null && (currentChar == '\r' || currentChar == '\n')) {
|
||||
if (inColumn || columnCount > 0 || (!skipEmptyRecords && (currentChar == '\r' || previousChar!='\r'))) {
|
||||
endColumn();
|
||||
endRecord();
|
||||
}
|
||||
|
||||
currentPosition++;
|
||||
previousChar = currentChar;
|
||||
} else if(trimWhitespace && (currentChar == ' ' || currentChar == '\t')) {
|
||||
inColumn = true;
|
||||
currentPosition++;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else {
|
||||
inColumn = true;
|
||||
escaping = false;
|
||||
|
||||
while(hasMoreData && inColumn) {
|
||||
if(arriveEnd()) {
|
||||
fill();
|
||||
continue;
|
||||
}
|
||||
|
||||
currentChar = buffer[currentPosition];
|
||||
|
||||
if(quotechar == '\0' && escapechar != '\0' && currentChar == escapechar) {
|
||||
if(escaping) {
|
||||
sb.append(currentChar);
|
||||
escaping = false;
|
||||
} else {
|
||||
escaping = true;
|
||||
}
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if(escapechar!='\0' && escapechar!=quotechar && escaping) {
|
||||
switch (currentChar) {
|
||||
case 'n':
|
||||
sb.append('\n');
|
||||
break;
|
||||
case 'r':
|
||||
sb.append('\r');
|
||||
break;
|
||||
case 't':
|
||||
sb.append('\t');
|
||||
break;
|
||||
case 'b':
|
||||
sb.append('\b');
|
||||
break;
|
||||
case 'f':
|
||||
sb.append('\f');
|
||||
break;
|
||||
case 'e':
|
||||
sb.append('\u001B');
|
||||
break;
|
||||
case 'v':
|
||||
sb.append('\u000B');
|
||||
break;
|
||||
case 'a':
|
||||
sb.append('\u0007');
|
||||
break;
|
||||
default :
|
||||
sb.append(currentChar);
|
||||
break;
|
||||
}
|
||||
|
||||
escaping = false;
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if(currentChar == separator) {
|
||||
endColumn();
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
} else if((lineEnd == null && (currentChar == '\n' || currentChar == '\r'))
|
||||
|| (lineEnd!=null && currentChar == lineEnd.charAt(0))) {
|
||||
endColumn();
|
||||
endRecord();
|
||||
} else {
|
||||
sb.append(currentChar);
|
||||
|
||||
if(storeRawRecord) {
|
||||
stringBuilder.append(currentChar);
|
||||
}
|
||||
}
|
||||
|
||||
previousChar = currentChar;
|
||||
currentPosition++;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(inColumn || previousChar == separator) {
|
||||
endColumn();
|
||||
endRecord();
|
||||
}
|
||||
|
||||
if(storeRawRecord) {
|
||||
rawRecord = stringBuilder.toString();
|
||||
stringBuilder.setLength(0);
|
||||
}
|
||||
|
||||
return hasNext;
|
||||
|
||||
}
|
||||
|
||||
public String get(int index) {
|
||||
if (index > -1 && index < columnCount) {
|
||||
return values[index];
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getValues() {
|
||||
String[] result = new String[columnCount];
|
||||
System.arraycopy(values, 0, result, 0, columnCount);
|
||||
return result;
|
||||
}
|
||||
|
||||
private void fill() throws IOException {
|
||||
int count = reader.read(buffer, 0, buffer.length);
|
||||
currentPosition = 0;
|
||||
bufferCount = count;
|
||||
if(count == -1) {
|
||||
hasMoreData = false;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean arriveEnd() {
|
||||
return currentPosition == bufferCount;
|
||||
}
|
||||
|
||||
private String trimTail(String content) {
|
||||
int len = content.length();
|
||||
int newLen = len;
|
||||
|
||||
while (newLen > 0) {
|
||||
char tail = content.charAt(newLen - 1);
|
||||
if(tail != ' ' && tail != '\t') {
|
||||
break;
|
||||
}
|
||||
newLen--;
|
||||
}
|
||||
|
||||
if(newLen != len) {
|
||||
content = content.substring(0,newLen);
|
||||
}
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
headersReader.clear();
|
||||
}
|
||||
|
||||
//Added 20141016 TDQ-9496
|
||||
public int getCurrentRecord(){
|
||||
return this.currentPosition;
|
||||
}
|
||||
|
||||
public char getSeperator(){
|
||||
return separator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the first record of data as the column headers. Added 20141016 TDQ-9496
|
||||
*
|
||||
* @return If the header was successfully read or not.
|
||||
*/
|
||||
public boolean readHeaders() throws IOException {
|
||||
boolean result = readNext();
|
||||
|
||||
headersReader.length = columnCount;
|
||||
|
||||
headersReader.headers = new String[columnCount];
|
||||
|
||||
for (int i = 0; i < headersReader.length; i++) {
|
||||
String columnValue = get(i);
|
||||
headersReader.headers[i] = columnValue;
|
||||
headersReader.indexByHeaderName.put(columnValue, new Integer(i));
|
||||
}
|
||||
|
||||
if (result) {
|
||||
currentPosition--;
|
||||
}
|
||||
|
||||
columnCount = 0;
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Returns the current column value for a given column header name.
|
||||
*/
|
||||
public String get(String headerName) throws IOException {
|
||||
return get(getIndex(headerName));
|
||||
}
|
||||
|
||||
private int getIndex(String headerName) throws IOException {
|
||||
if(headersReader.indexByHeaderName==null){
|
||||
return -1;
|
||||
}
|
||||
Object indexValue = headersReader.indexByHeaderName.get(headerName);
|
||||
|
||||
if (indexValue != null) {
|
||||
return ((Integer) indexValue).intValue();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getHeaders() throws IOException {
|
||||
if (headersReader.headers == null) {
|
||||
return null;
|
||||
} else {
|
||||
String[] clone = new String[headersReader.length];
|
||||
System.arraycopy(headersReader.headers, 0, clone, 0,
|
||||
headersReader.length);
|
||||
return clone;
|
||||
}
|
||||
}
|
||||
|
||||
private class HeadersReader {
|
||||
private String[] headers;
|
||||
|
||||
private int length;
|
||||
|
||||
private HashMap indexByHeaderName;
|
||||
|
||||
public HeadersReader() {
|
||||
headers = null;
|
||||
length = 0;
|
||||
indexByHeaderName = new HashMap();
|
||||
}
|
||||
|
||||
public void clear(){
|
||||
headers = null;
|
||||
indexByHeaderName = null;
|
||||
}
|
||||
}
|
||||
/**End of added by TDQ-9496 **/
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.talend.libraries</groupId>
|
||||
<artifactId>talendcsv</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>talend-csv</name>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<talend.nexus.url>https://artifacts-oss.talend.com</talend.nexus.url>
|
||||
<java.source.version>1.8</java.source.version>
|
||||
<junit5.version>5.4.2</junit5.version>
|
||||
</properties>
|
||||
|
||||
<distributionManagement>
|
||||
<snapshotRepository>
|
||||
<id>talend_nexus_deployment</id>
|
||||
<url>${talend.nexus.url}/nexus/content/repositories/TalendOpenSourceSnapshot/</url>
|
||||
<snapshots>
|
||||
<enabled>true</enabled>
|
||||
</snapshots>
|
||||
<releases>
|
||||
<enabled>false</enabled>
|
||||
</releases>
|
||||
</snapshotRepository>
|
||||
<repository>
|
||||
<id>talend_nexus_deployment</id>
|
||||
<url>${talend.nexus.url}/nexus/content/repositories/TalendOpenSourceRelease/</url>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
</repository>
|
||||
</distributionManagement>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-api</artifactId>
|
||||
<version>${junit5.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-engine</artifactId>
|
||||
<version>${junit5.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<source>${java.source.version}</source>
|
||||
<target>${java.source.version}</target>
|
||||
<showDeprecation>true</showDeprecation>
|
||||
<showWarnings>true</showWarnings>
|
||||
<fork>true</fork>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,97 @@
|
||||
package com.talend.csv;
|
||||
|
||||
/**
 * Parsing options used by the CSV reader: field separator, quote and escape
 * characters, line terminator, and the empty-record / whitespace policies.
 */
public class CSVConfig {

    /** Character that separates two fields. */
    private char separator = ',';

    /** Character that delimits a quoted field. */
    private char quotechar = '"';

    /** Character that introduces an escape sequence. */
    private char escapechar = '"';

    /** Explicit line terminator; {@code null} selects the built-in LF / CR LF handling. */
    private String lineEnd = null;

    /** Whether empty records are to be skipped. */
    private boolean skipEmptyRecords = false;

    /** Whether whitespace around unquoted values is to be trimmed. */
    private boolean trimWhitespace = true;

    /** @return the field separator */
    public char getSeparator() {
        return this.separator;
    }

    /** @param separator the field separator to use */
    public void setSeparator(char separator) {
        this.separator = separator;
    }

    /**
     * @param value the character to test
     * @return {@code true} when {@code value} is the field separator
     */
    public boolean isSeparator(char value) {
        return this.separator == value;
    }

    /**
     * @param value the character to test
     * @return {@code true} when {@code value} is the quote character
     */
    public boolean isQuoteChar(char value) {
        return value == this.quotechar;
    }

    /** @return the quote character */
    public char getQuotechar() {
        return this.quotechar;
    }

    /** @param quotechar the quote character to use */
    public void setQuotechar(char quotechar) {
        this.quotechar = quotechar;
    }

    /** @return the escape character */
    public char getEscapechar() {
        return this.escapechar;
    }

    /** @param escapechar the escape character to use */
    public void setEscapechar(char escapechar) {
        this.escapechar = escapechar;
    }

    /**
     * Tells whether {@code value} is a dedicated escape character.
     *
     * @param value the character to test
     * @return {@code false} when no escape character is set (NUL) or when the
     *         escape character is the same as the quote character; otherwise
     *         whether {@code value} equals the escape character
     */
    public boolean isEscapechar(char value) {
        // An escape char equal to the quote char means "no escape char".
        if (this.escapechar == '\0' || this.escapechar == this.quotechar) {
            return false;
        }
        return value == this.escapechar;
    }

    /** @return whether empty records are skipped */
    public boolean isSkipEmptyRecords() {
        return this.skipEmptyRecords;
    }

    /** @param skipEmptyRecords whether empty records are skipped */
    public void setSkipEmptyRecords(boolean skipEmptyRecords) {
        this.skipEmptyRecords = skipEmptyRecords;
    }

    /** @return whether whitespace is trimmed */
    public boolean isTrimWhitespace() {
        return this.trimWhitespace;
    }

    /** @param trimWhitespace whether whitespace is trimmed */
    public void setTrimWhitespace(boolean trimWhitespace) {
        this.trimWhitespace = trimWhitespace;
    }

    /**
     * Tells whether {@code elem} may stand at index {@code pos} of a line terminator.
     *
     * @param elem the character to test
     * @param pos  zero-based index inside the terminator
     * @return with no explicit terminator: LF or CR at index 0, LF at index 1,
     *         nothing beyond; with an explicit terminator: whether its
     *         character at {@code pos} equals {@code elem}
     */
    public boolean isLineEnd(char elem, int pos) {
        if (this.lineEnd != null) {
            return pos < this.lineEnd.length() && this.lineEnd.charAt(pos) == elem;
        }
        switch (pos) {
            case 0:
                return elem == '\n' || elem == '\r';
            case 1:
                return elem == '\n';
            default:
                return false;
        }
    }

    /**
     * Tells whether {@code token} is a complete line terminator.
     *
     * @param token the candidate terminator
     * @return with no explicit terminator: whether the token is LF or CR LF;
     *         otherwise whether it equals the configured terminator
     */
    public boolean isLineSep(String token) {
        return this.lineEnd == null
                ? "\n".equals(token) || "\r\n".equals(token)
                : this.lineEnd.equals(token);
    }

    /** @param lineEnd the explicit line terminator, or {@code null} for the default handling */
    public void setLineEnd(String lineEnd) {
        this.lineEnd = lineEnd;
    }
}
|
||||
@@ -0,0 +1,516 @@
|
||||
package com.talend.csv;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
public class CSVReader implements AutoCloseable {
|
||||
|
||||
private final Source source;
|
||||
|
||||
private final CSVConfig config = new CSVConfig();
|
||||
|
||||
private boolean hasNext = false;
|
||||
|
||||
private String[] values = new String[10];
|
||||
|
||||
private HeadersReader headersReader = new HeadersReader();
|
||||
|
||||
private int columnCount = 0;
|
||||
|
||||
private boolean storeRawRecord = false;
|
||||
|
||||
private String rawRecord = "";
|
||||
|
||||
public CSVReader(String filename,char separator,String charset) throws IOException {
|
||||
this(new FileInputStream(filename), separator, charset);
|
||||
}
|
||||
|
||||
public CSVReader(InputStream inputStream,char separator,String charset) throws IOException {
|
||||
this(new UnicodeReader(inputStream, charset), separator);
|
||||
}
|
||||
|
||||
public CSVReader(Reader reader,char separator) {
|
||||
this.source = new Source(reader);
|
||||
this.config.setSeparator(separator);
|
||||
}
|
||||
|
||||
public static CSVReader parse(String content) {
|
||||
if (content == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"Parameter content can not be null.");
|
||||
}
|
||||
|
||||
return new CSVReader(new StringReader(content),',');
|
||||
}
|
||||
|
||||
public CSVReader setLineEnd(String lineEnd) {
|
||||
this.config.setLineEnd(lineEnd);
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setSeparator(char separator) {
|
||||
this.config.setSeparator(separator);
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setEscapeChar(char escapechar) {
|
||||
this.config.setEscapechar(escapechar);
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setQuoteChar(char quotechar) {
|
||||
this.config.setQuotechar(quotechar);
|
||||
return this;
|
||||
}
|
||||
|
||||
public char getQuoteChar() {
|
||||
return this.config.getQuotechar();
|
||||
}
|
||||
|
||||
public CSVReader setTrimWhitespace(boolean trimWhitespace) {
|
||||
this.config.setTrimWhitespace(trimWhitespace);
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setSkipEmptyRecords(boolean skipEmptyRecords) {
|
||||
this.config.setSkipEmptyRecords(skipEmptyRecords);
|
||||
return this;
|
||||
}
|
||||
|
||||
public CSVReader setStoreRawRecord(boolean storeRawRecord) {
|
||||
this.storeRawRecord = storeRawRecord;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getRawRecord() {
|
||||
return rawRecord;
|
||||
}
|
||||
|
||||
|
||||
private State state = new StartState(null);
|
||||
|
||||
private void toRecord(List<String> fields) {
|
||||
this.values = fields.toArray(new String[fields.size()]);
|
||||
this.hasNext = true;
|
||||
}
|
||||
|
||||
private CSVReader.ResultAction result = new CSVReader.ResultAction(this::toRecord);
|
||||
|
||||
public boolean readNext() throws IOException {
|
||||
|
||||
hasNext = false;
|
||||
if (!this.source.hasMoreData()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
while(this.source.hasMoreData() && !hasNext) {
|
||||
char currentChar = this.source.currentChar();
|
||||
this.state = this.state.accept(currentChar, this.config, result);
|
||||
this.source.next();
|
||||
}
|
||||
if (!this.source.hasMoreData()) {
|
||||
this.state = this.state.accept('\0', this.config, result); // end of file.
|
||||
}
|
||||
return hasNext;
|
||||
}
|
||||
|
||||
public String get(int index) {
|
||||
if (index > -1 && index < columnCount) {
|
||||
return values[index];
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getValues() {
|
||||
String[] result = new String[values.length];
|
||||
System.arraycopy(values, 0, result, 0, values.length);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
this.source.close();
|
||||
headersReader.clear();
|
||||
}
|
||||
|
||||
//Added 20141016 TDQ-9496
|
||||
public int getCurrentRecord(){
|
||||
return this.source.getCurrentPosition();
|
||||
}
|
||||
|
||||
public char getSeperator(){
|
||||
return this.config.getSeparator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the first record of data as the column headers. Added 20141016 TDQ-9496
|
||||
*
|
||||
* @return If the header was successfully read or not.
|
||||
*/
|
||||
public boolean readHeaders() throws IOException {
|
||||
boolean result = readNext();
|
||||
|
||||
columnCount = this.values.length;
|
||||
|
||||
headersReader.length = columnCount;
|
||||
|
||||
headersReader.headers = new String[columnCount];
|
||||
|
||||
for (int i = 0; i < headersReader.length; i++) {
|
||||
String columnValue = get(i);
|
||||
headersReader.headers[i] = columnValue;
|
||||
headersReader.indexByHeaderName.put(columnValue, new Integer(i));
|
||||
}
|
||||
|
||||
if (result) {
|
||||
this.source.decreaseCurrentPosition();
|
||||
}
|
||||
|
||||
columnCount = 0;
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Returns the current column value for a given column header name.
|
||||
*/
|
||||
public String get(String headerName) throws IOException {
|
||||
return get(getIndex(headerName));
|
||||
}
|
||||
|
||||
private int getIndex(String headerName) throws IOException {
|
||||
if(headersReader.indexByHeaderName==null){
|
||||
return -1;
|
||||
}
|
||||
Object indexValue = headersReader.indexByHeaderName.get(headerName);
|
||||
|
||||
if (indexValue != null) {
|
||||
return ((Integer) indexValue).intValue();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
public String[] getHeaders() throws IOException {
|
||||
if (headersReader.headers == null) {
|
||||
return null;
|
||||
} else {
|
||||
String[] clone = new String[headersReader.length];
|
||||
System.arraycopy(headersReader.headers, 0, clone, 0,
|
||||
headersReader.length);
|
||||
return clone;
|
||||
}
|
||||
}
|
||||
|
||||
private class HeadersReader {
|
||||
private String[] headers;
|
||||
|
||||
private int length;
|
||||
|
||||
private HashMap indexByHeaderName;
|
||||
|
||||
public HeadersReader() {
|
||||
headers = null;
|
||||
length = 0;
|
||||
indexByHeaderName = new HashMap();
|
||||
}
|
||||
|
||||
public void clear(){
|
||||
headers = null;
|
||||
indexByHeaderName = null;
|
||||
}
|
||||
}
|
||||
/**End of added by TDQ-9496 **/
|
||||
|
||||
|
||||
static class ResultAction {
|
||||
|
||||
private final List<String> fields = new ArrayList<>();
|
||||
|
||||
private final StringBuilder field = new StringBuilder();
|
||||
|
||||
private final Consumer<List<String>> recordConsumer;
|
||||
|
||||
private boolean doTrimTail;
|
||||
|
||||
public ResultAction(Consumer<List<String>> recordConsumer) {
|
||||
this.recordConsumer = recordConsumer;
|
||||
}
|
||||
|
||||
public void addToCurrentField(char c) {
|
||||
this.field.append(c);
|
||||
}
|
||||
|
||||
public void addToCurrentField(String c) {
|
||||
this.field.append(c);
|
||||
}
|
||||
|
||||
public void endField() {
|
||||
if (this.doTrimTail) {
|
||||
this.trimTail();
|
||||
}
|
||||
this.fields.add(this.field.toString());
|
||||
this.field.setLength(0);
|
||||
}
|
||||
|
||||
public void endRecord(boolean skipEmpty) {
|
||||
if (!skipEmpty || this.fields.size() > 0) {
|
||||
this.recordConsumer.accept(this.fields);
|
||||
}
|
||||
this.fields.clear();
|
||||
}
|
||||
|
||||
public void setDoTrimTail(boolean doTrimTail) {
|
||||
this.doTrimTail = doTrimTail;
|
||||
}
|
||||
|
||||
public List<String> getFields() {
|
||||
return fields;
|
||||
}
|
||||
|
||||
private void trimTail() {
|
||||
boolean doTrim = true;
|
||||
while (doTrim) {
|
||||
doTrim = this.field.length() > 0;
|
||||
if (doTrim) {
|
||||
char lastChar = this.field.charAt(this.field.length() - 1);
|
||||
doTrim = lastChar == ' ' || lastChar == '\t';
|
||||
}
|
||||
if (doTrim) {
|
||||
this.field.setLength(this.field.length() - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class State {
|
||||
protected final State preceding;
|
||||
|
||||
public State(State preceding) {
|
||||
this.preceding = preceding;
|
||||
}
|
||||
|
||||
public State backToStart() {
|
||||
// back to start.
|
||||
State prec = this.preceding;
|
||||
while (prec != null
|
||||
&& !(StartState.class.isInstance(prec))
|
||||
&& prec.preceding != null) {
|
||||
prec = prec.preceding;
|
||||
}
|
||||
return prec;
|
||||
}
|
||||
|
||||
public abstract State accept(char newChar, CSVConfig config, ResultAction action);
|
||||
}
|
||||
|
||||
static class EscapeState extends State {
|
||||
public EscapeState(State preceding) {
|
||||
super(preceding);
|
||||
}
|
||||
|
||||
@Override
|
||||
public State accept(char currentChar, CSVConfig config, ResultAction action) {
|
||||
char real = currentChar;
|
||||
|
||||
switch (currentChar) {
|
||||
case 'n':
|
||||
real = '\n';
|
||||
break;
|
||||
case 'r':
|
||||
real = '\r';
|
||||
break;
|
||||
case 't':
|
||||
real = '\t';
|
||||
break;
|
||||
case 'b':
|
||||
real = '\b';
|
||||
break;
|
||||
case 'f':
|
||||
real = '\f';
|
||||
break;
|
||||
case 'e':
|
||||
real = '\u001B';
|
||||
break;
|
||||
case 'v':
|
||||
real = '\u000B';
|
||||
break;
|
||||
case 'a':
|
||||
real = '\u0007';
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
action.addToCurrentField(real);
|
||||
return this.preceding;
|
||||
}
|
||||
}
|
||||
|
||||
static class StartState extends State {
|
||||
|
||||
public StartState(State preceding) {
|
||||
super(preceding);
|
||||
}
|
||||
|
||||
@Override
|
||||
public State accept(char newChar, CSVConfig config, ResultAction action) {
|
||||
if ((newChar == '\t' || newChar == ' ') && config.isTrimWhitespace()) {
|
||||
return this;
|
||||
}
|
||||
if (newChar == '\0') {
|
||||
return this;
|
||||
}
|
||||
if (newChar == config.getQuotechar()) {
|
||||
return new QuotedFieldState(this);
|
||||
}
|
||||
if (config.isSeparator(newChar)) {
|
||||
action.setDoTrimTail(config.isTrimWhitespace());
|
||||
action.endField();
|
||||
return this;
|
||||
}
|
||||
if (config.isLineEnd(newChar, 0)) {
|
||||
EndLineState state = new EndLineState(this);
|
||||
return state.accept(newChar, config, action);
|
||||
}
|
||||
|
||||
UnQuotedFieldState nextStep = new UnQuotedFieldState(this);
|
||||
nextStep.accept(newChar, config, action);
|
||||
return nextStep;
|
||||
}
|
||||
}
|
||||
|
||||
static class QuotedFieldState extends State {
|
||||
|
||||
private final StringBuilder next = new StringBuilder();
|
||||
|
||||
private boolean quoteClosed = false;
|
||||
|
||||
public QuotedFieldState(State preceding) {
|
||||
super(preceding);
|
||||
}
|
||||
|
||||
@Override
|
||||
public State accept(char newChar, CSVConfig config, ResultAction action) {
|
||||
action.setDoTrimTail(false);
|
||||
if (config.isQuoteChar(newChar)) {
|
||||
if (!quoteClosed) {
|
||||
quoteClosed = true;
|
||||
this.next.append(newChar);
|
||||
} else if (config.isEscapechar(newChar)) { // double quote and quote is also escape char.
|
||||
quoteClosed = false;
|
||||
this.next.append(newChar);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
if (!quoteClosed) {
|
||||
if (config.isEscapechar(newChar)) {
|
||||
return new EscapeState(this);
|
||||
}
|
||||
|
||||
action.addToCurrentField(newChar);
|
||||
return this;
|
||||
}
|
||||
if (newChar == '\0') {
|
||||
next.setLength(0);
|
||||
action.setDoTrimTail(false);
|
||||
action.endField();
|
||||
action.endRecord(config.isSkipEmptyRecords());
|
||||
return this.preceding;
|
||||
}
|
||||
if (newChar == ' ' || newChar == '\t') {
|
||||
this.next.append(newChar);
|
||||
return this;
|
||||
}
|
||||
if (config.isSeparator(newChar)) {
|
||||
next.setLength(0);
|
||||
action.endField();
|
||||
return this.preceding;
|
||||
}
|
||||
if (config.isLineEnd(newChar, 0)) {
|
||||
next.setLength(0);
|
||||
quoteClosed = false;
|
||||
action.setDoTrimTail(false);
|
||||
EndLineState state = new EndLineState(this);
|
||||
return state.accept(newChar, config, action);
|
||||
}
|
||||
|
||||
// field continue
|
||||
action.addToCurrentField(next.toString());
|
||||
action.addToCurrentField(newChar);
|
||||
next.setLength(0);
|
||||
quoteClosed = false;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static class EndLineState extends State {
|
||||
private int pos = 0;
|
||||
private final StringBuilder builder = new StringBuilder(4);
|
||||
|
||||
public EndLineState(State preceding) {
|
||||
super(preceding);
|
||||
}
|
||||
|
||||
@Override
|
||||
public State accept(char newChar, CSVConfig config, ResultAction action) {
|
||||
|
||||
// end of line continue
|
||||
this.builder.append(newChar);
|
||||
|
||||
if (config.isLineSep(this.builder.toString())) {
|
||||
// end of line complete
|
||||
action.endField();
|
||||
action.endRecord(config.isSkipEmptyRecords());
|
||||
|
||||
return this.backToStart();
|
||||
}
|
||||
|
||||
if (config.isLineEnd(newChar, pos)) {
|
||||
this.pos++;
|
||||
return this;
|
||||
}
|
||||
// not end of line.
|
||||
action.addToCurrentField(builder.toString());
|
||||
|
||||
this.pos = 0;
|
||||
this.builder.setLength(0);
|
||||
return this.preceding;
|
||||
}
|
||||
}
|
||||
|
||||
static class UnQuotedFieldState extends State {
|
||||
|
||||
public UnQuotedFieldState(State preceding) {
|
||||
super(preceding);
|
||||
}
|
||||
|
||||
@Override
|
||||
public State accept(char newChar, CSVConfig config, ResultAction action) {
|
||||
|
||||
action.setDoTrimTail(config.isTrimWhitespace());
|
||||
|
||||
if (config.isSeparator(newChar)) {
|
||||
action.endField();
|
||||
return this.preceding;
|
||||
}
|
||||
if (newChar == '\0') {
|
||||
action.endField();
|
||||
action.endRecord(config.isSkipEmptyRecords());
|
||||
return this.preceding;
|
||||
}
|
||||
if (config.isLineEnd(newChar, 0)) {
|
||||
EndLineState state = new EndLineState(this);
|
||||
return state.accept(newChar, config, action);
|
||||
}
|
||||
action.addToCurrentField(newChar);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
package com.talend.csv;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
 * Character cursor over a {@link Reader}, read in fixed-size chunks.
 *
 * <p>The cursor exposes the current character, remembers the previous one and advances with
 * {@link #next()}. Readers that are not already a {@link BufferedReader} are wrapped in one.
 */
public class Source implements AutoCloseable {

    /** Number of characters requested from the reader per refill. */
    private static final int FETCH_SIZE = 10 * 50;

    /** Buffer size of the {@link BufferedReader} wrapped around unbuffered readers. */
    private static final int BUFFER_SIZE = 4 * 1024;

    /** Current chunk; only the first {@link #bufferCount} characters are valid. */
    private final char[] buffer = new char[FETCH_SIZE];

    /** Index of the current character inside {@link #buffer}. */
    private int currentPosition = 0;

    /** Number of valid characters in {@link #buffer}; 0 before the first read and at end of input. */
    private int bufferCount = 0;

    /** False once the reader reported end of input. */
    private boolean hasMoreData = true;

    private final Reader reader;

    /** Character the cursor was on before the last successful {@link #next()}. */
    private char previousChar = '\0';

    /**
     * @param reader character source; wrapped in a {@link BufferedReader} unless it already is one.
     */
    public Source(Reader reader) {
        this.reader = reader instanceof BufferedReader ? reader : new BufferedReader(reader, BUFFER_SIZE);
    }

    /** Closes the underlying reader. */
    @Override
    public void close() throws IOException {
        this.reader.close();
    }

    /**
     * Returns the character under the cursor, reading a new chunk when needed.
     *
     * @throws IOException if the reader fails or if there is no more data.
     */
    public char currentChar() throws IOException {
        // Compare with the filled length, not the array length: a read may legally return
        // fewer characters than requested without being at end of input.
        if (this.currentPosition >= this.bufferCount) {
            if (this.hasMoreData) {
                this.fill();
            }
            if (!this.hasMoreData) {
                throw new IOException("Has no more data.");
            }
        }
        return this.buffer[this.currentPosition];
    }

    /**
     * Returns the character the cursor was on before the last successful {@link #next()},
     * or NUL if the cursor never moved.
     */
    public char previousChar() throws IOException {
        return this.previousChar;
    }

    /**
     * Moves the cursor to the following character.
     *
     * @return true if a character is available at the new position, false at end of input
     *         (further calls keep returning false).
     * @throws IOException if the reader fails, or if the source is empty on the first call.
     */
    public boolean next() throws IOException {
        if (!this.hasMoreData) {
            return false;
        }
        this.previousChar = this.currentChar();
        this.currentPosition++;
        if (this.currentPosition >= this.bufferCount) {
            // Chunk exhausted: only a refill can tell whether the input is finished.
            this.fill();
        }
        return this.hasMoreData;
    }

    /** Returns false once the end of the input has been reached. */
    public boolean hasMoreData() {
        return hasMoreData;
    }

    /** Returns the cursor index inside the current chunk (not an absolute offset). */
    public int getCurrentPosition() {
        return currentPosition;
    }

    /**
     * Moves the cursor one character back inside the current chunk. No bounds check is done:
     * stepping back right after a refill leaves the cursor at a negative index.
     */
    public void decreaseCurrentPosition() {
        this.currentPosition--;
    }

    /** Reads the next chunk and resets the cursor; flags end of input when the reader returns -1. */
    private void fill() throws IOException {
        int count = reader.read(buffer, 0, buffer.length);
        currentPosition = 0;
        bufferCount = Math.max(count, 0);
        if (count == -1) {
            hasMoreData = false;
        }
    }
}
|
||||
@@ -0,0 +1,77 @@
|
||||
package com.talend.csv;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
class CSVReaderStateTest {
|
||||
|
||||
private static final CSVConfig config = new CSVConfig();
|
||||
|
||||
@BeforeAll
|
||||
public static void init() {
|
||||
config.setSeparator(',');
|
||||
config.setEscapechar('\\');
|
||||
config.setQuotechar('"');
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void quotedField() {
|
||||
checkField(new CSVReader.QuotedFieldState(null), "Hello\",", "Hello");
|
||||
checkField(new CSVReader.QuotedFieldState(null), "He\\nllo\",", "He\nllo");
|
||||
checkField(new CSVReader.QuotedFieldState(null), "Hello\"toto\",", "Hello\"toto");
|
||||
checkField(new CSVReader.QuotedFieldState(null), "Hello\" toto\" ,", "Hello\" toto");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void outsideField() {
|
||||
CSVReader.State state = new CSVReader.StartState(null);
|
||||
String source = "Hello,\"World\" , next \n Nex,\"World \t\" , ne\txt \n";
|
||||
|
||||
final List<List<String>> records = new ArrayList<>();
|
||||
CSVReader.ResultAction action = new CSVReader.ResultAction((List<String> f) -> {
|
||||
records.add(new ArrayList<>(f));
|
||||
});
|
||||
state = this.accept(state, source, action);
|
||||
|
||||
Assertions.assertEquals(2, records.size());
|
||||
|
||||
List<String> rec1 = records.get(0);
|
||||
Assertions.assertEquals(3, rec1.size());
|
||||
Assertions.assertEquals("Hello", rec1.get(0));
|
||||
Assertions.assertEquals("World", rec1.get(1));
|
||||
Assertions.assertEquals("next", rec1.get(2));
|
||||
|
||||
List<String> rec2 = records.get(1);
|
||||
Assertions.assertEquals(3, rec2.size());
|
||||
Assertions.assertEquals("Nex", rec2.get(0));
|
||||
Assertions.assertEquals("World \t", rec2.get(1));
|
||||
Assertions.assertEquals("ne\txt", rec2.get(2));
|
||||
|
||||
}
|
||||
|
||||
private void checkField(CSVReader.State state, String from, String to) {
|
||||
CSVReader.ResultAction action = new CSVReader.ResultAction(null);
|
||||
state = this.accept(state, from, action);
|
||||
|
||||
List<String> fields = action.getFields();
|
||||
Assertions.assertEquals(1, fields.size());
|
||||
Assertions.assertEquals(to, fields.get(0));
|
||||
Assertions.assertNull(state);
|
||||
}
|
||||
|
||||
private CSVReader.State accept(CSVReader.State state, String value, CSVReader.ResultAction action) {
|
||||
for (int i = 0; i < value.length(); i++) {
|
||||
CSVReader.State state2 = state.accept(value.charAt(i), config, action);
|
||||
state = state2;
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
package com.talend.csv;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Assumptions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class CSVReaderTest {
|
||||
|
||||
@Test
|
||||
void readNext() throws IOException {
|
||||
|
||||
String lines = "\"event_id\",\"event_name\",\"event_value\",\"source\"\n" + // titles.
|
||||
"\"001\",\"CN\",\"This is some \\\\ttext\",\"event_200\"\n" + // normal line
|
||||
"\"002\",\"CN\",\"This is some text, with sep\",\"event_250\"\n" + // test field sep inside value
|
||||
"\"003\",\"CN\",\"This is some \\\"text\\\" inside value\",\"event_300\"\n" + // escape quote inside value
|
||||
"\"004\",\"CN\",\"This is some other \"text\" inside value\",\"event_400\"\n" + // unescape quote inside value
|
||||
"\"005\" , \"CN\" , \"This is some text\" , event_500\n" + // spaced field
|
||||
"006, CN , \"Text\" ,\" xx \" \n" + // unquoted fields.
|
||||
"007,,\"\",\" xx \" "; // empty record.
|
||||
|
||||
final CSVReader reader = new CSVReader(new StringReader(lines), ',');
|
||||
reader.setEscapeChar('\\').setStoreRawRecord(true).setTrimWhitespace(false);
|
||||
boolean headers1 = reader.readHeaders();
|
||||
final String[] headers = reader.getHeaders();
|
||||
Assertions.assertAll(
|
||||
() -> Assertions.assertEquals(4, headers.length),
|
||||
() -> Assertions.assertEquals("event_id", headers[0]),
|
||||
() -> Assertions.assertEquals("event_name", headers[1]),
|
||||
() -> Assertions.assertEquals("event_value", headers[2]),
|
||||
() -> Assertions.assertEquals("source", headers[3])
|
||||
);
|
||||
Assertions.assertTrue(reader.readNext());
|
||||
|
||||
Assertions.assertAll(
|
||||
() -> this.checkNextValues("normal", reader, "001", "CN", "This is some \\ttext", "event_200"),
|
||||
() -> this.checkNextValues("field sep in value", reader, "002", "CN", "This is some text, with sep", "event_250"),
|
||||
() -> this.checkNextValues("escape quote inside value", reader, "003", "CN", "This is some \"text\" inside value", "event_300"),
|
||||
() -> this.checkNextValues("unescape quote inside value", reader, "004", "CN", "This is some other \"text\" inside value", "event_400"),
|
||||
() -> this.checkNextValues("spaced field", reader, "005", " \"CN\" "," \"This is some text\" ", " event_500"),
|
||||
() -> this.checkNextValues("unquoted fields", reader, "006", " CN "," \"Text\" ", " xx "),
|
||||
() -> this.checkNextValues("empty record", reader, "007", "", "", " xx ")
|
||||
);
|
||||
Assertions.assertFalse(reader.readNext());
|
||||
|
||||
final CSVReader reader2 = new CSVReader(new StringReader(lines), ',');
|
||||
reader2.setEscapeChar('\\').setStoreRawRecord(true).setTrimWhitespace(true);
|
||||
reader2.readHeaders();
|
||||
reader2.getHeaders();
|
||||
Assertions.assertTrue(reader2.readNext());
|
||||
|
||||
Assertions.assertAll(
|
||||
() -> this.checkNextValues("normal 2", reader2, "001", "CN", "This is some \\ttext", "event_200"),
|
||||
() -> this.checkNextValues("field sep in value 2", reader2, "002", "CN", "This is some text, with sep", "event_250"),
|
||||
() -> this.checkNextValues("escape quote inside value 2", reader2, "003", "CN", "This is some \"text\" inside value", "event_300"),
|
||||
() -> this.checkNextValues("unescape quote inside value", reader2, "004", "CN", "This is some other \"text\" inside value", "event_400"),
|
||||
() -> this.checkNextValues("spaced field 2", reader2, "005", "CN","This is some text", "event_500"),
|
||||
() -> this.checkNextValues("unquoted fields 2", reader2, "006", "CN","Text", " xx "),
|
||||
() -> this.checkNextValues("empty record 2", reader2, "007", "", "", " xx ")
|
||||
);
|
||||
Assertions.assertFalse(reader2.readNext());
|
||||
}
|
||||
|
||||
@Test
|
||||
void readNextEmptyRecord() throws IOException {
|
||||
String line = "0\\t07,, \"\" ,\" x\\tx \" ";
|
||||
final CSVReader reader = new CSVReader(new StringReader(line), ',');
|
||||
reader.setEscapeChar('\\');
|
||||
reader.setTrimWhitespace(true);
|
||||
reader.setSkipEmptyRecords(true);
|
||||
Assertions.assertAll(
|
||||
() -> this.checkNextValues("empty record", reader, "0\\t07", "", "", " x\tx ")
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void lineSepTest() throws IOException {
|
||||
|
||||
String lines = "line@1@#line#2";
|
||||
|
||||
final CSVReader reader = new CSVReader(new StringReader(lines), ',');
|
||||
reader.setLineEnd("@#");
|
||||
|
||||
Assertions.assertAll(
|
||||
() -> checkNextValues("line 1 for line sep", reader, "line@1"),
|
||||
() -> checkNextValues("line 2 for line sep", reader, "line#2")
|
||||
);
|
||||
|
||||
String lines2 = "Hello@#World@#With@butoneline@#With#butoneline\n";
|
||||
final CSVReader reader1 = new CSVReader(new StringReader(lines2), ',');
|
||||
reader1.setLineEnd("@#");
|
||||
Assertions.assertAll(
|
||||
() -> checkNextValues("line 1 for line sep", reader1, "Hello"),
|
||||
() -> checkNextValues("line 2 for line sep", reader1, "World"),
|
||||
() -> checkNextValues("line 3 for line sep", reader1, "With@butoneline"),
|
||||
() -> checkNextValues("line 4 for line sep", reader1, "With#butoneline\n")
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testEscapeIsQuote() throws IOException {
|
||||
String lines = "\"L\"\"in\"te 1\"\nLine\"t\"\"2";
|
||||
final CSVReader reader = new CSVReader(new StringReader(lines), ',');
|
||||
|
||||
Assertions.assertAll(
|
||||
() -> checkNextValues("line 1", reader, "L\"in\"te 1"),
|
||||
() -> checkNextValues("line 2", reader, "Line\"t\"\"2")
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testQuoted() throws IOException {
|
||||
String input = "\"Hello\",\"ss\"\n\"World\",\"ddzs\"\n\"OneColumn\",\"ddzs\"\n";
|
||||
File fic = new File("/home/clesaec/project/jobs/csvConv/oneCol.txt");
|
||||
|
||||
|
||||
final CSVReader reader = new CSVReader(new StringReader(input), ',');
|
||||
//final CSVReader reader = new CSVReader(new FileInputStream(fic), ',', "ISO-8859-15");
|
||||
reader.setQuoteChar('"');
|
||||
reader.setTrimWhitespace(false);
|
||||
reader.setEscapeChar('"');
|
||||
reader.setSkipEmptyRecords(false);
|
||||
|
||||
Assertions.assertAll(
|
||||
() -> checkNextValues("line 1", reader, "Hello", "ss"),
|
||||
() -> checkNextValues("line 2", reader, "World", "ddzs"),
|
||||
() -> checkNextValues("line 3", reader, "OneColumn", "ddzs")
|
||||
);
|
||||
Assertions.assertFalse(reader.readNext());
|
||||
}
|
||||
|
||||
void checkNextValues(String comment, CSVReader reader, String... excepted) throws IOException {
|
||||
Assertions.assertTrue(reader.readNext());
|
||||
String[] values = reader.getValues();
|
||||
Assertions.assertEquals(excepted.length, values.length, comment + " : wrong length");
|
||||
for (int i = 0; i < excepted.length; i++) {
|
||||
Assertions.assertEquals(excepted[i], values[i], comment + " : field " + i + " in error");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1127,7 +1127,7 @@
|
||||
|
||||
csvReader<%=cid %>.setTrimWhitespace(false);
|
||||
if ( (rowSeparator_<%=cid %>[0] != '\n') && (rowSeparator_<%=cid %>[0] != '\r') )
|
||||
csvReader<%=cid %>.setLineEnd(""+rowSeparator_<%=cid %>[0]);
|
||||
csvReader<%=cid %>.setLineEnd(new String(rowSeparator_<%=cid %>));
|
||||
<%
|
||||
if(("").equals(textEnclosure1) || textEnclosure1.startsWith("\"")){//normal situation
|
||||
%>
|
||||
@@ -1228,7 +1228,7 @@
|
||||
}
|
||||
csvReader<%=cid %>.setTrimWhitespace(false);
|
||||
if ( (rowSeparator_<%=cid %>[0] != '\n') && (rowSeparator_<%=cid %>[0] != '\r') )
|
||||
csvReader<%=cid %>.setLineEnd(""+rowSeparator_<%=cid %>[0]);
|
||||
csvReader<%=cid %>.setLineEnd(new String(rowSeparator_<%=cid %>));
|
||||
<%
|
||||
if(("").equals(textEnclosure1) || textEnclosure1.startsWith("\"")){//normal situation
|
||||
%>
|
||||
|
||||
Reference in New Issue
Block a user