public class CSVParser
extends java.lang.Object
implements com.hp.hpl.jena.util.iterator.ClosableIterator<com.hp.hpl.jena.sparql.engine.binding.Binding>
Parses the contents of a CSV file provided as a Reader, and delivers
results as an iterator of Bindings. Also provides
access to the variable names (which may come from row 1 or
could be auto-generated).
Adds a ROWNUM column with the number of the row.
Modifier and Type | Field and Description |
---|---|
private static java.lang.String |
alphabet |
private com.hp.hpl.jena.sparql.engine.binding.Binding |
binding |
private au.com.bytecode.opencsv.CSVReader |
csv |
private char |
delimiter |
private java.lang.Character |
escape |
private char |
quote |
private java.io.Reader |
reader |
private int |
rownum |
private org.apache.commons.validator.routines.UrlValidator |
urlValidator |
private java.util.List<com.hp.hpl.jena.sparql.core.Var> |
vars |
private boolean |
varsFromHeader |
Constructor and Description |
---|
CSVParser(java.io.Reader reader,
boolean varsFromHeader,
java.lang.Character delimiter,
java.lang.Character quote,
java.lang.Character escape)
Class constructor.
|
Modifier and Type | Method and Description |
---|---|
void |
close() |
private java.lang.String |
formatColumnName(java.lang.String s)
Format column names with the following rules.
|
static java.lang.String |
getColumnName(int i)
Get a default column name for a column index.
|
private com.hp.hpl.jena.sparql.core.Var |
getVar(int column)
Get variable in the CSV at a given column.
|
java.util.List<com.hp.hpl.jena.sparql.core.Var> |
getVars()
Get all variables from the CSV file.
|
boolean |
hasNext() |
private void |
init()
Init method setting up the CSV source and reading the variables from it.
|
private boolean |
isEmpty(java.lang.String[] row)
Check if the CSV row is empty.
|
private boolean |
isUnboundValue(java.lang.String value)
Checks whether a string taken from a CSV cell is considered an unbound SPARQL value.
|
private boolean |
isURL(java.lang.String s)
Check if a string is a valid URL.
|
com.hp.hpl.jena.sparql.engine.binding.Binding |
next() |
void |
remove() |
private java.lang.String |
sanitizeString(java.lang.String s)
Remove/replace weird characters known to cause problems in RDF toolkits.
|
private java.lang.String |
sanitizeUrl(java.lang.String s)
Remove '<' and '>' symbols from a URL if present.
|
private com.hp.hpl.jena.sparql.engine.binding.Binding |
toBinding(java.lang.String[] row)
Get a Binding object representing a CSV row.
|
private com.hp.hpl.jena.sparql.core.Var |
toVar(java.lang.String s)
Get a Var object from a column name.
|
private static final java.lang.String alphabet
private final java.io.Reader reader
private final boolean varsFromHeader
private final char delimiter
private final char quote
private final java.lang.Character escape
private final java.util.List<com.hp.hpl.jena.sparql.core.Var> vars
private int rownum
private org.apache.commons.validator.routines.UrlValidator urlValidator
private com.hp.hpl.jena.sparql.engine.binding.Binding binding
private au.com.bytecode.opencsv.CSVReader csv
public CSVParser(java.io.Reader reader, boolean varsFromHeader, java.lang.Character delimiter, java.lang.Character quote, java.lang.Character escape) throws java.io.IOException
reader
- Reader over the contents of a CSV file
varsFromHeader
- If true, use values of first row as column names
delimiter
- The delimiter character to use for separating entries (e.g., ',' or ';' or '\t'), or null for default
quote
- The quote character used to quote values (typically double or single quote), or null for default
escape
- The escape character for quotes and delimiters, or null for none
java.io.IOException
private void init() throws java.io.IOException
java.io.IOException
private com.hp.hpl.jena.sparql.engine.binding.Binding toBinding(java.lang.String[] row)
row
- CSV row.
public java.util.List<com.hp.hpl.jena.sparql.core.Var> getVars()
private com.hp.hpl.jena.sparql.core.Var getVar(int column)
column
- Column index.
private com.hp.hpl.jena.sparql.core.Var toVar(java.lang.String s)
s
- Input column name.
public static java.lang.String getColumnName(int i)
i
- Column index.
private boolean isEmpty(java.lang.String[] row)
row
- CSV row.
private boolean isUnboundValue(java.lang.String value)
value
- CSV cell value.
private java.lang.String sanitizeString(java.lang.String s)
s
- Input String.
private java.lang.String sanitizeUrl(java.lang.String s)
s
- Input URL.
private java.lang.String formatColumnName(java.lang.String s)
s
- Input column name.
private boolean isURL(java.lang.String s)
s
- Input String.
public boolean hasNext()
hasNext
in interface java.util.Iterator<com.hp.hpl.jena.sparql.engine.binding.Binding>
public com.hp.hpl.jena.sparql.engine.binding.Binding next()
next
in interface java.util.Iterator<com.hp.hpl.jena.sparql.engine.binding.Binding>
public void remove()
remove
in interface java.util.Iterator<com.hp.hpl.jena.sparql.engine.binding.Binding>
public void close()
close
in interface com.hp.hpl.jena.util.iterator.ClosableIterator<com.hp.hpl.jena.sparql.engine.binding.Binding>