Mapping CSV file with multiline Strings to Java Object with Spring Batch

This article shows how to parse a CSV file that contains multiline strings. Spring Batch makes this task easier.

Sample CSV File

"Column 1","Column 2","Column 3"
1,"This is a multiline 
String","Value 3"

Simple target data class (no getters/setters needed; public fields are fine).

/**
 * Target object for one CSV record. The field names match the
 * {@code fieldName} values declared in {@code MyDatasColumns}.
 */
// NOTE(review): BeanWrapperFieldSetMapper normally binds through JavaBean
// setters; confirm that public fields alone are populated with the Spring
// Batch version in use, or add setters.
public class MyData {
    public String column1;
    public String column2;
    public String column3;
}

Unit test!

/**
 * Checks that a CSV record whose quoted value spans two physical lines is
 * read back as a single {@code MyData} item with all three columns mapped.
 */
class MyDataCsvReaderFactoryTest {

    @Test
    void csvMyDataReader() throws Exception {

        ItemStreamReader<MyData> reader = MyDataCsvReaderFactory.csvMyDataReader(
                new ClassPathResource("/maptest_mydata.csv"));

        reader.open(new ExecutionContext());
        try {
            MyData myData = reader.read();

            // JUnit 5 argument order is (expected, actual, message).
            assertEquals("1", myData.column1, "column1");
            assertEquals("This is a multiline\n" +
                         "String", myData.column2, "column2");
            assertEquals("Value 3", myData.column3, "column3");
        } finally {
            // Release the underlying resource even when an assertion fails.
            reader.close();
        }
    }
}

Now the implementation.

The following enum decouples the data mapping from the CSV file's internal column indexes.

import java.util.Arrays;

/**
 * Describes the CSV columns that can be mapped: each constant pairs a column
 * index with the name of the field that receives the column's value.
 */
public enum MyDatasColumns {
    COLUMN_1(0, "column1"),
    COLUMN_2(1, "column2"),
    COLUMN_3(2, "column3");

    /** Index of the column within a CSV record. */
    public final int columnIndex;

    /** Name of the target field the column value is bound to. */
    public final String fieldName;

    MyDatasColumns(int index, String name) {
        columnIndex = index;
        fieldName = name;
    }
}

The mapping logic with Spring Batch utilities. Note the use of the enum to abstract column selection and column order.

import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemStreamReader;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper;
import org.springframework.batch.item.file.mapping.DefaultLineMapper;
import org.springframework.batch.item.file.separator.DefaultRecordSeparatorPolicy;
import org.springframework.batch.item.file.transform.DelimitedLineTokenizer;
import org.springframework.batch.item.support.SingleItemPeekableItemReader;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;

import java.util.Arrays;
import java.util.Comparator;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static MyDatasColumns.*;

/**
 * Creates readers that convert raw CSV records into {@link MyData} objects.
 */
public class MyDataCsvReaderFactory {

    /**
     * Builds a reader for the given CSV resource. The first line is skipped as
     * a header, and quoted values may span several physical lines.
     *
     * @param resource the CSV file to read
     * @return a configured reader; the caller is responsible for opening and
     *         closing it
     */
    public static ItemStreamReader<MyData> csvMyDataReader(Resource resource) {
        // Columns sorted by their index, so that the names and the included
        // field indexes handed to the tokenizer stay aligned.
        SortedSet<MyDatasColumns> columns = new TreeSet<>(
                Comparator.comparingInt((MyDatasColumns column) -> column.columnIndex));
        // Here we select the columns we want in the target object.
        columns.addAll(Arrays.asList(COLUMN_1, COLUMN_2, COLUMN_3));

        FlatFileItemReader<MyData> reader = new FlatFileItemReader<>();

        // skip header line
        reader.setLinesToSkip(1);
        // Important ! --> Allowing multi-line token support <--
        // The record separator policy decides when a physical line completes a
        // record; the default policy keeps reading while a quote is still open.
        reader.setRecordSeparatorPolicy(new DefaultRecordSeparatorPolicy());
        reader.setResource(resource);
        reader.setLineMapper(new MyDataDefaultLineMapper(columns));
        return reader;
    }

    /**
     * Line mapper that tokenizes a comma-delimited, double-quoted record and
     * binds the selected columns to a {@link MyData} instance.
     */
    private static class MyDataDefaultLineMapper extends DefaultLineMapper<MyData> {

        /**
         * @param columns the columns to extract, iterated in index order
         */
        public MyDataDefaultLineMapper(SortedSet<MyDatasColumns> columns) {
            String[] columnNames = columns.stream()
                    .map(column -> column.fieldName)
                    .toArray(String[]::new);
            int[] includedFields = columns.stream()
                    .mapToInt(column -> column.columnIndex)
                    .toArray();

            DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
            tokenizer.setNames(columnNames);
            tokenizer.setQuoteCharacter('"');
            tokenizer.setIncludedFields(includedFields);
            tokenizer.setDelimiter(",");
            setLineTokenizer(tokenizer);

            BeanWrapperFieldSetMapper<MyData> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
            fieldSetMapper.setTargetType(MyData.class);
            setFieldSetMapper(fieldSetMapper);
        }
    }
}