ExcelContext.java
package pro.verron.officestamper.excel;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.SpreadsheetMLPackage;
import org.docx4j.openpackaging.parts.SpreadsheetML.WorkbookPart;
import org.docx4j.openpackaging.parts.SpreadsheetML.WorksheetPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.jspecify.annotations.NonNull;
import org.xlsx4j.org.apache.poi.ss.usermodel.DataFormatter;
import org.xlsx4j.sml.*;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
import static java.util.Collections.emptyList;
/// ExcelContext exposes a lazy, query-oriented view over an XLSX workbook.
/// - Sheets are accessible by index via the `sheets` list, each as a [SheetContext].
/// - Sheets are also accessible by name at the root level, resolving to their default table (first row = headers).
/// - Cells on a sheet can be queried using A1 notation via `sheet.get("A1")`.
/// - A sheet's default table is available via the special key `"rows"` on a [SheetContext].
/// - Named tables, when present, are exposed at the root level by their table name, each as a list of records
/// mapping header names to row values.
///
/// The underlying XLSX data is accessed lazily: no pre-loading of all sheets occurs in the constructor; data is read
/// only when a property is actually queried.
public final class ExcelContext
extends AbstractMap<String, Object> {
static final DataFormatter FORMATTER = new DataFormatter();
private final SpreadsheetMLPackage spreadsheet;
private final Map<String, Object> rootCache = new TreeMap<>();
private List<SheetContext> sheetsCache;
/// Create a new context from a SpreadsheetMLPackage.
///
/// @param spreadsheet SpreadsheetMLPackage to use
/// @return a new [ExcelContext] instance
public static ExcelContext from(SpreadsheetMLPackage spreadsheet) {
return new ExcelContext(spreadsheet);
}
private ExcelContext(SpreadsheetMLPackage spreadsheet) {
this.spreadsheet = spreadsheet;
}
/// Create a new context from a path to an `.xlsx` file.
///
/// @param path Path to the Excel workbook file
/// @return a new [ExcelContext] instance
/// @throws ExcelException if the file cannot be read
public static ExcelContext from(Path path) {
try (InputStream is = Files.newInputStream(path)) {
return from(is);
} catch (IOException e) {
throw new ExcelException(e);
}
}
/// Create a new context from an input stream containing an `.xlsx` file.
///
/// The input stream is consumed during construction.
///
/// @param inputStream Input stream of an Excel workbook
/// @return a new [ExcelContext] instance
/// @throws ExcelException if the stream cannot be parsed
public static ExcelContext from(InputStream inputStream) {
try {
var pkg = SpreadsheetMLPackage.load(inputStream);
return new ExcelContext(pkg);
} catch (Docx4JException e) {
throw new ExcelException(e);
}
}
/// Merges all sheets in the workbook into a single list of records using an inner join.
///
/// @param joinKey the column name to join on. If null or empty, the first column of the first sheet is used.
/// @return a list of joined records
public List<Map<String, String>> joinAllSheets(String joinKey) {
var sheets = enumerateSheets();
if (sheets.isEmpty()) return Collections.emptyList();
List<Map<String, String>> result = null;
for (var sc : sheets) {
var table = defaultTable(sc.worksheetPart());
if (result == null) {
result = new ArrayList<>(table);
if (joinKey == null || joinKey.isBlank()) {
if (!result.isEmpty() && !result.getFirst()
.isEmpty()) {
joinKey = result.getFirst()
.keySet()
.iterator()
.next();
}
}
}
else {
result = innerJoin(result, table, joinKey);
}
}
return result != null ? result : Collections.emptyList();
}
static List<Map<String, String>> innerJoin(
List<Map<String, String>> left,
List<Map<String, String>> right,
String joinKey
) {
if (joinKey == null || joinKey.isBlank()) return Collections.emptyList();
var rightById = new HashMap<String, List<Map<String, String>>>();
for (var row : right) {
var val = row.get(joinKey);
if (val != null) {
rightById.computeIfAbsent(val, _ -> new ArrayList<>())
.add(row);
}
}
var joined = new ArrayList<Map<String, String>>();
for (var leftRow : left) {
var val = leftRow.get(joinKey);
if (val != null && rightById.containsKey(val)) {
for (var rightRow : rightById.get(val)) {
var newRow = new LinkedHashMap<>(leftRow);
newRow.putAll(rightRow);
joined.add(newRow);
}
}
}
return joined;
}
@Override
public Object get(Object key) {
if (!(key instanceof String name)) return null;
if ("sheets".equals(name)) return enumerateSheets();
// Sheet by name -> default table
var maybeSheet = enumerateSheets().stream()
.filter(s -> s.name()
.equals(name))
.findFirst();
if (maybeSheet.isPresent()) {
return rootCache.computeIfAbsent(name,
_ -> defaultTable(maybeSheet.get()
.worksheetPart()));
}
// Named table at root: resolve lazily on demand
return rootCache.computeIfAbsent(name, this::resolveNamedTableByNameOrNull);
}
@Override
public @NonNull Set<Entry<String, Object>> entrySet() {
// Compose a dynamic view consisting of: sheets (list), sheet names -> default tables, and discovered tables
var map = new LinkedHashMap<String, Object>();
map.put("sheets", enumerateSheets());
for (var sc : enumerateSheets()) {
map.computeIfAbsent(sc.name(), _ -> defaultTable(sc.worksheetPart()));
}
// include anything populated in cache (e.g., named tables resolved so far)
map.putAll(rootCache);
return map.entrySet();
}
private List<SheetContext> enumerateSheets() {
if (sheetsCache != null) return sheetsCache;
var wb = workbook();
var sheets = wb.getSheets()
.getSheet();
List<SheetContext> list = new ArrayList<>(sheets.size());
for (Sheet sheet : sheets) {
var ws = resolveWorksheetPart(sheet);
list.add(new SheetContext(sheet.getName(), ws));
}
sheetsCache = Collections.unmodifiableList(list);
return sheetsCache;
}
private List<Map<String, String>> defaultTable(WorksheetPart part) {
var ws = worksheetOf(part);
var rows = ws.getSheetData()
.getRow();
if (rows.isEmpty()) return emptyList();
var headers = extractHeaders(rows.getFirst());
return toRecords(headers, rows.subList(1, rows.size()));
}
private Workbook workbook() {
try {
return workbookPart().getContents();
} catch (Docx4JException e) {
throw new ExcelException(e);
}
}
private WorksheetPart resolveWorksheetPart(Sheet sheet) {
var rels = relationshipsPart();
return (WorksheetPart) rels.getPart(sheet.getId());
}
private static Worksheet worksheetOf(WorksheetPart part) {
try {
return part.getContents();
} catch (Docx4JException e) {
throw new ExcelException(e);
}
}
private static List<String> extractHeaders(Row headerRow) {
return headerRow.getC()
.stream()
.map(FORMATTER::formatCellValue)
.toList();
}
private static List<Map<String, String>> toRecords(List<String> headers, List<Row> rows) {
return rows.stream()
.map(row -> {
LinkedHashMap<String, String> map = new LinkedHashMap<>();
int bound = headers.size();
for (int i = 0; i < bound; i++) {
var cell = findCell(row, i);
map.put(headers.get(i),
cell.map(SheetContext::formatCellValueAt)
.orElse(""));
}
return map;
})
.collect(Collectors.toCollection(() -> new ArrayList<>(rows.size())));
}
private WorkbookPart workbookPart() {
return spreadsheet.getWorkbookPart();
}
private RelationshipsPart relationshipsPart() {
return workbookPart().getRelationshipsPart();
}
private static @NonNull Optional<Cell> findCell(Row row, int i) {
List<Cell> rowCells = row.getC();
Optional<Cell> cell;
if (i >= rowCells.size()) cell = Optional.empty();
else cell = Optional.of(rowCells.get(i));
return cell;
}
private Object resolveNamedTableByNameOrNull(String tableName) {
// Attempt to find a table with this name on any worksheet
for (SheetContext sc : enumerateSheets()) {
var part = sc.worksheetPart();
var ws = worksheetOf(part);
var tableParts = ws.getTableParts();
if (tableParts == null) continue;
var rels = part.getRelationshipsPart();
var tps = tableParts.getTablePart();
for (var tp : tps) {
var p = rels.getPart(tp.getId());
// Content type in xlsx4j is org.xlsx4j.sml.Table
try {
var table = (org.docx4j.openpackaging.parts.SpreadsheetML.TablePart) p;
var ct = table.getContents(); // org.xlsx4j.sml.Table
if (Objects.equals(ct.getName(), tableName)) {
var ref = ct.getRef(); // e.g., A1:C10
return SheetContext.extractRangeAsRecords(worksheetOf(part), ref);
}
} catch (ClassCastException | Docx4JException e) {
// Ignore and continue; not a standard table part
}
}
}
return null;
}
}