Bad Data
A sketch of how one might find malformed data. Note that `col3` is inferred as `String`.
import io.github.quafadas.table.*
// Read the CSV resource and tag every record with its index in the iterator,
// so that a malformed row can be traced back to where it came from.
// col3 is typed as String here; the rendered table further down shows the
// values "8c" and "9.0" in that column.
val datarator: Iterator[(origIdx: Int, col1: Int, col2: Int, col3: String)] =
  CSV
    .resource("simple_bad.csv", TypeInferrer.FromAllRows)
    .zipWithIndex
    .map((record, position) => (origIdx = position) ++ record)

// A LazyList memoises the rows it has produced, so `data` can be traversed
// more than once (the underlying iterator could only be consumed a single time).
val data = LazyList.from(datarator)
The malformed rows are found by:
- adding a row index to the output.
- parsing a new column and checking for the `None` values.
// Format every indexed row as a text table; the resulting string is shown below.
// NOTE(review): `fansi = false` presumably disables ANSI colouring of the output — confirm.
data.consoleFormatNt(fansi = false)
// res0: String = """| |origIdx|col1|col2|col3|
// +-+-------+----+----+----+
// |0| 0| 1| 2| 7|
// |1| 1| 3| 4| 8c|
// |2| 2| 5| 6| 9|
// |3| 3| 5| 6| 9.0|
// +-+-------+----+----+----+"""
// Attempt to read col3 as an Int. Rows where the attempt yields None are the
// malformed ones; origIdx identifies their position in the original data.
data
  .addColumn["col3_parsed", Option[Int]](row => row.col3.toIntOption)
  .filter(row => row.col3_parsed.isEmpty)
  .consoleFormatNt(fansi = false)
// res1: String = """| |origIdx|col1|col2|col3|col3_parsed|
// +-+-------+----+----+----+-----------+
// |0| 1| 3| 4| 8c| None|
// |1| 3| 5| 6| 9.0| None|
// +-+-------+----+----+----+-----------+"""