Skip to content

Commit 919a0b0

Browse files
authored
Merge pull request #1420 from Kotlin/implicit-char-toString-v2
Implicit char to string v2
2 parents 82bf7ba + d93c956 commit 919a0b0

File tree

21 files changed

+377
-62
lines changed

21 files changed

+377
-62
lines changed

core/api/core.api

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3549,8 +3549,12 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParseKt {
35493549
public static synthetic fun parse$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
35503550
public static final fun parseAnyFrameNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35513551
public static synthetic fun parseAnyFrameNullable$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3552+
public static final fun parseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3553+
public static synthetic fun parseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35523554
public static final fun tryParse (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35533555
public static synthetic fun tryParse$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3556+
public static final fun tryParseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3557+
public static synthetic fun tryParseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35543558
}
35553559

35563560
public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import java.time.format.DateTimeFormatter
2020
import java.util.Locale
2121
import kotlin.reflect.KProperty
2222
import kotlin.reflect.KType
23+
import kotlin.reflect.typeOf
2324
import kotlin.uuid.ExperimentalUuidApi
2425
import kotlin.uuid.Uuid
2526

@@ -312,6 +313,28 @@ public class ParserOptions(
312313
* @return a new column with parsed values */
313314
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)
314315

316+
/**
317+
* Tries to parse a column of chars into a column of a different type.
318+
* Each parser in [Parsers] is run in order until a valid parser is found,
319+
* a.k.a. that parser was able to parse all values in the column successfully. If a parser
320+
* fails to parse any value, the next parser is tried. If all the others fail, the final parser
321+
* returns strings.
322+
*
323+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
324+
*
325+
* @param options options for parsing, like providing a locale or a custom date-time formatter
326+
* @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
327+
* @return a new column with parsed values
328+
*/
329+
@JvmName("tryParseChar")
330+
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
331+
// skip the Char parser, as we're trying to parse away from Char
332+
val providedSkipTypes = options?.skipTypes ?: DataFrame.parser.skipTypes
333+
val parserOptions = (options ?: ParserOptions()).copy(skipTypes = providedSkipTypes + typeOf<Char>())
334+
335+
return map { it?.toString() }.tryParse(parserOptions)
336+
}
337+
315338
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
316339
parse(options) {
317340
colsAtAnyDepth().filter { !it.isColumnGroup() }
@@ -335,6 +358,23 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
335358
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
336359
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
337360

361+
/**
362+
* Tries to parse a column of chars as strings into a column of a different type.
363+
* Each parser in [Parsers] is run in order until a valid parser is found,
364+
* a.k.a. that parser was able to parse all values in the column successfully. If a parser
365+
* fails to parse any value, the next parser is tried.
366+
*
367+
* If all fail, the column is returned as `String`; this can never fail.
368+
*
369+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
370+
*
371+
* @param options options for parsing, like providing a locale or a custom date-time formatter
372+
* @return a new column with parsed values
373+
*/
374+
@JvmName("parseChar")
375+
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> =
376+
tryParse(options) // no need to throw an exception, as Char can always be parsed as String
377+
338378
@JvmName("parseAnyFrameNullable")
339379
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
340380
map { it?.parse(options) }

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,13 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
369369

370370
Char::class -> when (toClass) {
371371
Int::class -> convert<Char> { it.code }
372-
else -> null
372+
373+
else -> // convert char to string and then to target type
374+
getConverter(typeOf<String>(), to, options)?.let { stringConverter ->
375+
convert<Char> {
376+
stringConverter(it.toString())
377+
}
378+
}
373379
}
374380

375381
Int::class -> when (toClass) {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -716,29 +716,24 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
716716
when {
717717
// when a frame column is requested to be parsed,
718718
// parse each value/frame column at any depth inside each DataFrame in the frame column
719-
col.isFrameColumn() -> {
719+
col.isFrameColumn() ->
720720
col.map {
721721
it.parseImpl(options) {
722722
colsAtAnyDepth().filter { !it.isColumnGroup() }
723723
}
724724
}
725-
}
726725

727726
// when a column group is requested to be parsed,
728727
// parse each column in the group
729-
col.isColumnGroup() -> {
728+
col.isColumnGroup() ->
730729
col.parseImpl(options) { all() }
731730
.asColumnGroup(col.name())
732731
.asDataColumn()
733-
}
734732

735733
// Base case, parse the column if it's a `String?` column
736-
col.isSubtypeOf<String?>() -> {
734+
col.isSubtypeOf<String?>() ->
737735
col.cast<String?>().tryParseImpl(options)
738-
}
739736

740-
else -> {
741-
col
742-
}
737+
else -> col
743738
}
744739
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.api
33
import io.kotest.assertions.throwables.shouldNotThrow
44
import io.kotest.assertions.throwables.shouldThrow
55
import io.kotest.matchers.shouldBe
6+
import io.kotest.matchers.shouldNotBe
67
import kotlinx.datetime.Clock
78
import kotlinx.datetime.Instant
89
import kotlinx.datetime.LocalTime
@@ -69,6 +70,20 @@ class ConvertTests {
6970
@Test
7071
fun `convert string to enum`() {
7172
columnOf("A", "B").convertTo<EnumClass>() shouldBe columnOf(EnumClass.A, EnumClass.B)
73+
74+
dataFrameOf(columnOf("A", "B") named "colA")
75+
.convert("colA").to<EnumClass>()
76+
.getColumn("colA") shouldBe columnOf(EnumClass.A, EnumClass.B).named("colA")
77+
}
78+
79+
@Test
80+
fun `convert char to enum`() {
81+
// Char -> String -> Enum
82+
columnOf('A', 'B').convertTo<EnumClass>() shouldBe columnOf(EnumClass.A, EnumClass.B)
83+
84+
dataFrameOf(columnOf('A', 'B') named "colA")
85+
.convert("colA").to<EnumClass>()
86+
.getColumn("colA") shouldBe columnOf(EnumClass.A, EnumClass.B).named("colA")
7287
}
7388

7489
@JvmInline
@@ -199,6 +214,15 @@ class ConvertTests {
199214
val col = columnOf(65, 66)
200215
col.convertTo<Char>() shouldBe columnOf('A', 'B')
201216
col.convertTo<Char>().convertTo<Int>() shouldBe col
217+
218+
// this means Char -> Int yields the UTF-16 code of the char, not the digit it depicts:
219+
columnOf('1', '2').convertToInt() shouldNotBe columnOf(1, 2)
220+
columnOf('1', '2').convertToInt() shouldBe columnOf(49, 50)
221+
222+
// but
223+
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
224+
// or
225+
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
202226
}
203227

204228
@Test

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,22 @@ import kotlin.time.Instant as StdlibInstant
3838
import kotlinx.datetime.Instant as DeprecatedInstant
3939

4040
class ParseTests {
41+
42+
@Test
43+
fun `parse to chars`() {
44+
val char = columnOf('a', 'b', 'c')
45+
char.parse() shouldBe char
46+
char.tryParse() shouldBe char
47+
char.convertToString().parse() shouldBe char
48+
}
49+
50+
@Test
51+
fun `parse chars to int`() {
52+
val char = columnOf('1', '2', '3')
53+
char.parse() shouldBe columnOf(1, 2, 3)
54+
char.tryParse() shouldBe columnOf(1, 2, 3)
55+
}
56+
4157
@Test
4258
fun parseDate() {
4359
val currentLocale = Locale.getDefault()

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ class ParserTests {
4646
DataFrame.parser.resetToDefault()
4747
}
4848

49+
@Test
50+
fun `parse to Char`() {
51+
val col by columnOf("a", "b")
52+
col.parse().type() shouldBe typeOf<Char>()
53+
}
54+
4955
@Test(expected = IllegalStateException::class)
5056
fun `parse should throw`() {
5157
val col by columnOf("a", "bc")

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ internal interface ConvertDocs {
132132
* * [LocalDateTime], [LocalDate], [LocalTime],
133133
* `Instant` ([kotlinx.datetime][DeprecatedInstant], [kotlin.time][StdlibInstant], and [java.time]),
134134
* * [URL], [IMG], [IFRAME].
135+
*
136+
* __NOTE__: Conversion between [Int] and [Char] is done by UTF-16 [Char.code].
137+
* To convert [Char]->[Int] the way it is written, use [parse()][parse] instead, or,
138+
* in either case, use [String] as intermediary type.
135139
*/
136140
interface SupportedTypes
137141

@@ -362,7 +366,7 @@ public class Convert<T, out C>(
362366
* preserving their original names and positions within the [DataFrame].
363367
*
364368
* The target type is provided as a reified type argument.
365-
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
369+
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
366370
*
367371
* For more information: {@include [DocumentationUrls.Convert]}
368372
*
@@ -390,7 +394,7 @@ public class Convert<T, out C>(
390394
* preserving their original names and positions within the [DataFrame].
391395
*
392396
* The target type is provided as a [KType].
393-
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
397+
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
394398
*
395399
* For more information: {@include [DocumentationUrls.Convert]}
396400
*
@@ -554,7 +558,7 @@ public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
554558
*
555559
* The target type is provided as a reified type argument.
556560
*
557-
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
561+
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
558562
*
559563
* @param [C] The target type to convert values to.
560564
* @return A new [DataColumn] with the values converted to type [C].
@@ -564,7 +568,7 @@ public inline fun <reified C> AnyCol.convertTo(): DataColumn<C> = convertTo(type
564568
/**
565569
* Converts values in this column to the specified [type].
566570
*
567-
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
571+
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
568572
*
569573
* @param type The target type, provided as a [KType], to convert values to.
570574
* @return A new [DataColumn] with the values converted to [type].

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public class ConverterScope(public val fromType: KType, public val toSchema: Col
5252
* df.convertTo<SomeSchema> {
5353
* // defines how to convert Int? -> String
5454
* convert<Int?>().with { it?.toString() ?: "No input given" }
55-
* // defines how to convert String -> SomeType
55+
* // defines how to convert String/Char -> SomeType
5656
* parser { SomeType(it) }
5757
* // fill missing column `sum` with expression `a+b`
5858
* fill { sum }.with { a + b }
@@ -102,6 +102,10 @@ public fun <T, C> ConvertToFill<T, C>.with(expr: RowExpression<T, C>) {
102102

103103
/**
104104
* Defines how to convert `String` values into given type [C].
105+
*
106+
* This method is a shortcut for `convert<String>().with { }`.
107+
*
108+
* If no converter is defined for `Char` values, this converter will be used for them as well.
105109
*/
106110
public inline fun <reified C> ConvertSchemaDsl<*>.parser(noinline parser: (String) -> C): Unit =
107111
convert<String>().with(parser)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1212
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1313
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
1414
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
15-
import org.jetbrains.kotlinx.dataframe.typeClass
1615
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
1716
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
1817
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
@@ -302,6 +301,23 @@ public class ParserOptions(
302301
/** @include [tryParseImpl] */
303302
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)
304303

304+
/**
305+
* Tries to parse a column of chars into a column of a different type.
306+
* Each parser in [Parsers] is run in order until a valid parser is found,
307+
* a.k.a. that parser was able to parse all values in the column successfully. If a parser
308+
* fails to parse any value, the next parser is tried. If all the others fail, the final parser
309+
* returns strings.
310+
*
311+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
312+
*
313+
* @param options options for parsing, like providing a locale or a custom date-time formatter
314+
* @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
315+
* @return a new column with parsed values
316+
*/
317+
@JvmName("tryParseChar")
318+
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> =
319+
map { it?.toString() }.tryParseImpl(options)
320+
305321
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
306322
parse(options) {
307323
colsAtAnyDepth().filter { !it.isColumnGroup() }
@@ -323,7 +339,27 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
323339
* @return a new column with parsed values
324340
*/
325341
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
326-
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
342+
tryParse(options).also { if (it.isSubtypeOf<String?>()) error("Can't guess column type") }
343+
344+
/**
345+
* Tries to parse a column of chars as strings into a column of a different type.
346+
* Each parser in [Parsers] is run in order until a valid parser is found,
347+
* a.k.a. that parser was able to parse all values in the column successfully. If a parser
348+
* fails to parse any value, the next parser is tried.
349+
*
350+
* If all fail, [IllegalStateException] is thrown. If you don't want this exception to be thrown,
351+
* use [tryParse] instead.
352+
*
353+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
354+
*
355+
* @param options options for parsing, like providing a locale or a custom date-time formatter
356+
* @return a new column with parsed values
357+
*/
358+
@JvmName("parseChar")
359+
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> =
360+
map { it?.toString() }
361+
.tryParse(options)
362+
.also { if (it.isSubtypeOf<Char?>() || it.isSubtypeOf<String?>()) error("Can't guess column type") }
327363

328364
@JvmName("parseAnyFrameNullable")
329365
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =

0 commit comments

Comments
 (0)