Merged
13 changes: 8 additions & 5 deletions app/controllers/WKRemoteDataStoreController.scala
@@ -7,7 +7,7 @@ import com.scalableminds.util.tools.{Fox, Full}
import com.scalableminds.webknossos.datastore.controllers.JobExportProperties
import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLinkInfo}
import com.scalableminds.webknossos.datastore.models.UnfinishedUpload
import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, DataSource, UsableDataSource}
import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, DataSource}
import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus}
import com.scalableminds.webknossos.datastore.services.uploading.{
LegacyLinkedLayerIdentifier,
@@ -303,14 +303,17 @@ class WKRemoteDataStoreController @Inject()(

}

def updateDataSource(name: String, key: String, datasetId: ObjectId): Action[UsableDataSource] =
Action.async(validateJson[UsableDataSource]) { implicit request =>
def updateDataSource(name: String, key: String, datasetId: ObjectId): Action[DataSource] =
Action.async(validateJson[DataSource]) { implicit request =>
dataStoreService.validateAccess(name, key) { _ =>
for {
dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ~> NOT_FOUND
_ <- Fox.runIf(!dataset.isVirtual)(
datasetDAO.updateDataSource(datasetId, name, request.body.hashCode(), request.body, isUsable = true)(
GlobalAccessContext))
datasetDAO.updateDataSource(datasetId,
name,
request.body.hashCode(),
request.body,
isUsable = request.body.toUsable.isDefined)(GlobalAccessContext))
} yield Ok
}
}
5 changes: 4 additions & 1 deletion app/models/dataset/DatasetUploadToPathsService.scala
@@ -19,6 +19,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{
StaticSegmentationLayer,
UsableDataSource
}
import com.scalableminds.webknossos.datastore.services.DataSourceValidation
import controllers.{LinkedLayerIdentifier, ReserveAttachmentUploadToPathRequest, ReserveDatasetUploadToPathsRequest}
import models.organization.OrganizationDAO
import models.user.User
@@ -34,7 +35,8 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService,
dataStoreDAO: DataStoreDAO,
datasetLayerAttachmentsDAO: DatasetLayerAttachmentsDAO,
conf: WkConf)
extends FoxImplicits {
extends FoxImplicits
with DataSourceValidation {

def reserveDatasetUploadToPaths(parameters: ReserveDatasetUploadToPathsRequest,
requestingUser: User,
@@ -57,6 +59,7 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService,
organization._id,
parameters.pathPrefix)
dataSourceWithLayersToLink <- addLayersToLink(dataSourceWithPaths, parameters.layersToLink)
_ <- assertValidateDataSource(dataSourceWithLayersToLink).toFox
dataStore <- findReferencedDataStore(parameters.layersToLink)
dataset <- datasetService.createDataset(
dataStore,
2 changes: 2 additions & 0 deletions unreleased_changes/8894.md
@@ -0,0 +1,2 @@
### Changed
- DataSources are now more strictly validated when reading them from disk or when reserving manual uploads.
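
For context, a rough sketch (not part of this PR's diff) of how the stricter validation surfaces its findings: the new `DataSourceValidation` trait further down in this diff collects plain error messages and wraps them into a `ParamFailure` with a JSON payload, roughly like this (the error strings below are examples):

```scala
import play.api.libs.json.Json

// Hypothetical error messages, in the style produced by validateDataSourceGetErrors
// (see the new DataSourceValidation trait further down in this diff).
val errors = Seq("No layers.", "Empty bounding box in a layer.")

// Mirrors how assertValidateDataSource packages the errors for a ParamFailure:
val payload = Json.toJson(errors.map(e => Json.obj("error" -> e)))
// payload renders as: [{"error":"No layers."},{"error":"Empty bounding box in a layer."}]
```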
@@ -662,21 +662,24 @@ class DataSourceController @Inject()(
}
}

private def refreshDataSource(datasetId: ObjectId)(implicit tc: TokenContext): Fox[UsableDataSource] =
private def refreshDataSource(datasetId: ObjectId)(implicit tc: TokenContext): Fox[DataSource] =
for {
dataSourceFromDB <- dsRemoteWebknossosClient.getDataSource(datasetId) ~> NOT_FOUND
dataSourceId = dataSourceFromDB.id
dataSourceFromDir <- Fox.runIf(dataSourceService.existsOnDisk(dataSourceId)) {
dataSourceService
.dataSourceFromDir(
dataSourceFromDirOpt = if (dataSourceService.existsOnDisk(dataSourceId)) {
Some(
dataSourceService.dataSourceFromDir(
dataSourceService.dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName),
dataSourceId.organizationId)
.toUsable
.toFox
}
_ <- Fox.runOptional(dataSourceFromDir)(ds => dsRemoteWebknossosClient.updateDataSource(ds, datasetId))
dataSourceId.organizationId))
} else None
_ <- Fox.runOptional(dataSourceFromDirOpt)(ds => dsRemoteWebknossosClient.updateDataSource(ds, datasetId))
_ = datasetCache.invalidateCache(datasetId)
dataSource <- datasetCache.getById(datasetId) ~> NOT_FOUND
} yield dataSource
newUsableFromDBBox <- datasetCache.getById(datasetId).shiftBox
dataSourceToReturn <- (newUsableFromDBBox, dataSourceFromDirOpt) match {
case (Full(newUsableFromDB), _) => Fox.successful(newUsableFromDB)
case (_, Some(dataSourceFromDir)) => Fox.successful(dataSourceFromDir)
case _ => Fox.failure("DataSource not found") ~> NOT_FOUND
}

Contributor:
Why did you add this change? Is there a possible scenario where the datasource cannot be retrieved from the cache but exists on disk / in dir?

Member Author:
Yes, the cache only handles usable datasources. If the existing datasource is unusable due to an error, the cache will just return a Failure. However, in this case we want to re-read the one from disk, re-validate it, and send it to webknossos if it is now usable. Only after that will it also be available through the cache. So this part is actually the bugfix for the reload button in the dashboard (for non-virtual datasets). Does that make sense?
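
A minimal sketch of that fallback (hypothetical helper, not code from this PR), using the repository's Box type:

```scala
import com.scalableminds.util.tools.{Box, Full}

// Sketch: the cache only serves usable data sources, so an unusable data source
// surfaces as a non-Full Box. In that case, fall back to the data source that was
// just re-read (and re-validated) from disk, if there is one.
def pickRefreshedDataSource[A](fromCacheBox: Box[A], fromDirOpt: Option[A]): Option[A] =
  (fromCacheBox, fromDirOpt) match {
    case (Full(fromCache), _) => Some(fromCache) // cache now holds a usable data source
    case (_, Some(fromDir))   => Some(fromDir)   // cache still fails, use the disk read
    case _                    => None            // nothing usable available
  }
```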

} yield dataSourceToReturn

}
@@ -13,7 +13,7 @@ import com.scalableminds.webknossos.datastore.controllers.JobExportProperties
import com.scalableminds.webknossos.datastore.helpers.{IntervalScheduler, LayerMagLinkInfo, UPath}
import com.scalableminds.webknossos.datastore.models.UnfinishedUpload
import com.scalableminds.webknossos.datastore.models.annotation.AnnotationSource
import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, DataSourceId, UsableDataSource}
import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, DataSourceId}
import com.scalableminds.webknossos.datastore.rpc.RPC
import com.scalableminds.webknossos.datastore.services.uploading.{
ReserveAdditionalInformation,
@@ -122,7 +122,7 @@ class DSRemoteWebknossosClient @Inject()(
.putJson(dataSourcePaths)

def fetchPaths(datasetId: ObjectId): Fox[List[LayerMagLinkInfo]] =
rpc(s"$webknossosUri/api/datastores/$dataStoreName/datasources/${datasetId}/paths")
rpc(s"$webknossosUri/api/datastores/$dataStoreName/datasources/$datasetId/paths")
.addQueryString("key" -> dataStoreKey)
.getWithJsonResponse[List[LayerMagLinkInfo]]

Expand All @@ -135,7 +135,7 @@ class DSRemoteWebknossosClient @Inject()(
.postJsonWithJsonResponse[ReserveUploadInformation, ReserveAdditionalInformation](info)
} yield reserveUploadInfo

def updateDataSource(dataSource: UsableDataSource, datasetId: ObjectId)(implicit tc: TokenContext): Fox[_] =
def updateDataSource(dataSource: DataSource, datasetId: ObjectId)(implicit tc: TokenContext): Fox[_] =
rpc(s"$webknossosUri/api/datastores/$dataStoreName/datasources/${datasetId.toString}")
.addQueryString("key" -> dataStoreKey)
.withTokenFromContext
@@ -34,6 +34,7 @@ class DataSourceService @Inject()(
with LazyLogging
with DataSourceToDiskWriter
with FoxImplicits
with DataSourceValidation
with Formatter {

override protected def tickerEnabled: Boolean = config.Datastore.WatchFileSystem.enabled
@@ -226,15 +227,16 @@
if (new File(propertiesFile.toString).exists()) {
JsonHelper.parseFromFileAs[UsableDataSource](propertiesFile, path) match {
case Full(dataSource) =>
if (dataSource.dataLayers.nonEmpty) {
val validationErrors = validateDataSourceGetErrors(dataSource)
if (validationErrors.isEmpty) {
val dataSourceWithAttachments = dataSource.copy(
dataLayers = resolveAttachmentsAndAddScanned(path, dataSource)
)
dataSourceWithAttachments.copy(id)
} else
UnusableDataSource(id,
None,
"Error: Zero layer Dataset",
s"Error: ${validationErrors.mkString(" ")}",
Some(dataSource.scale),
Some(Json.toJson(dataSource)))
case e =>
@@ -2,17 +2,17 @@ package com.scalableminds.webknossos.datastore.services

import com.scalableminds.util.io.PathUtils
import com.scalableminds.util.time.Instant
import com.scalableminds.util.tools.{Box, Failure, Fox, FoxImplicits, Full, JsonHelper, ParamFailure}
import com.scalableminds.util.tools.{Box, Failure, Fox, FoxImplicits, Full, JsonHelper}
import com.scalableminds.webknossos.datastore.helpers.UPath
import com.scalableminds.webknossos.datastore.models.datasource.{ElementClass, UsableDataSource}
import com.scalableminds.webknossos.datastore.models.datasource.UsableDataSource
import play.api.libs.json.Json

import java.io.FileWriter
import java.nio.file.{Files, Path}
import scala.concurrent.ExecutionContext
import scala.io.Source

trait DataSourceToDiskWriter extends PathUtils with FoxImplicits {
trait DataSourceToDiskWriter extends PathUtils with DataSourceValidation with FoxImplicits {

private val propertiesFileName = Path.of(UsableDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON)
private val logFileName = Path.of("datasource-properties-backups.log")
@@ -25,7 +25,7 @@ trait DataSourceToDiskWriter extends PathUtils with FoxImplicits {
val dataSourcePath = organizationDir.resolve(dataSource.id.directoryName)

for {
_ <- Fox.runIf(validate)(validateDataSource(dataSource, organizationDir).toFox)
_ <- Fox.runIf(validate)(assertValidateDataSource(dataSource).toFox)
propertiesFile = dataSourcePath.resolve(propertiesFileName)
_ <- Fox.runIf(!expectExisting)(ensureDirectoryBox(dataSourcePath).toFox)
_ <- Fox.runIf(!expectExisting)(Fox.fromBool(!Files.exists(propertiesFile))) ?~> "dataSource.alreadyPresent"
@@ -69,44 +69,4 @@ }
}
}

private def validateDataSource(dataSource: UsableDataSource, organizationDir: Path): Box[Unit] = {
def Check(expression: Boolean, msg: String): Option[String] = if (!expression) Some(msg) else None

// Check that when mags are sorted by max dimension, all dimensions are sorted.
// This means each dimension increases monotonically.
val magsSorted = dataSource.dataLayers.map(_.resolutions.sortBy(_.maxDim))
val magsXIsSorted = magsSorted.map(_.map(_.x)) == magsSorted.map(_.map(_.x).sorted)
val magsYIsSorted = magsSorted.map(_.map(_.y)) == magsSorted.map(_.map(_.y).sorted)
val magsZIsSorted = magsSorted.map(_.map(_.z)) == magsSorted.map(_.map(_.z).sorted)

val errors = List(
Check(dataSource.scale.factor.isStrictlyPositive, "DataSource voxel size (scale) is invalid"),
Check(magsXIsSorted && magsYIsSorted && magsZIsSorted, "Mags do not monotonically increase in all dimensions"),
Check(dataSource.dataLayers.nonEmpty, "DataSource must have at least one dataLayer"),
Check(dataSource.dataLayers.forall(!_.boundingBox.isEmpty), "DataSource bounding box must not be empty"),
Check(
dataSource.segmentationLayers.forall { layer =>
ElementClass.segmentationElementClasses.contains(layer.elementClass)
},
s"Invalid element class for segmentation layer"
),
Check(
dataSource.segmentationLayers.forall { layer =>
ElementClass.largestSegmentIdIsInRange(layer.largestSegmentId, layer.elementClass)
},
"Largest segment id exceeds range (must be nonnegative, within element class range, and < 2^53)"
),
Check(
dataSource.dataLayers.map(_.name).distinct.length == dataSource.dataLayers.length,
"Layer names must be unique. At least two layers have the same name."
)
).flatten

if (errors.isEmpty) {
Full(())
} else {
ParamFailure("DataSource is invalid", Json.toJson(errors.map(e => Json.obj("error" -> e))))
}
}

}
@@ -0,0 +1,64 @@
package com.scalableminds.webknossos.datastore.services

import com.scalableminds.util.tools.{Box, Full, ParamFailure}
import com.scalableminds.webknossos.datastore.models.datasource.{ElementClass, UsableDataSource}
import play.api.libs.json.Json

trait DataSourceValidation {

protected def assertValidateDataSource(dataSource: UsableDataSource): Box[Unit] = {
val errors = validateDataSourceGetErrors(dataSource)
if (errors.isEmpty) {
Full(())
} else {
ParamFailure("DataSource is invalid", Json.toJson(errors.map(e => Json.obj("error" -> e))))
}
}

protected def validateDataSourceGetErrors(dataSource: UsableDataSource): Seq[String] = {
def check(expression: Boolean, msg: String): Option[String] = if (!expression) Some(msg) else None

// Check that when mags are sorted by max dimension, all dimensions are sorted.
// This means each dimension increases monotonically.
val magsSorted = dataSource.dataLayers.map(_.resolutions.sortBy(_.maxDim))
val magsXIsSorted = magsSorted.map(_.map(_.x)) == magsSorted.map(_.map(_.x).sorted)
val magsYIsSorted = magsSorted.map(_.map(_.y)) == magsSorted.map(_.map(_.y).sorted)
val magsZIsSorted = magsSorted.map(_.map(_.z)) == magsSorted.map(_.map(_.z).sorted)

val errors = List(
check(dataSource.scale.factor.isStrictlyPositive, "Voxel size (scale) is negative in at least one dimension."),
check(magsXIsSorted && magsYIsSorted && magsZIsSorted, "Mags do not monotonically increase in all dimensions."),
check(magsSorted.forall(magsOfLayer => magsOfLayer.length == magsOfLayer.distinct.length),
"There are duplicate mags in a layer."),
check(dataSource.dataLayers.nonEmpty, "No layers."),
check(dataSource.dataLayers.forall(!_.boundingBox.isEmpty), "Empty bounding box in a layer."),
check(
dataSource.segmentationLayers.forall { layer =>
ElementClass.segmentationElementClasses.contains(layer.elementClass)
},
s"Invalid element class for a segmentation layer."
),
check(
dataSource.segmentationLayers.forall { layer =>
ElementClass.largestSegmentIdIsInRange(layer.largestSegmentId, layer.elementClass)
},
"Largest segment id exceeds range (must be nonnegative, within element class range, and < 2^53)."
),
check(
dataSource.dataLayers.map(_.name).distinct.length == dataSource.dataLayers.length,
"Layer names must be unique. At least two layers have the same name."
),
check(
dataSource.dataLayers.map(_.name).forall(!_.contains("/")),
"Layer names must not contain forward slash."
),
check(
dataSource.dataLayers.map(_.name).forall(!_.startsWith(".")),
"Layer names must not start with dot."
)
).flatten

errors
}

}
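
As a usage note, a sketch under assumptions (`MyUploadService` and `storeIfValid` are hypothetical): a datastore service can mix in this trait and chain the check into a Fox for-comprehension, similar to what DataSourceToDiskWriter and DatasetUploadToPathsService do in this PR.

```scala
import com.scalableminds.util.tools.{Fox, FoxImplicits}
import com.scalableminds.webknossos.datastore.models.datasource.UsableDataSource
import com.scalableminds.webknossos.datastore.services.DataSourceValidation

import scala.concurrent.ExecutionContext

// Hypothetical service illustrating how the trait is intended to be used.
class MyUploadService(implicit ec: ExecutionContext) extends FoxImplicits with DataSourceValidation {

  def storeIfValid(dataSource: UsableDataSource): Fox[Unit] =
    for {
      // Box[Unit] -> Fox[Unit]; on invalid input, the ParamFailure carrying
      // the aggregated error messages propagates to the caller.
      _ <- assertValidateDataSource(dataSource).toFox
      // ... continue with persisting / registering the data source ...
    } yield ()
}
```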