Skip to content

Reuse the legacy package import process in the API pipeline #288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 29 additions & 28 deletions ci/src/Registry/API.purs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import Registry.Prelude
import Control.Monad.Except as Except
import Data.Argonaut as Json
import Data.Array as Array
import Data.Array.NonEmpty as NEA
import Data.Generic.Rep as Generic
import Data.Map as Map
import Data.String as String
Expand All @@ -31,7 +30,7 @@ import Registry.PackageName as PackageName
import Registry.PackageUpload as Upload
import Registry.RegistryM (Env, RegistryM, closeIssue, comment, commitToTrunk, readPackagesMetadata, runRegistryM, throwWithComment, updatePackagesMetadata, uploadPackage)
import Registry.Schema (Manifest(..), Metadata, Operation(..), Repo(..), addVersionToMetadata, mkNewMetadata, isVersionInMetadata)
import Registry.Scripts.LegacyImport.Bowerfile as Bowerfile
import Registry.Scripts.LegacyImport.Error (ImportError(..), RawPackageName(..), RawVersion(..))
import Registry.Scripts.LegacyImport.Manifest as Manifest
import Sunde as Process
import Text.Parsing.StringParser as StringParser
Expand Down Expand Up @@ -159,34 +158,36 @@ addOrUpdate { ref, fromBower, packageName } metadata = do
let manifestPath = absoluteFolderPath <> "/purs.json"
log $ "Package extracted in " <> absoluteFolderPath

-- If we're importing from Bower then we need to convert the Bowerfile
-- to a Registry Manifest
-- If we're "importing from Bower" then we need to gather information about the
-- legacy package and put all of that together into a Registry Manifest
when fromBower do
liftAff (try (readJsonFile (absoluteFolderPath <> "/bower.json"))) >>= case _ of
address <- case metadata.location of
Git _ -> throwWithComment "Legacy packages can only come from GitHub. Aborting."
GitHub { owner, repo } -> pure { owner, repo }

semVer <- case SemVer.parseSemVer ref of
Nothing -> throwWithComment $ "Not a valid SemVer version: " <> ref
Just result -> pure result

let
printErrors =
Json.stringifyWithIndent 2 <<< Json.encodeJson

liftError = map (lmap ManifestImportError)

runManifest =
Except.runExceptT <<< Except.mapExceptT (liftAff <<< map (lmap printErrors))

gatherManifest :: ExceptT ImportError Aff Manifest
gatherManifest = do
manifestFields <- Manifest.constructManifestFields (RawPackageName $ show packageName) (RawVersion ref) address
Except.mapExceptT liftError $ Manifest.toManifest packageName metadata.location semVer manifestFields

runManifest gatherManifest >>= case _ of
Left err ->
throwWithComment $ "Error while reading Bowerfile: " <> Aff.message err
Right (Left err) ->
throwWithComment $ "Could not decode Bowerfile: " <> Json.printJsonDecodeError err
Right (Right bowerfile) -> do
let
printErrors =
Json.stringifyWithIndent 2 <<< Json.encodeJson <<< NEA.toArray

manifestFields =
Bowerfile.toManifestFields bowerfile

runManifest =
Except.runExceptT <<< Except.mapExceptT (liftAff <<< map (lmap printErrors))

semVer <- case SemVer.parseSemVer ref of
Nothing -> throwWithComment $ "Not a valid SemVer version: " <> ref
Just result -> pure result

runManifest (Manifest.toManifest packageName metadata.location semVer manifestFields) >>= case _ of
Left err ->
throwWithComment $ "Unable to convert Bowerfile to a manifest: " <> err
Right manifest ->
liftAff $ writeJsonFile manifestPath manifest
throwWithComment $ "Unable to convert Bowerfile to a manifest: " <> err
Right manifest ->
liftAff $ writeJsonFile manifestPath manifest

-- Try to read the manifest, typechecking it
manifest@(Manifest manifestRecord) <- liftAff (try $ FS.readTextFile UTF8 manifestPath) >>= case _ of
Expand Down
175 changes: 4 additions & 171 deletions ci/src/Registry/Scripts/LegacyImport.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,20 @@ module Registry.Scripts.LegacyImport where

import Registry.Prelude

import Affjax as Http
import Affjax.ResponseFormat as ResponseFormat
import Affjax.StatusCode (StatusCode(..))
import Control.Monad.Except as Except
import Control.Parallel (parallel, sequential)
import Data.Argonaut as Json
import Data.Array as Array
import Data.Array.NonEmpty as NEA
import Data.Interpolate (i)
import Data.Map as Map
import Data.String as String
import Data.String.NonEmpty as NES
import Data.Time.Duration (Hours(..))
import Dotenv as Dotenv
import Effect.Aff as Aff
import Effect.Ref as Ref
import Foreign.Dhall as Dhall
import Foreign.GitHub as GitHub
import Foreign.Jsonic as Jsonic
import Foreign.Licensee as Licensee
import Foreign.Object as Object
import Foreign.SemVer (SemVer)
import Foreign.SemVer as SemVer
import Foreign.Tmp as Tmp
import Node.FS.Aff as FS
import Registry.API as API
import Registry.Index (RegistryIndex)
import Registry.PackageGraph as Graph
Expand All @@ -35,13 +24,9 @@ import Registry.PackageName as PackageName
import Registry.PackageUpload as Upload
import Registry.RegistryM (Env, runRegistryM)
import Registry.Schema (Repo(..), Manifest(..), Operation(..), Metadata)
import Registry.Scripts.LegacyImport.Bowerfile as Bowerfile
import Registry.Scripts.LegacyImport.Error (APIResource(..), FileResource(..), ImportError(..), ManifestError(..), PackageFailures(..), RawPackageName(..), RawVersion(..), RemoteResource(..), RequestError(..), fileResourcePath)
import Registry.Scripts.LegacyImport.Error (APIResource(..), ImportError(..), ManifestError(..), PackageFailures(..), RawPackageName(..), RawVersion(..), RemoteResource(..), RequestError(..))
import Registry.Scripts.LegacyImport.Manifest as Manifest
import Registry.Scripts.LegacyImport.ManifestFields (ManifestFields)
import Registry.Scripts.LegacyImport.Process as Process
import Registry.Scripts.LegacyImport.SpagoJson (SpagoJson)
import Registry.Scripts.LegacyImport.SpagoJson as SpagoJson
import Registry.Scripts.LegacyImport.Stats as Stats
import Safe.Coerce (coerce)
import Text.Parsing.StringParser as StringParser
Expand Down Expand Up @@ -170,7 +155,7 @@ downloadLegacyRegistry = do

packageSemVer <- case SemVer.parseSemVer $ un RawVersion tag of
Nothing ->
throwError $ ManifestError $ NEA.singleton $ BadVersion $ un RawVersion tag
throwError $ ManifestImportError $ NEA.singleton $ BadVersion $ un RawVersion tag
Just semVer ->
pure semVer

Expand All @@ -190,11 +175,11 @@ downloadLegacyRegistry = do
}
{ semVer :: SemVer, original :: RawVersion }
Manifest <- forPackageRegistry \{ name, original: originalName, address } tag _ -> do
manifestFields <- constructManifestFields originalName tag.original address
manifestFields <- Manifest.constructManifestFields originalName tag.original address

let
repo = GitHub { owner: address.owner, repo: address.repo, subdir: Nothing }
liftError = map (lmap ManifestError)
liftError = map (lmap ManifestImportError)

Except.mapExceptT liftError $ Manifest.toManifest name repo tag.semVer manifestFields

Expand Down Expand Up @@ -254,155 +239,3 @@ readRegistryFile source = do
Right packages -> do
let toPackagesArray = Object.toArrayWithKey \k -> Tuple (RawPackageName $ stripPureScriptPrefix k)
pure $ Map.fromFoldable $ toPackagesArray packages

-- | Attempt to construct the basic fields necessary for a manifest file by reading
-- | the package version's bower.json, spago.dhall, package.json, and LICENSE
-- | files, if present.
constructManifestFields
:: RawPackageName
-> RawVersion
-> GitHub.Address
-> ExceptT ImportError Aff ManifestFields
constructManifestFields package version address = do
let cacheKey = i "manifest-fields__" (un RawPackageName package) "__" (un RawVersion version)
Process.withCache Process.jsonSerializer cacheKey Nothing do
-- We can construct a manifest from a package's bowerfile, package.json file,
-- spago.dhall file, and/or LICENSE files. A package doesn't need to have all
-- of these files; several of these files duplicate information. We try to
-- fetch all files but won't throw an exception (yet) if they're missing.
log $ "Constructing manifest fields for " <> un RawPackageName package <> " " <> un RawVersion version
let mkRequest file = parallel $ Except.runExceptT $ fileRequest file Process.stringSerializer
files <- liftAff $ sequential ado
licenseFile <- mkRequest LicenseFile
bowerJson <- mkRequest BowerJson
packageJson <- mkRequest PackageJson
in { licenseFile, bowerJson, packageJson }

-- TODO: Improve this heuristic by checking the Bower _and_ Spago files.
--
-- We can pull dependencies from the bower.json or spago.dhall files. If both
-- files are present, but their dependencies differ, then we should use the
-- file with newer dependencies; presumably, it's the more up-to-date file.
--
-- Since Bower users typically use ranges, but package sets use precise
-- versions, we could check to see whether one uses later major versions
-- than the other does; checking minor or patch versions will be inaccurate.
--
-- If the files differ but it isn't clear which file is newer, then we should
-- prefer the Bower file since it's the legacy format used for package p
-- publishing.
--
-- For now, that's exactly what we do: use the Bower file if it is present,
-- and otherwise fall back to the Spago file.
bowerManifest <- Except.runExceptT do
result <- Except.except files.bowerJson
case Jsonic.parseJson result >>= Json.decodeJson of
Left err -> do
let printed = Json.printJsonDecodeError err
log $ "Could not decode returned bower.json. " <> printed
log result
throwError $ ResourceError { resource: FileResource BowerJson, error: DecodeError printed }
Right bowerfile ->
pure $ Bowerfile.toManifestFields bowerfile

spagoJson <- liftAff $ Except.runExceptT requestSpagoJson
let spagoManifest = map SpagoJson.toManifestFields spagoJson

{ dependencies, devDependencies } <- case bowerManifest, spagoManifest of
Left _, Left _ -> do
-- TODO: We may want to report a `NonEmptyArray ImportError` so as to
-- report on multiple errors, such as the multiple missing files in this
-- situation.
throwError NoDependencyFiles
Left _, Right { dependencies, devDependencies } ->
pure { dependencies, devDependencies }
Right { dependencies, devDependencies }, _ ->
pure { dependencies, devDependencies }

-- We can detect the license for the project using a combination of `licensee`
-- and reading the license directly out of the Spago and Bower files (the
-- CLI tool will not read from either file).
licenseeOutput <- detectLicense files

let
spagoLicenses = maybe [] NEA.toArray $ _.license =<< hush spagoManifest
bowerLicenses = maybe [] NEA.toArray $ _.license =<< hush bowerManifest
licenseeLicenses = Array.catMaybes $ map NES.fromString licenseeOutput
license = NEA.fromArray $ Array.nub $ Array.concat [ licenseeLicenses, spagoLicenses, bowerLicenses ]
description = join (_.description <$> hush bowerManifest)

when (license == Nothing) do
log $ "No license available for " <> un RawPackageName package <> " " <> un RawVersion version

pure { license, dependencies, devDependencies, description }
where
detectLicense { licenseFile, packageJson } = do
licenseeResult <- liftAff $ Licensee.detectFiles $ Array.catMaybes $ map hush
-- Detection only works on these files, and won't work on Spago files,
-- Bower files, or the JSON produced by the dhall-to-json result of
-- converting the Spago file.
[ packageJson <#> { name: "package.json", contents: _ }
, licenseFile <#> { name: "LICENSE", contents: _ }
]

detectedLicenses <- case licenseeResult of
Left err -> do
log $ "Licensee decoding error, ignoring: " <> err
pure []
Right licenses ->
pure licenses

pure detectedLicenses

-- Attempt to construct a Spago JSON file by fetching the spago.dhall and
-- packages.dhall files and converting them to JSON with dhall-to-json.
requestSpagoJson :: ExceptT ImportError Aff SpagoJson
requestSpagoJson = do
files <- sequential ado
spagoDhall <- parallel $ fileRequest SpagoDhall Process.stringSerializer
packagesDhall <- parallel $ fileRequest PackagesDhall Process.stringSerializer
in { spagoDhall, packagesDhall }

tmp <- liftEffect Tmp.mkTmpDir
liftAff $ FS.writeTextFile UTF8 (tmp <> "/packages.dhall") files.packagesDhall

spagoJson <- do
let
mkError = ResourceError <<< { resource: FileResource SpagoDhall, error: _ } <<< DecodeError
runDhallJson = Dhall.dhallToJson { dhall: files.spagoDhall, cwd: Just tmp }

Except.mapExceptT (map (lmap mkError))
$ Except.ExceptT
$ map (_ >>= (Json.decodeJson >>> lmap Json.printJsonDecodeError)) runDhallJson

pure spagoJson

-- Request a file from the remote repository associated with the package
-- version. Files will be cached using the provided serializer and
-- will be read from the cache up to the cache expiry time given in `Hours`.
fileRequest :: FileResource -> Process.Serialize String String -> ExceptT ImportError Aff String
fileRequest resource serialize = do
let
name = un RawPackageName package
tag = un RawVersion version
filePath = fileResourcePath resource
url = i "https://github.com/raw/" address.owner "/" address.repo "/" tag "/" filePath
fileCacheName = String.replace (String.Pattern ".") (String.Replacement "-") filePath
cacheKey = i fileCacheName "__" name "__" tag
mkError = ResourceError <<< { resource: FileResource resource, error: _ }

Process.withCache serialize cacheKey Nothing do
liftAff (Http.get ResponseFormat.string url) >>= case _ of
Left error -> do
let printed = Http.printError error
log $ i "Unable to retrieve " filePath " because the request failed: " printed
throwError $ mkError BadRequest
Right { status: StatusCode status, body }
| status == 404 -> do
log $ i "Unable to retrieve " filePath " because none exists (404 error)."
throwError $ mkError $ BadStatus status
| status /= 200 -> do
log $ i "Unable to retrieve " filePath " because of a bad status code: " body
throwError $ mkError $ BadStatus status
| otherwise ->
pure body
4 changes: 2 additions & 2 deletions ci/src/Registry/Scripts/LegacyImport/Error.purs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ data ImportError
| NoDependencyFiles
| NonRegistryDependencies (NonEmptyArray RawPackageName)
| NoManifests
| ManifestError (NonEmptyArray ManifestError)
| ManifestImportError (NonEmptyArray ManifestError)

derive instance Eq ImportError
derive instance Generic ImportError _
Expand All @@ -92,7 +92,7 @@ printImportErrorKey = case _ of
NoDependencyFiles -> ImportErrorKey "noDependencyFiles"
NonRegistryDependencies _ -> ImportErrorKey "nonRegistryDependencies"
NoManifests -> ImportErrorKey "noManifests"
ManifestError _ -> manifestErrorKey
ManifestImportError _ -> manifestErrorKey

-- | An error fetching a resource necessary to produce a Manifest for a
-- | given package.
Expand Down
Loading