From 0348fde1c7b87f104f9678ac8f1c0f1c53ab1e0d Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 22 Mar 2023 17:00:27 -0400 Subject: [PATCH 01/28] DEV-667: WIP: stage & index item Almost working - just need to get it to index the output from solr (probably an issue with load_into_solr.sh) --- stage-item/Gemfile | 19 +++++ stage-item/Gemfile.lock | 54 +++++++++++++ stage-item/stage_item.rb | 160 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+) create mode 100644 stage-item/Gemfile create mode 100644 stage-item/Gemfile.lock create mode 100644 stage-item/stage_item.rb diff --git a/stage-item/Gemfile b/stage-item/Gemfile new file mode 100644 index 000000000..d3f44df92 --- /dev/null +++ b/stage-item/Gemfile @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } + +# gem "rails" + +gem "marc", "~> 1.2" +gem "faraday", "~> 2.7" +gem "faraday-follow_redirects" +gem "ht-pairtree", git: "../../ht-pairtree" +gem "mysql2" +gem "sequel" + +group :development do + gem "pry" + gem "pry-byebug" +end diff --git a/stage-item/Gemfile.lock b/stage-item/Gemfile.lock new file mode 100644 index 000000000..cbb6d3a90 --- /dev/null +++ b/stage-item/Gemfile.lock @@ -0,0 +1,54 @@ +GIT + remote: ../../ht-pairtree + revision: 56c5d0517588065b1dbd8d7b208dfcb9a0d61793 + specs: + ht-pairtree (0.1.0) + rpairtree + +GEM + remote: https://rubygems.org/ + specs: + byebug (11.1.3) + coderay (1.1.3) + faraday (2.7.4) + faraday-net_http (>= 2.0, < 3.1) + ruby2_keywords (>= 0.0.4) + faraday-follow_redirects (0.3.0) + faraday (>= 1, < 3) + faraday-net_http (3.0.2) + marc (1.2.0) + rexml + scrub_rb (>= 1.0.1, < 2) + unf + method_source (1.0.0) + mysql2 (0.5.5) + pry (0.14.2) + coderay (~> 1.1) + method_source (~> 1.0) + pry-byebug (3.10.1) + byebug (~> 11.0) + pry (>= 0.13, < 0.15) + rexml (3.2.5) + rpairtree (0.2.0) + ruby2_keywords (0.0.5) + scrub_rb (1.0.1) + sequel (5.66.0) + 
unf (0.1.4) + unf_ext + unf_ext (0.0.8.2) + +PLATFORMS + x86_64-linux + +DEPENDENCIES + faraday (~> 2.7) + faraday-follow_redirects + ht-pairtree! + marc (~> 1.2) + mysql2 + pry + pry-byebug + sequel + +BUNDLED WITH + 2.2.22 diff --git a/stage-item/stage_item.rb b/stage-item/stage_item.rb new file mode 100644 index 000000000..678b7f201 --- /dev/null +++ b/stage-item/stage_item.rb @@ -0,0 +1,160 @@ +#!ruby + +require "faraday" +require "faraday/follow_redirects" +require "fileutils" +require "ht/pairtree" +require "marc" +require "pry" +require "pry-byebug" +require "sequel" + +# The parent of babel-local-dev where all the HathiTrust repos are checked out +HTDEV_ROOT = ENV["HTDEV_ROOT"] || File.realpath(File.join(__dir__,"..","..")) + +METADATA_ROOT = ENV["METADATA_ROOT"] || File.join(HTDEV_ROOT,"imgsrv-sample-data","metadata") +SDRDATAROOT = ENV["SDRDATAROOT"] || File.join(HTDEV_ROOT,"imgsrv-sample-data","sdr1") +CATALOG_BASE = ENV["CATALOG_BASE"] || "https://catalog.hathitrust.org" +MYSQL_URL = ENV["MYSQL_URL"] || "mysql2://mdp-admin:mdp-admin@127.0.0.1:3307/ht" +CATALOG_SOLR = ENV["CATALOG_SOLR"] || "http://localhost:9033/solr/catalog" +LSS_SOLR = ENV["LSS_SOLR"] || "http://localhost:8983/solr/core-x" + +class StageItem + attr_reader :htid, :namespace, :objid, :zip, :mets, :pt + + def self.main + usage unless ARGV.length == 3 + + StageItem.new(*ARGV).run + end + + def initialize(htid, zip, mets) + @htid = htid + (@namespace, @objid) = htid.split(".",2) + @zip = zip + @mets = mets + + usage unless [@zip, @mets].all? 
{ |f| File.exist?(f) } && + @zip.match?(/\.zip$/) && @mets.match?(/\.xml$/) + + @pt = HathiTrust::Pairtree.new(root: SDRDATAROOT) + end + + def run + stage_content + stage_metadata + index_full_text + end + + def stage_metadata + Tempfile.create(["metadata", ".json"], File.realpath(METADATA_ROOT)) do |f| + metadata = fetch_metadata(f) + populate_database(metadata) + index_metadata(File.basename(f.path)) + end + end + + def stage_content + pt.create(htid, new_namespace_allowed: true) + repo_path = pt.path_for(htid) + puts("↪️ Copying to repo\n") + FileUtils.cp([zip,mets],repo_path) + end + + def fetch_metadata(tempfile) + url = "/Record/HTID/#{htid}.json" + puts "📙 Getting metadata #{CATALOG_BASE}#{url} and saving to tempfile #{tempfile.path}\n" + + conn = Faraday.new(CATALOG_BASE) do |f| + f.response :follow_redirects + end + + json = conn.get("/Record/HTID/#{htid}.json").body + + tempfile.write(json) + tempfile.flush + + MARC::Record.new_from_hash(JSON.parse(json)) + end + + def populate_database(record) + catalog_id = record["001"].value + + # each item has a 974 field; the HTID is in 974$u + item_data = record.fields("974").find { |f| f["u"] == htid } + raise "Can't find item data for #{htid} in record #{catalog_id}" unless item_data + + rights_attr = item_data["r"] + rights_reason = item_data["q"] + rights_source = item_data["s"] + zephir_update_date = item_data["d"] + + # Simplification for the purposes of testing data: for now, the access + # profile is 2 ('google') if the item was digitized by google and 1 + # ('open') otherwise. We can add options later to override all the rights + # stuff for purposes of testing. + access_profile = (rights_source == 'google') ? 
2 : 1 + + dbh = Sequel.connect(MYSQL_URL) + + sql = <<~SQL + REPLACE INTO rights_current (namespace, id, attr, reason, source, access_profile, user, note) VALUES + (?, ?, + (SELECT id FROM attributes WHERE name = ?), + (SELECT id FROM reasons WHERE name = ?), + (SELECT id FROM sources WHERE name = ?), + ?,'stage-item','staged from catalog record by stage_item.rb') + SQL + + values = [namespace, objid, rights_attr, rights_reason, rights_source, access_profile] + dbh[sql,*values].insert + + sql = <<~SQL + REPLACE INTO slip_rights (nid, attr, reason, source, user, time, sysid, update_time) + SELECT concat(namespace, '.', id), attr, reason, source, user, time, ?, ? FROM rights_current WHERE namespace = ? and id = ? + SQL + + values = [catalog_id, zephir_update_date, namespace, objid] + dbh[sql,*values].insert + end + + def index_metadata(file) + puts "📕 Indexing metadata..." + + catalog_utils_sh = File.join(HTDEV_ROOT,"hathitrust_catalog_indexer","bin","utils.sh") + system("docker-compose run traject bin/index_file metadata/#{file}") + system("bash -c 'source #{catalog_utils_sh}; solr_url; commit'") + end + + def index_full_text + puts "📖 Indexing full text..." + + system("docker-compose run slip index/docs-j -r11 -I#{htid}") + + slip_sample_dir = File.join(HTDEV_ROOT,"slip","sample") + load_into_solr_sh = File.join(HTDEV_ROOT,"slip","sample","load_into_solr.sh") + pt_objid = File.basename(mets,".mets.xml") + + puts("#{slip_sample_dir}/load_into_solr.sh #{slip_sample_dir}/#{pt_objid}*.solr.xml") + + # slip/sample/load_into_solr.sh + end + + def self.usage + <<~EOT + Usage: $0 namespace.barcode some_item.zip some_item.mets.xml + + where htid is something like "namespace.objid". + + Stages an item into the sample repository from a given zip and XML file. 
It: + * fetches metadata from the catalog + * indexes this into the sample catalog + * populates the rights_current and slip_rights table + * indexes the full text + EOT + end +end + +if $0 == __FILE__ + StageItem.main +end From b6272216bdf62d161b8cc214b7d31bc353c7724c Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 22 Mar 2023 17:01:18 -0400 Subject: [PATCH 02/28] Reorganize docker compose for staging item Get it up to date with new imgsrv & slip stuff --- docker-compose.yml | 158 +++++++++++++++++++++++++++++++++------------ 1 file changed, 118 insertions(+), 40 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 82dc2e360..c397f640e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,51 +2,77 @@ version: '3' services: - nginx: +# This starts up the following services exposing ports: +# +# catalog front-end: localhost:8080 +# imgsrv (and eventually more) via apache: localhost:8888 +# +# catalog solr: localhost:9033 +# full-text solr: localhost:8983 +# mysql: localhost:3307 +# + +### CATALOG FRONT-END + + nginx-catalog: image: nginx ports: - "8080:8080" volumes: - - ./docker/nginx-default.conf:/etc/nginx/conf.d/default.conf + - ../catalog/docker/nginx-default.conf:/etc/nginx/conf.d/default.conf - ../catalog:/app - ../common:/app/common-dev depends_on: - vufind - - imgsrv + profiles: + - catalog + - default + + vufind: + build: ../catalog + hostname: 'catalog-dev' + volumes: + - ../catalog:/app + depends_on: + - mysql-sdr + - solr-sdr-catalog + profiles: + - catalog + - default + +### BABEL APPS - apache-cgi: + apache-babel: build: - context: ".." - dockerfile: "./babel-local-dev/Dockerfile" + context: . 
+ dockerfile: ./docker/Dockerfile.apache + image: hathitrust-imsgsrv-apache + ports: + - "8888:80" volumes: - - ../catalog:/htapps/app - - ../common:/htapps/babel/common - - ../imgsrv:/htapps/babel/imgsrv - - ../pt:/htapps/babel/pt - - ../mdp-web:/htapps/babel/mdp-web - - ../mdp-lib:/htapps/babel/mdp-lib - - ../slip-lib:/htapps/babel/slip-lib - - ../plack-lib:/htapps/babel/plack-lib + - "../imgsrv:/htapps/babel/imgsrv" + - "../imgsrv/docker/sites-available:/etc/apache2/sites-available" - "../imgsrv-sample-data/sdr1:/sdr1" - "../imgsrv-sample-data/etc:/htapps/babel/etc" - "../imgsrv-sample-data/watermarks:/htapps/babel/watermarks" - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" - environment: - - SDRROOT=/htapps/babel - - SDRDATAROOT=/sdr1 - - HT_DEV= - - MARIADB_USER=ht_web - - REMOTE_ADDR=127.0.0.1 - - HTTP_HOST=127.0.0.1 depends_on: + - imgsrv - mysql-sdr - solr-sdr-catalog - ports: - - "41028:41028" + environment: + - SDRROOT=/htapps/babel + - SDRDATAROOT=/sdr1 + - HT_DEV=docker + command: bash -c "/htapps/babel/imgsrv/docker/bin/apache.sh" + profiles: + - imgsrv + - default # assumes imgsrv-sample-data has been checked out next to "imgsrv" imgsrv: build: ../imgsrv + image: hathitrust-imgsrv volumes: - ../imgsrv:/htapps/babel/imgsrv - "../imgsrv-sample-data/sdr1:/sdr1" @@ -56,8 +82,7 @@ services: environment: - SDRROOT=/htapps/babel - SDRDATAROOT=/sdr1 - - HT_DEV= - - MARIADB_USER=ht_web + - HT_DEV=docker - REMOTE_ADDR=127.0.0.1 - HTTP_HOST=127.0.0.1 depends_on: @@ -66,35 +91,88 @@ services: ports: - "31028:31028" command: bash -c "/htapps/babel/imgsrv/bin/startup_imgsrv" + profiles: + - imgsrv + - default + +#### DATA STORES solr-sdr-catalog: image: ghcr.io/hathitrust/catalog-solr-sample ports: - "9033:9033" + profiles: + - catalog + - slip + - traject + - imgsrv + - default + + solr-lss-dev: + image: solr:6 + ports: + - "8983:8983" + volumes: + - ../lss_solr_configs/lss-dev/core-x:/opt/solr/server/solr/core-x + - 
../lss_solr_configs/lss-dev/core-y:/opt/solr/server/solr/core-y + - ../lss_solr_configs:/opt/lss_solr_configs + - ../lss_solr_configs/lib:/opt/solr/server/solr/lib + profiles: + - slip + - default mysql-sdr: - image: mariadb + image: ghcr.io/hathitrust/db-image:latest volumes: - - ../catalog/docker/vufind.sql:/docker-entrypoint-initdb.d/vufind.sql - - ../imgsrv/vendor/common-lib/lib/sql/000_ht_schema.sql:/docker-entrypoint-initdb.d/0000_ht_schema.sql - - ../imgsrv/vendor/common-lib/lib/sql/001_ht_ht_namespaces.sql:/docker-entrypoint-initdb.d/001_ht_ht_namespaces.sql - - ../imgsrv/vendor/common-lib/lib/sql/002_ht_rights_current.sql:/docker-entrypoint-initdb.d/002_ht_rights_current.sql - - ../imgsrv/sql/100_ht_web_schema.sql:/docker-entrypoint-initdb.d/100_ht_web_schema.sql - - ../imgsrv/sql/200_users.sql:/docker-entrypoint-initdb.d/200_users.sql + - ../slip/etc/sql/100_slip.sql:/docker-entrypoint-initdb.d/100_slip.sql + - ../catalog/docker/vufind.sql:/docker-entrypoint-initdb.d/101_vufind.sql + ports: + - "3307:3306" + profiles: + - catalog + - imgsrv + - slip + - default environment: - # - MARIADB_RANDOM_ROOT_PASSWORD=1 + # - mysql-sdr_RANDOM_ROOT_PASSWORD=1 MYSQL_ROOT_PASSWORD: TIMTOWTDIBSCINABTE - vufind: - build: ../catalog - hostname: 'catalog-dev' +#### INDEXING + + slip: + build: ../slip + image: hathitrust-slip volumes: - - ../catalog:/app + - ../slip:/htapps/babel/slip + # this is where docs-j saves output + - ../slip/sample:/htapps/babel/logs/tmp + - "../imgsrv-sample-data/sdr1:/sdr1" + - "../imgsrv-sample-data/etc:/htapps/babel/etc" + - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" + environment: + - SDRROOT=/htapps/babel + - SDRDATAROOT=/sdr1 + - HT_DEV=docker depends_on: - mysql-sdr - solr-sdr-catalog + command: bash + profiles: + - slip + + traject: + image: ghcr.io/hathitrust/catalog-indexer-unstable + environment: + - SOLR_URL=http://solr-sdr-catalog:9033/solr/catalog + - redirect_file=/dev/null + - NO_DB=1 + - DDIR=/app/metadata + depends_on: + - 
solr-sdr-catalog + volumes: + - "../imgsrv-sample-data/metadata:/app/metadata" + profiles: + - traject -volumes: - # sdr1: - data_db: + # todo: ingest, bound to imgsrv-sample-data From a1bfc62452e1b9465d3239456a71e9e7c0452a26 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 23 Mar 2023 09:40:00 -0400 Subject: [PATCH 03/28] actually index the item into solr --- .gitignore | 2 ++ stage-item/stage_item.rb | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..d880d8c5e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +vendor +.bundle diff --git a/stage-item/stage_item.rb b/stage-item/stage_item.rb index 678b7f201..faf297742 100644 --- a/stage-item/stage_item.rb +++ b/stage-item/stage_item.rb @@ -135,9 +135,7 @@ def index_full_text load_into_solr_sh = File.join(HTDEV_ROOT,"slip","sample","load_into_solr.sh") pt_objid = File.basename(mets,".mets.xml") - puts("#{slip_sample_dir}/load_into_solr.sh #{slip_sample_dir}/#{pt_objid}*.solr.xml") - - # slip/sample/load_into_solr.sh + system("bash #{slip_sample_dir}/load_into_solr.sh #{slip_sample_dir}/#{pt_objid}*.solr.xml") end def self.usage From 59b05bf08ee4d5e6ca11a3420c2c2326f34db9f3 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 23 Mar 2023 16:09:16 -0400 Subject: [PATCH 04/28] Update README & docker-compose (provisional) --- README.md | 66 ++++++++++++++++++++++++++-------------------- docker-compose.yml | 43 ++++-------------------------- 2 files changed, 42 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 6c3d1b93b..a6f4e8a65 100644 --- a/README.md +++ b/README.md @@ -8,27 +8,20 @@ Clone all the repositories in a working directory. We're going to be running docker from this working directory, so `babel-local-dev` has access to the other repositories. -There's a lot, because we're replicating running on the -dev servers with `debug_local=1` enabled. 
- -``` -$ mkdir workdir -$ cd workdir -$ git clone git@github.com:hathitrust/babel-local-dev.git -$ git clone git@github.com:hathitrust/catalog.git -$ git clone git@github.com:hathitrust/common.git -$ git clone git@github.com:hathitrust/imgsrv.git -$ git clone git@github.com:hathitrust/pt.git -$ git clone git@github.com:hathitrust/mdp-lib.git -$ git clone git@github.com:hathitrust/slip-lib.git -$ git clone git@github.com:hathitrust/plack-lib.git -$ git clone git@github.com:hathitrust/imgsrv-sample-data.git -# more to come +First clone this repository: +```bash +git clone git@github.com:hathitrust/babel-local-dev.git ``` -## Step 2: intialize all the submodules +Then run: + +```bash +babel-local-dev/setup.sh +``` -*Insert fancy one liner if available.* +This will check out the other repositories along with their submodules. +There's a lot, because we're replicating running on the dev servers with +`debug_local=1` enabled. ## Step 3: build the `babel-local-dev` environment @@ -48,24 +41,39 @@ docker-compose -f ./babel-local-dev/docker-compose.yml up In your browser: -* http://localhost:8080/Search/Home -* http://localhost:8080/cgi/pt?id=test.pd_open +* catalog: `http://localhost:8080/Search/Home` +* catalog solr: `http://localhost:9033` +* full-text solr: `http://localhost:8983` + +imgsrv: + +* `http://localhost:8888/cgi/imgsrv/cover?id=test.pd_open` +* `http://localhost:8888/cgi/imgsrv/image?id=test.pd_open&seq=1` +* `http://localhost:8888/cgi/imgsrv/html?id=test.pd_open&seq=1` +* `http://localhost:8888/cgi/imgsrv/download/pdf?id=test.pd_open&seq=1&attachment=0` +mysql is exposed at 127.0.0.1:3307. The default username & password with write +access is `mdp-admin` / `mdp-admin` (needless to say, do not use this image in +production!) + +```bash +mysql -h 127.0.0.1 -P 3307 -u mdp-admin -p +``` Huzzah! 
-## How this works (for now) +Not yet configured: +* `http://localhost:8888/cgi/pt?id=test.pd_open` +* `http://localhost:8888/cgi/mb` +* `http://localhost:8888/cgi/whoami` +* `http://localhost:8888/cgi/ping` -The `docker-commpose` provides a custom catalog configuration to the `nginx` service to -proxy `babel` CGI requests to the `apache-cgi` service, and serve `common` requests from -the local `common` checkout. +## How this works (for now) -`apache-cgi` is there because `nginx` can only speak FastCGI/HTTP and running *all* the babel -apps under FastCGI/HTTP is still aspirational. +* catalog runs nginx + php +* babel cgi apps run under apache in a single container +* imgsrv plack/psgi process runs in its own container ## TODO -- [ ] merge the `imgsrv` DEV-231-grok branch and update the `Dockerfile`s to include `grok` -- [ ] update `slip-lib/Searcher.pm` to set `wt=xml` because the new solr defaults return JSON - [ ] adding `pt` requires filling out more of the `ht_web` tables (namely `mb_*`) - [ ] easy mechanism to generate placeholder volumes in `imgsrv-sample-data` that correspond to the records in the catalog - diff --git a/docker-compose.yml b/docker-compose.yml index c397f640e..28daa2123 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,16 +2,6 @@ version: '3' services: -# This starts up the following services exposing ports: -# -# catalog front-end: localhost:8080 -# imgsrv (and eventually more) via apache: localhost:8888 -# -# catalog solr: localhost:9033 -# full-text solr: localhost:8983 -# mysql: localhost:3307 -# - ### CATALOG FRONT-END nginx-catalog: @@ -24,9 +14,6 @@ services: - ../common:/app/common-dev depends_on: - vufind - profiles: - - catalog - - default vufind: build: ../catalog @@ -36,9 +23,6 @@ services: depends_on: - mysql-sdr - solr-sdr-catalog - profiles: - - catalog - - default ### BABEL APPS @@ -65,9 +49,6 @@ services: - SDRDATAROOT=/sdr1 - HT_DEV=docker command: bash -c "/htapps/babel/imgsrv/docker/bin/apache.sh" - profiles: 
- - imgsrv - - default # assumes imgsrv-sample-data has been checked out next to "imgsrv" imgsrv: @@ -91,9 +72,6 @@ services: ports: - "31028:31028" command: bash -c "/htapps/babel/imgsrv/bin/startup_imgsrv" - profiles: - - imgsrv - - default #### DATA STORES @@ -101,12 +79,6 @@ services: image: ghcr.io/hathitrust/catalog-solr-sample ports: - "9033:9033" - profiles: - - catalog - - slip - - traject - - imgsrv - - default solr-lss-dev: image: solr:6 @@ -117,9 +89,6 @@ services: - ../lss_solr_configs/lss-dev/core-y:/opt/solr/server/solr/core-y - ../lss_solr_configs:/opt/lss_solr_configs - ../lss_solr_configs/lib:/opt/solr/server/solr/lib - profiles: - - slip - - default mysql-sdr: image: ghcr.io/hathitrust/db-image:latest @@ -128,11 +97,6 @@ services: - ../catalog/docker/vufind.sql:/docker-entrypoint-initdb.d/101_vufind.sql ports: - "3307:3306" - profiles: - - catalog - - imgsrv - - slip - - default environment: # - mysql-sdr_RANDOM_ROOT_PASSWORD=1 @@ -140,6 +104,9 @@ services: #### INDEXING + # We add the 'indexing' profile to keep these from starting automatically + # when we do 'docker-compose up' + slip: build: ../slip image: hathitrust-slip @@ -159,7 +126,7 @@ services: - solr-sdr-catalog command: bash profiles: - - slip + - indexing traject: image: ghcr.io/hathitrust/catalog-indexer-unstable @@ -173,6 +140,6 @@ services: volumes: - "../imgsrv-sample-data/metadata:/app/metadata" profiles: - - traject + - indexing # todo: ingest, bound to imgsrv-sample-data From 5b3d9d9b3cd2f65604d34d42d8ca4c95c2c7658f Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 23 Mar 2023 16:16:53 -0400 Subject: [PATCH 05/28] setup script to clone needed repos --- setup.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 setup.sh diff --git a/setup.sh b/setup.sh new file mode 100755 index 000000000..3c6aff884 --- /dev/null +++ b/setup.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +echo "Checking out into $PWD - enter to continue, ctrl-C to abort" + +read + +git 
clone --recurse-submodules git@github.com:hathitrust/imgsrv +git clone --recurse-submodules git@github.com:hathitrust/catalog +git clone --recurse-submodules git@github.com:hathitrust/common +git clone --recurse-submodules -b DEV-667-stage-item git@github.com:hathitrust/ht-pairtree +git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/slip +git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/imgsrv-sample-data +git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/lss_solr_configs + +# Not yet covered in the apache config although maybe it was before +# git clone git@github.com:hathitrust/pt.git + +# Do we need these separately? +# git clone git@github.com:hathitrust/mdp-lib.git +# git clone git@github.com:hathitrust/slip-lib.git +# git clone git@github.com:hathitrust/plack-lib.git From 9fd1c160f08f666371fd972cbbcbc434a4d7c515 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 23 Mar 2023 16:56:20 -0400 Subject: [PATCH 06/28] README for stage item; fix path in docker-compose trying to get it all working w/ a clean checkout --- README.md | 30 ++++++++++++++++++++++++++++++ docker-compose.yml | 2 +- setup.sh | 1 + stage-item/stage_item.rb | 2 +- 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a6f4e8a65..93c795e43 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,36 @@ Not yet configured: * babel cgi apps run under apache in a single container * imgsrv plack/psgi process runs in its own container +## Staging an Item + +First, get a HathiTrust ZIP and METS. The easiest way to do this is probably by +using the [Data API client](https://babel.hathitrust.org/cgi/htdc) to download +a public domain item unencumbered by any contractual restrictions, for example +`uc2.ark:/13960/t4mk66f1d`. Select "Download" and in turn select "Item METS +file" and "entire item" and submit the form; this will download the ZIP and +METS respectively. 
+ +Running the stage item script requires a Ruby runtime. It will automate putting +the item in the appropriate location under `imgsrv-sample-data`, fetch the +bibliographic data, and extract and index the full text. + +First make sure all the dependencies are running: + +```bash +docker-compose build +docker-compose up +``` + +Then, install dependencies for the `stage-item` script and run it with the +downloaded zip and METS: + +```bash +cd babel-local-dev/stage-item +bundle config set --local path 'vendor/bundle' +bundle install +bundle exec ruby stage_item.rb uc2.ark:/13960/t4mk66f1d ark+=13960=t4mk66f1d.zip ark+=13960=t4mk66f1d.mets.xml +``` + ## TODO - [ ] adding `pt` requires filling out more of the `ht_web` tables (namely `mb_*`) diff --git a/docker-compose.yml b/docker-compose.yml index 28daa2123..ac99275f3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,7 +29,7 @@ services: apache-babel: build: context: . - dockerfile: ./docker/Dockerfile.apache + dockerfile: ../imgsrv/docker/Dockerfile.apache image: hathitrust-imsgsrv-apache ports: - "8888:80" diff --git a/setup.sh b/setup.sh index 3c6aff884..65c0a1047 100755 --- a/setup.sh +++ b/setup.sh @@ -7,6 +7,7 @@ read git clone --recurse-submodules git@github.com:hathitrust/imgsrv git clone --recurse-submodules git@github.com:hathitrust/catalog git clone --recurse-submodules git@github.com:hathitrust/common +git clone --recurse-submodules git@github.com:hathitrust/hathitrust_catalog_indexer git clone --recurse-submodules -b DEV-667-stage-item git@github.com:hathitrust/ht-pairtree git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/slip git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/imgsrv-sample-data diff --git a/stage-item/stage_item.rb b/stage-item/stage_item.rb index faf297742..0ec98b2d8 100644 --- a/stage-item/stage_item.rb +++ b/stage-item/stage_item.rb @@ -57,7 +57,7 @@ def stage_metadata def stage_content pt.create(htid, 
new_namespace_allowed: true) repo_path = pt.path_for(htid) - puts("↪️ Copying to repo\n") + puts("↪️ Copying zip and mets to repo #{repo_path}\n") FileUtils.cp([zip,mets],repo_path) end From 57edd7ac434e2b47ddb22382dc10b0a80253f42e Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Fri, 24 Mar 2023 16:30:33 -0400 Subject: [PATCH 07/28] DEV-667: reconcile this and imgsrv This attempts to reconcile earlier work here with later work in the imgsrv repo and more recent work in this branch. It uses: - nginx for catalog, imgsrv fastcgi, and static files - proxy to apache for cgi So far working: - catalog incl. CSS & JS (via shared checked-out common repo) - imgsrv fcgi - imgsrv cgi I was also able to clone pt & see that it at least attempted it (it had an error about missing GeoIP data) --- Dockerfile | 240 ++++---------------------------------- docker-compose.yml | 69 +++++------ docker/000-default.conf | 67 ++++------- docker/apache.sh | 13 +++ docker/nginx-default.conf | 10 +- setup.sh | 5 + 6 files changed, 97 insertions(+), 307 deletions(-) create mode 100755 docker/apache.sh diff --git a/Dockerfile b/Dockerfile index aa7a80e81..89dc74eaf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,219 +1,21 @@ -FROM debian:bullseye - -RUN sed -i 's/main.*/main contrib non-free/' /etc/apt/sources.list - -RUN apt-get update && apt-get install -y \ - perl \ - libxerces-c3.2 \ - libxerces-c3-dev \ - sqlite3 \ - file \ - libalgorithm-diff-xs-perl \ - libany-moose-perl \ - libapache-session-perl \ - libarchive-zip-perl \ - libclass-accessor-perl \ - libclass-c3-perl \ - libclass-data-accessor-perl \ - libclass-data-inheritable-perl \ - libclass-errorhandler-perl \ - libclass-load-perl \ - libcommon-sense-perl \ - libcompress-raw-zlib-perl \ - libconfig-auto-perl \ - libconfig-inifiles-perl \ - libconfig-tiny-perl \ - libcrypt-openssl-random-perl \ - libcrypt-openssl-rsa-perl \ - libcrypt-ssleay-perl \ - libdata-optlist-perl \ - libdata-page-perl \ - libdate-calc-perl \ - 
libdate-manip-perl \ - libdbd-mock-perl \ - libdbd-mysql-perl \ - libdbd-sqlite3-perl \ - libdevel-globaldestruction-perl \ - libdigest-sha-perl \ - libemail-date-format-perl \ - libencode-locale-perl \ - liberror-perl \ - libeval-closure-perl \ - libexcel-writer-xlsx-perl \ - libfcgi-perl \ - libfcgi-procmanager-perl \ - libfile-listing-perl \ - libfile-slurp-perl \ - libfilesys-df-perl \ - libgeo-ip-perl \ - libhtml-parser-perl \ - libhtml-tree-perl \ - libhttp-browserdetect-perl \ - libhttp-cookies-perl \ - libhttp-daemon-perl \ - libhttp-date-perl \ - libhttp-dav-perl \ - libhttp-message-perl \ - libhttp-negotiate-perl \ - libimage-exiftool-perl \ - libimage-info-perl \ - libimage-size-perl \ - libinline-perl \ - libio-html-perl \ - libio-socket-ssl-perl \ - libio-string-perl \ - libipc-run-perl \ - libjson-perl \ - libjson-pp-perl \ - libjson-xs-perl \ - liblist-compare-perl \ - liblist-moreutils-perl \ - liblog-log4perl-perl \ - liblwp-authen-oauth2-perl \ - liblwp-mediatypes-perl \ - libmail-sendmail-perl \ - libmailtools-perl \ - libmime-lite-perl \ - libmime-types-perl \ - libmodule-implementation-perl \ - libmodule-runtime-perl \ - libmoose-perl \ - libmouse-perl \ - libmro-compat-perl \ - libnet-dns-perl \ - libnet-http-perl \ - libnet-libidn-perl \ - libnet-oauth-perl \ - libnet-ssleay-perl \ - libpackage-deprecationmanager-perl \ - libpackage-stash-perl \ - libparse-recdescent-perl \ - libplack-perl \ - libpod-simple-perl \ - libproc-processtable-perl \ - libreadonly-perl \ - libreadonly-xs-perl \ - libroman-perl \ - libsoap-lite-perl \ - libspreadsheet-writeexcel-perl \ - libsub-exporter-progressive-perl \ - libsub-name-perl \ - libtemplate-perl \ - libterm-readkey-perl \ - libterm-readline-gnu-perl \ - libtest-requiresinternet-perl \ - libtest-simple-perl \ - libtie-ixhash-perl \ - libtimedate-perl \ - libtry-tiny-perl \ - libuniversal-require-perl \ - liburi-encode-perl \ - libuuid-perl \ - libuuid-tiny-perl \ - libversion-perl \ - libwww-perl \ - 
libwww-robotrules-perl \ - libxml-dom-perl \ - libxml-libxml-perl \ - libxml-libxslt-perl \ - libxml-sax-perl \ - libxml-simple-perl \ - libxml-writer-perl \ - libyaml-appconfig-perl \ - libyaml-libyaml-perl \ - libyaml-perl \ - libmarc-record-perl \ - libmarc-xml-perl - -RUN apt-get install -y \ - autoconf \ - bison \ - build-essential \ - git \ - libdevel-cover-perl \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libreadline6-dev \ - libsqlite3-dev \ - libssl-dev \ - libyaml-dev \ - openssh-server \ - unzip \ - wget \ - zip \ - zlib1g-dev \ - netcat \ - libperl-critic-perl - -RUN apt-get install -y libtest-class-perl libswitch-perl libtest-spec-perl libtest-mockobject-perl - -RUN apt-get install -y apache2 apache2-utils vim - -RUN cpan -T \ - File::Pairtree \ - URI::Escape \ - CGI::PSGI \ - IP::Geolocation::MMDB - -WORKDIR htapps/babel/geoip -RUN wget https://github.com/maxmind/MaxMind-DB/blob/main/test-data/GeoIP2-Country-Test.mmdb?raw=true -O GeoIP2-Country.mmdb - -RUN ln -s /tmp /ram - -RUN mkdir -p /l/local/bin -RUN ln -s /usr/bin/unzip /l/local/bin/unzip -RUN ln -s /usr/bin/plackup /l/local/bin/plackup - -WORKDIR /tmp -COPY ./imgsrv/vendor/KDU7A2_Demo_Apps_for_Ubuntu-x86-64_170827.zip /tmp -RUN unzip -j -d /tmp/kakadu KDU7A2_Demo_Apps_for_Ubuntu-x86-64_170827.zip -# RUN wget https://kakadusoftware.com/wp-content/uploads/2014/06/KDU7A2_Demo_Apps_for_Ubuntu-x86-64_170827.zip -# RUN unzip -j -d kakadu KDU7A2_Demo_Apps_for_Ubuntu-x86-64_170827.zip -RUN mv /tmp/kakadu/*.so /usr/local/lib -RUN mv /tmp/kakadu/kdu* /usr/local/bin -RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/kakadu.conf -RUN ldconfig - -RUN mkdir -p /l/local/bin -RUN ln -s /usr/bin/convert /l/local/bin/convert -RUN ln -s /usr/local/bin/kdu_expand /l/local/bin/kdu_expand -RUN ln -s /usr/local/bin/kdu_compress /l/local/bin/kdu_compress -RUN /bin/bash -c 'for cmd in pamflip jpegtopnm tifftopnm bmptopnm pngtopam ppmmake pamcomp pnmscalefixed pamscale pnmrotate pnmpad pamtotiff pnmtotiff 
pnmtojpeg pamrgbatopng ppmtopgm pnmtopng; do ln -s /usr/bin/$cmd /l/local/bin; done' - -WORKDIR /htapps/babel/cache -RUN mkdir imgsrv -RUN chown -R www-data . -RUN chmod -R 4777 . - -WORKDIR /htapps/babel/logs -RUN chown -R www-data . -RUN chmod -R 4777 . - -COPY ./mdp-lib /htapps/babel/mdp-lib -COPY ./plack-lib /htapps/babel/plack-lib -COPY ./slip-lib /htapps/babel/slip-lib -COPY ./mdp-web /htapps/babel/mdp-web - -WORKDIR /htapps/babel/pt -RUN ln -s /htapps/babel /htapps/test.babel - -COPY ./pt /htapps/babel/pt -RUN echo -e "debug_local = 1\ndebug_enabled = 1\nmdpitem_use_cache=false\n" > lib/Config/local.conf -RUN chgrp -R www-data /htapps/babel/pt - -WORKDIR /htapps/babel/imgsrv -COPY ./imgsrv /htapps/babel/imgsrv -RUN echo -e "debug_local=1\ndebug_enabled=1\nmdpitem_use_cache=false\n" > lib/Config/local.conf -RUN chgrp -R www-data /htapps/babel/imgsrv - -RUN ln -s /etc/apache2/mods-available/rewrite.load /etc/apache2/mods-enabled -RUN ln -s /etc/apache2/mods-available/cgi.load /etc/apache2/mods-enabled -RUN ln -s /etc/apache2/mods-available/proxy.load /etc/apache2/mods-enabled -RUN ln -s /etc/apache2/mods-available/proxy_fcgi.load /etc/apache2/mods-enabled -RUN ln -s /etc/apache2/mods-available/proxy_http.load /etc/apache2/mods-enabled - -COPY ./babel-local-dev/docker/000-default.conf /etc/apache2/sites-enabled/000-default.conf - -# CMD [ "/usr/sbin/apache2", "-D", "FOREGROUND"] -CMD [ "/usr/sbin/apache2ctl", "-D", "FOREGROUND" ] +FROM hathitrust-imgsrv + +RUN apt-get -y install apache2 libapache2-mod-fcgid +RUN rm /etc/apache2/sites-available/* + +RUN /usr/sbin/a2dismod 'mpm_*' +RUN a2enmod headers +RUN a2enmod env +RUN a2enmod mpm_prefork +RUN a2enmod rewrite +RUN a2enmod proxy +RUN a2enmod proxy_fcgi +RUN a2enmod proxy_http +RUN a2enmod cgi + +COPY ./docker/000-default.conf /etc/apache2/sites-enabled +STOPSIGNAL SIGWINCH + +COPY docker/apache.sh / +RUN chmod +x /apache.sh +ENTRYPOINT ["/apache.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index 
ac99275f3..049646a1d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,55 +2,19 @@ version: '3' services: -### CATALOG FRONT-END - - nginx-catalog: + nginx: image: nginx ports: - "8080:8080" volumes: - - ../catalog/docker/nginx-default.conf:/etc/nginx/conf.d/default.conf + - ./docker/nginx-default.conf:/etc/nginx/conf.d/default.conf - ../catalog:/app - ../common:/app/common-dev depends_on: - vufind - - vufind: - build: ../catalog - hostname: 'catalog-dev' - volumes: - - ../catalog:/app - depends_on: - - mysql-sdr - - solr-sdr-catalog - -### BABEL APPS - - apache-babel: - build: - context: . - dockerfile: ../imgsrv/docker/Dockerfile.apache - image: hathitrust-imsgsrv-apache - ports: - - "8888:80" - volumes: - - "../imgsrv:/htapps/babel/imgsrv" - - "../imgsrv/docker/sites-available:/etc/apache2/sites-available" - - "../imgsrv-sample-data/sdr1:/sdr1" - - "../imgsrv-sample-data/etc:/htapps/babel/etc" - - "../imgsrv-sample-data/watermarks:/htapps/babel/watermarks" - - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" - depends_on: - imgsrv - - mysql-sdr - - solr-sdr-catalog - environment: - - SDRROOT=/htapps/babel - - SDRDATAROOT=/sdr1 - - HT_DEV=docker - command: bash -c "/htapps/babel/imgsrv/docker/bin/apache.sh" + - apache-cgi - # assumes imgsrv-sample-data has been checked out next to "imgsrv" imgsrv: build: ../imgsrv image: hathitrust-imgsrv @@ -73,6 +37,33 @@ services: - "31028:31028" command: bash -c "/htapps/babel/imgsrv/bin/startup_imgsrv" + apache-cgi: + build: . 
+ volumes: + - "./docker/000-default.conf:/etc/apache2/sites-enabled/000-default.conf" + - ..:/htapps/babel + - "../imgsrv-sample-data/sdr1:/sdr1" + - "../imgsrv-sample-data/etc:/htapps/babel/etc" + - "../imgsrv-sample-data/watermarks:/htapps/babel/watermarks" + - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" + environment: + - SDRROOT=/htapps/babel + - SDRDATAROOT=/sdr1 + depends_on: + - mysql-sdr + - solr-sdr-catalog + ports: + - "41028:41028" + + vufind: + build: ../catalog + hostname: 'catalog-dev' + volumes: + - ../catalog:/app + depends_on: + - mysql-sdr + - solr-sdr-catalog + #### DATA STORES solr-sdr-catalog: diff --git a/docker/000-default.conf b/docker/000-default.conf index a23ff5ec2..a0b2c1288 100644 --- a/docker/000-default.conf +++ b/docker/000-default.conf @@ -1,37 +1,12 @@ Listen 41028 - # The ServerName directive sets the request scheme, hostname and port that - # the server uses to identify itself. This is used when creating - # redirection URLs. In the context of virtual hosts, the ServerName - # specifies what hostname must appear in the request's Host: header to - # match this virtual host. For the default virtual host (this file) this - # value is not decisive as it is used as a last resort host regardless. - # However, you must set it for any further virtual host explicitly. - #ServerName www.example.com + ServerAdmin hathitrust@localhost + DocumentRoot /htapps/babel - ServerAdmin webmaster@localhost - DocumentRoot /htapps/babel + LogLevel debug + ErrorLog /dev/stdout + CustomLog /dev/stdout combined - # Available loglevels: trace8, ..., trace1, debug, info, notice, warn, - # error, crit, alert, emerg. - # It is also possible to configure the loglevel for particular - # modules, e.g. 
- #LogLevel info ssl:warn - - # ErrorLog ${APACHE_LOG_DIR}/error.log - # CustomLog ${APACHE_LOG_DIR}/access.log combined - - LogLevel trace8 - ErrorLog /dev/stderr - CustomLog /dev/stdout combined - - # For most configuration files from conf-available/, which are - # enabled or disabled at a global level, it is possible to - # include a line for only one particular virtual host. For example the - # following line enables the CGI configuration for this host only - # after it has been globally disabled with "a2disconf". - - Include conf-available/serve-cgi-bin.conf RewriteEngine On ## SetEnv/SetEnvIf for environment variables @@ -44,32 +19,32 @@ Listen 41028 # SetEnv PTSEARCH_SOLR https://testing.ptsearch.kubernetes.hathitrust.org:8443/solr/ptsearch # SetEnv PTSEARCH_SOLR_BASIC_AUTH c29scjpwY1hoMVQxTVF4eExoRUNjSVZPME43MDc2Vk1WdzdUYms= - - - Require not env badrobot - Require not env loadbalancer - Require all granted - - + + Options Indexes FollowSymLinks + AllowOverride All + Require all granted + Options +ExecCGI SetHandler cgi-script - RewriteCond %{DOCUMENT_ROOT}/babel/$1/web/$2 -f + RewriteCond %{DOCUMENT_ROOT}/$1/web/$2 -f RewriteRule ^/([^/]+)/(.*) /$1/web/$2 [last] - RewriteCond %{DOCUMENT_ROOT}/babel/$1/web/ -d + RewriteCond %{DOCUMENT_ROOT}/$1/web/ -d RewriteRule ^/([^/]+)/?$ /$1/web/ [last] - RewriteCond %{DOCUMENT_ROOT}/babel/$2/cgi/$3 -f - RewriteRule ^/(shcgi|cgi)/([^/]+)/([^/]+)(.*)$ /$2/cgi/$3$4 [skip] + RewriteCond %{DOCUMENT_ROOT}/$2/cgi/$3 -f + RewriteRule ^/(cgi)/([^/]+)/([^/]+)(.*)$ /$2/cgi/$3$4 [skip] + + RewriteCond %{DOCUMENT_ROOT}/$2/cgi/$2 -f + RewriteRule ^/(cgi)/([^/]+)(.*)$ /$2/cgi/$2$3 - RewriteCond %{DOCUMENT_ROOT}/babel/$2/cgi/$2 -f - RewriteRule ^/(shcgi|cgi)/([^/]+)(.*)$ /$2/cgi/$2$3 + RewriteCond %{DOCUMENT_ROOT}/$1/cgi/$3.choke -f + RewriteRule ^/([^/]+)/(cgi)/([^/]+)(.*)$ /$1/cgi/$3.choke$4 [last] - RewriteCond %{DOCUMENT_ROOT}/babel/$1/cgi/$3.choke -f - RewriteRule ^/([^/]+)/(shcgi|cgi)/([^/]+)(.*)$ /$1/cgi/$3.choke$4 [last] + # 
TODO include for auth env vars - \ No newline at end of file + diff --git a/docker/apache.sh b/docker/apache.sh new file mode 100755 index 000000000..4fa3b70f3 --- /dev/null +++ b/docker/apache.sh @@ -0,0 +1,13 @@ +#! /bin/bash + +# Apache gets grumpy about PID files pre-existing +if [ ! -d /var/run/apache2 ] +then + mkdir -p /var/run/apache2 +fi + +rm -f /var/run/apache2/apache2*.pid + +source /etc/apache2/envvars + +exec apache2 -DFOREGROUND diff --git a/docker/nginx-default.conf b/docker/nginx-default.conf index 417a954fa..69637be9f 100644 --- a/docker/nginx-default.conf +++ b/docker/nginx-default.conf @@ -76,9 +76,13 @@ server { } location ~ /cgi/imgsrv/(image|thumbnail|meta|html|ocr|cover)(.*) { - rewrite /cgi/imgsrv/(.*)$ /$1 break; - proxy_pass http://imgsrv:31028; - # proxy_pass http://imgsrv:31028/$1$2; + fastcgi_split_path_info ^(/cgi/imgsrv)(/.+)$; + # try_files $fastcgi_script_name =404; + set $path_info $fastcgi_path_info; + fastcgi_param PATH_INFO $path_info; + fastcgi_pass imgsrv:31028; + fastcgi_param SCRIPT_FILENAME $fastcgi_script_name; + include fastcgi_params; } location /cgi/imgsrv { diff --git a/setup.sh b/setup.sh index 65c0a1047..9e9890af3 100755 --- a/setup.sh +++ b/setup.sh @@ -7,12 +7,17 @@ read git clone --recurse-submodules git@github.com:hathitrust/imgsrv git clone --recurse-submodules git@github.com:hathitrust/catalog git clone --recurse-submodules git@github.com:hathitrust/common +git clone --recurse-submodules git@github.com:hathitrust/pt git clone --recurse-submodules git@github.com:hathitrust/hathitrust_catalog_indexer git clone --recurse-submodules -b DEV-667-stage-item git@github.com:hathitrust/ht-pairtree git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/slip git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/imgsrv-sample-data git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/lss_solr_configs +# Directories the web server needs to write to under 
/htapps/babel +mkdir cache logs +chmod a+w cache logs + # Not yet covered in the apache config although maybe it was before # git clone git@github.com:hathitrust/pt.git From eddf216082817603fa0d069a7681d3ffe6e97eb1 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Mon, 27 Mar 2023 11:40:20 -0400 Subject: [PATCH 08/28] DEV-663: pt, ssd config * Add ssd to checkout list * Enumerate directories to mount for apache - otherwise directories we have in the image (geoip, cache, etc) get masked. Could change this in the future if we move more of the infrastructure directly to this repo rather than relying on checkouts in the parent dir --- docker-compose.yml | 5 ++++- docker/nginx-default.conf | 10 ++++++++++ setup.sh | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 049646a1d..ea2ee7233 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,7 +41,10 @@ services: build: . volumes: - "./docker/000-default.conf:/etc/apache2/sites-enabled/000-default.conf" - - ..:/htapps/babel + - ../imgsrv:/htapps/babel/imgsrv + - ../pt:/htapps/babel/pt + - ../ssd:/htapps/babel/ssd + - ../common:/htapps/babel/common - "../imgsrv-sample-data/sdr1:/sdr1" - "../imgsrv-sample-data/etc:/htapps/babel/etc" - "../imgsrv-sample-data/watermarks:/htapps/babel/watermarks" diff --git a/docker/nginx-default.conf b/docker/nginx-default.conf index 69637be9f..1b6b71aaa 100644 --- a/docker/nginx-default.conf +++ b/docker/nginx-default.conf @@ -100,6 +100,16 @@ server { proxy_pass http://apache-cgi:41028/pt; } + location /cgi/ssd { + # rewrite /cgi/pt/(.*)$ /cgi/pt/$1 break; + proxy_pass http://apache-cgi:41028/cgi/ssd; + } + + location /ssd { + rewrite /ssd/(.*)$ /ssd/$1 break; + proxy_pass http://apache-cgi:41028/ssd; + } + location / { try_files $uri @rewrite; } diff --git a/setup.sh b/setup.sh index 9e9890af3..8899e872f 100755 --- a/setup.sh +++ b/setup.sh @@ -8,6 +8,7 @@ git clone --recurse-submodules git@github.com:hathitrust/imgsrv 
git clone --recurse-submodules git@github.com:hathitrust/catalog git clone --recurse-submodules git@github.com:hathitrust/common git clone --recurse-submodules git@github.com:hathitrust/pt +git clone --recurse-submodules git@github.com:hathitrust/ssd git clone --recurse-submodules git@github.com:hathitrust/hathitrust_catalog_indexer git clone --recurse-submodules -b DEV-667-stage-item git@github.com:hathitrust/ht-pairtree git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/slip From e0f13c1b00d10b45c6ea05fcfb42a862793f856b Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Mon, 27 Mar 2023 11:50:58 -0400 Subject: [PATCH 09/28] remove branch for imgsrv-sample-data merged https://github.com/hathitrust/imgsrv-sample-data/pull/1 --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 8899e872f..5e758f624 100755 --- a/setup.sh +++ b/setup.sh @@ -5,6 +5,7 @@ echo "Checking out into $PWD - enter to continue, ctrl-C to abort" read git clone --recurse-submodules git@github.com:hathitrust/imgsrv +git clone --recurse-submodules git@github.com:hathitrust/imgsrv-sample-data git clone --recurse-submodules git@github.com:hathitrust/catalog git clone --recurse-submodules git@github.com:hathitrust/common git clone --recurse-submodules git@github.com:hathitrust/pt @@ -12,7 +13,6 @@ git clone --recurse-submodules git@github.com:hathitrust/ssd git clone --recurse-submodules git@github.com:hathitrust/hathitrust_catalog_indexer git clone --recurse-submodules -b DEV-667-stage-item git@github.com:hathitrust/ht-pairtree git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/slip -git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/imgsrv-sample-data git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/lss_solr_configs # Directories the web server needs to write to under /htapps/babel From b8c741bcd7899c617db8789390eddb476590d90d Mon Sep 17 00:00:00 2001 
From: Aaron Elkiss Date: Mon, 27 Mar 2023 12:59:04 -0400 Subject: [PATCH 10/28] Ensure solr core is writable --- setup.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.sh b/setup.sh index 5e758f624..b70815c02 100755 --- a/setup.sh +++ b/setup.sh @@ -19,6 +19,9 @@ git clone --recurse-submodules -b DEV-661-docker git@github.com:hathitrust/lss_s mkdir cache logs chmod a+w cache logs +# Directory solr needs to write to +chmod a+w lss_solr_configs/lss-dev/core-x/data + # Not yet covered in the apache config although maybe it was before # git clone git@github.com:hathitrust/pt.git From af9b381ad0a0c3555195e537ec4c4eb31dab106c Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Mon, 27 Mar 2023 12:59:17 -0400 Subject: [PATCH 11/28] usage & pairtree creation with stage-item * ensure usage is actually printed * update ht-pairtree to make sure that namespace dir is created with correct prefix --- stage-item/Gemfile.lock | 2 +- stage-item/stage_item.rb | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/stage-item/Gemfile.lock b/stage-item/Gemfile.lock index cbb6d3a90..4fa2a9417 100644 --- a/stage-item/Gemfile.lock +++ b/stage-item/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: ../../ht-pairtree - revision: 56c5d0517588065b1dbd8d7b208dfcb9a0d61793 + revision: 7021c21c07d7cc018f23b3dfd7f3f3954a109f57 specs: ht-pairtree (0.1.0) rpairtree diff --git a/stage-item/stage_item.rb b/stage-item/stage_item.rb index 0ec98b2d8..59bfbeab2 100644 --- a/stage-item/stage_item.rb +++ b/stage-item/stage_item.rb @@ -5,9 +5,8 @@ require "fileutils" require "ht/pairtree" require "marc" -require "pry" -require "pry-byebug" require "sequel" +require "tempfile" # The parent of babel-local-dev where all the HathiTrust repos are checked out HTDEV_ROOT = ENV["HTDEV_ROOT"] || File.realpath(File.join(__dir__,"..","..")) @@ -34,10 +33,10 @@ def initialize(htid, zip, mets) @zip = zip @mets = mets - usage unless [@zip, @mets].all? 
{ |f| File.exist?(f) } && + self.class.usage unless [@zip, @mets].all? { |f| File.exist?(f) } && @zip.match?(/\.zip$/) && @mets.match?(/\.xml$/) - @pt = HathiTrust::Pairtree.new(root: SDRDATAROOT) + @pt = HathiTrust::Pairtree.new(root: File.join(SDRDATAROOT,'obj')) end def run @@ -57,6 +56,7 @@ def stage_metadata def stage_content pt.create(htid, new_namespace_allowed: true) repo_path = pt.path_for(htid) + puts("↪️ Copying zip and mets to repo #{repo_path}\n") FileUtils.cp([zip,mets],repo_path) end @@ -139,7 +139,7 @@ def index_full_text end def self.usage - <<~EOT + STDERR.puts <<~EOT Usage: $0 namespace.barcode some_item.zip some_item.mets.xml where htid is something like "namespace.objid". @@ -150,6 +150,8 @@ def self.usage * populates the rights_current and slip_rights table * indexes the full text EOT + + exit 1 end end From a66feeb43e2c12eb2c7cf4dbb6aae596e54e31e4 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Mon, 27 Mar 2023 14:02:16 -0400 Subject: [PATCH 12/28] use geoip branch for imgsrv --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index b70815c02..47c461038 100755 --- a/setup.sh +++ b/setup.sh @@ -4,7 +4,7 @@ echo "Checking out into $PWD - enter to continue, ctrl-C to abort" read -git clone --recurse-submodules git@github.com:hathitrust/imgsrv +git clone --recurse-submodules -b DEV-663-geoip git@github.com:hathitrust/imgsrv git clone --recurse-submodules git@github.com:hathitrust/imgsrv-sample-data git clone --recurse-submodules git@github.com:hathitrust/catalog git clone --recurse-submodules git@github.com:hathitrust/common From 80fbcab98e1e5bb20346a4a800ec027357cf9138 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Tue, 28 Mar 2023 13:37:15 -0400 Subject: [PATCH 13/28] (WIP, untested) check out and build everything here * Don't give instructions to clutter parent dir * Move dockerfile for perl apps here --- .gitignore | 2 + README.md | 5 +- docker-compose.yml | 68 ++++++++++----------- 
docker/apache-cgi/Dockerfile | 21 +++++++ docker/{ => apache-cgi}/apache.sh | 0 Dockerfile => docker/apache/Dockerfile | 0 docker/babel-base/Dockerfile | 82 ++++++++++++++++++++++++++ setup.sh | 49 ++++++++++----- 8 files changed, 175 insertions(+), 52 deletions(-) create mode 100644 docker/apache-cgi/Dockerfile rename docker/{ => apache-cgi}/apache.sh (100%) rename Dockerfile => docker/apache/Dockerfile (100%) create mode 100644 docker/babel-base/Dockerfile diff --git a/.gitignore b/.gitignore index d880d8c5e..8f1a82ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ vendor .bundle +stage-item/*.xml +stage-item/*.zip diff --git a/README.md b/README.md index 93c795e43..3395cf785 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,14 @@ so `babel-local-dev` has access to the other repositories. First clone this repository: ```bash -git clone git@github.com:hathitrust/babel-local-dev.git +git clone git@github.com:hathitrust/babel-local-dev.git babel ``` Then run: ```bash -babel-local-dev/setup.sh +cd babel +./setup.sh ``` This will check out the other repositories along with their submodules. 
diff --git a/docker-compose.yml b/docker-compose.yml index ea2ee7233..42c1271e2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,22 +8,22 @@ services: - "8080:8080" volumes: - ./docker/nginx-default.conf:/etc/nginx/conf.d/default.conf - - ../catalog:/app - - ../common:/app/common-dev + - ./catalog:/app + - ./common:/app/common-dev depends_on: - vufind - imgsrv - apache-cgi imgsrv: - build: ../imgsrv - image: hathitrust-imgsrv + build: ./docker/babel-base + image: hathitrust-babel-base volumes: - - ../imgsrv:/htapps/babel/imgsrv - - "../imgsrv-sample-data/sdr1:/sdr1" - - "../imgsrv-sample-data/etc:/htapps/babel/etc" - - "../imgsrv-sample-data/watermarks:/htapps/babel/watermarks" - - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" + - ./imgsrv:/htapps/babel/imgsrv + - "./sample-data/sdr1:/sdr1" + - "./sample-data/etc:/htapps/babel/etc" + - "./sample-data/watermarks:/htapps/babel/watermarks" + - "./sample-data:/tmp/sample-data" environment: - SDRROOT=/htapps/babel - SDRDATAROOT=/sdr1 @@ -38,17 +38,17 @@ services: command: bash -c "/htapps/babel/imgsrv/bin/startup_imgsrv" apache-cgi: - build: . 
+ build: ./docker/apache-cgi volumes: - "./docker/000-default.conf:/etc/apache2/sites-enabled/000-default.conf" - - ../imgsrv:/htapps/babel/imgsrv - - ../pt:/htapps/babel/pt - - ../ssd:/htapps/babel/ssd - - ../common:/htapps/babel/common - - "../imgsrv-sample-data/sdr1:/sdr1" - - "../imgsrv-sample-data/etc:/htapps/babel/etc" - - "../imgsrv-sample-data/watermarks:/htapps/babel/watermarks" - - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" + - ./imgsrv:/htapps/babel/imgsrv + - ./pt:/htapps/babel/pt + - ./ssd:/htapps/babel/ssd + - ./common:/htapps/babel/common + - "./sample-data/sdr1:/sdr1" + - "./sample-data/etc:/htapps/babel/etc" + - "./sample-data/watermarks:/htapps/babel/watermarks" + - "./sample-data:/tmp/sample-data" environment: - SDRROOT=/htapps/babel - SDRDATAROOT=/sdr1 @@ -59,10 +59,10 @@ services: - "41028:41028" vufind: - build: ../catalog + build: ./catalog hostname: 'catalog-dev' volumes: - - ../catalog:/app + - ./catalog:/app depends_on: - mysql-sdr - solr-sdr-catalog @@ -79,16 +79,16 @@ services: ports: - "8983:8983" volumes: - - ../lss_solr_configs/lss-dev/core-x:/opt/solr/server/solr/core-x - - ../lss_solr_configs/lss-dev/core-y:/opt/solr/server/solr/core-y - - ../lss_solr_configs:/opt/lss_solr_configs - - ../lss_solr_configs/lib:/opt/solr/server/solr/lib + - ./lss_solr_configs/lss-dev/core-x:/opt/solr/server/solr/core-x + - ./lss_solr_configs/lss-dev/core-y:/opt/solr/server/solr/core-y + - ./lss_solr_configs:/opt/lss_solr_configs + - ./lss_solr_configs/lib:/opt/solr/server/solr/lib mysql-sdr: image: ghcr.io/hathitrust/db-image:latest volumes: - - ../slip/etc/sql/100_slip.sql:/docker-entrypoint-initdb.d/100_slip.sql - - ../catalog/docker/vufind.sql:/docker-entrypoint-initdb.d/101_vufind.sql + - ./slip/etc/sql/100_slip.sql:/docker-entrypoint-initdb.d/100_slip.sql + - ./catalog/docker/vufind.sql:/docker-entrypoint-initdb.d/101_vufind.sql ports: - "3307:3306" @@ -102,15 +102,15 @@ services: # when we do 'docker-compose up' slip: - build: ../slip + 
build: ./slip image: hathitrust-slip volumes: - - ../slip:/htapps/babel/slip + - ./slip:/htapps/babel/slip # this is where docs-j saves output - - ../slip/sample:/htapps/babel/logs/tmp - - "../imgsrv-sample-data/sdr1:/sdr1" - - "../imgsrv-sample-data/etc:/htapps/babel/etc" - - "../imgsrv-sample-data:/tmp/imgsrv-sample-data" + - ./slip/sample:/htapps/babel/logs/tmp + - "./sample-data/sdr1:/sdr1" + - "./sample-data/etc:/htapps/babel/etc" + - "./sample-data:/tmp/sample-data" environment: - SDRROOT=/htapps/babel - SDRDATAROOT=/sdr1 @@ -132,8 +132,8 @@ services: depends_on: - solr-sdr-catalog volumes: - - "../imgsrv-sample-data/metadata:/app/metadata" + - "./sample-data/metadata:/app/metadata" profiles: - indexing - # todo: ingest, bound to imgsrv-sample-data + # todo: ingest, bound to sample-data diff --git a/docker/apache-cgi/Dockerfile b/docker/apache-cgi/Dockerfile new file mode 100644 index 000000000..6fe68efe8 --- /dev/null +++ b/docker/apache-cgi/Dockerfile @@ -0,0 +1,21 @@ +FROM hathitrust-babel-base + +RUN apt-get -y install apache2 libapache2-mod-fcgid +RUN rm /etc/apache2/sites-available/* + +RUN /usr/sbin/a2dismod 'mpm_*' +RUN a2enmod headers +RUN a2enmod env +RUN a2enmod mpm_prefork +RUN a2enmod rewrite +RUN a2enmod proxy +RUN a2enmod proxy_fcgi +RUN a2enmod proxy_http +RUN a2enmod cgi + +COPY ./docker/000-default.conf /etc/apache2/sites-enabled +STOPSIGNAL SIGWINCH + +COPY docker/apache.sh / +RUN chmod +x /apache.sh +ENTRYPOINT ["/apache.sh"] diff --git a/docker/apache.sh b/docker/apache-cgi/apache.sh similarity index 100% rename from docker/apache.sh rename to docker/apache-cgi/apache.sh diff --git a/Dockerfile b/docker/apache/Dockerfile similarity index 100% rename from Dockerfile rename to docker/apache/Dockerfile diff --git a/docker/babel-base/Dockerfile b/docker/babel-base/Dockerfile new file mode 100644 index 000000000..b7866ebba --- /dev/null +++ b/docker/babel-base/Dockerfile @@ -0,0 +1,82 @@ +FROM debian:bookworm + +# # does not work bookworm - 
evaluate if it's needed +# RUN sed -i 's/main.*/main contrib non-free/' /etc/apt/sources.list + +RUN apt-get update && apt-get install -y \ + autoconf \ + bison \ + build-essential \ + cpanminus \ + curl \ + file \ + git \ + grokj2k-tools \ + imagemagick \ + libapache-session-perl \ + libconfig-tiny-perl \ + libdate-calc-perl \ + libdate-manip-perl \ + libdbd-mysql-perl \ + libdevel-cover-perl \ + libfcgi-perl \ + libfcgi-procmanager-perl \ + libimage-exiftool-perl \ + libimage-info-perl \ + libimage-size-perl \ + libio-string-perl \ + libipc-run-perl \ + libjson-xs-perl \ + liblist-moreutils-perl \ + libmailtools-perl \ + libmime-types-perl \ + libnet-dns-perl \ + libplack-perl \ + libtest-class-perl \ + libtry-tiny-perl \ + libxml-libxml-perl \ + libxml-libxslt-perl \ + netcat-traditional \ + netpbm \ + perl \ + procps \ + starman \ + unzip \ + uuid-dev \ + zip \ + zlib1g-dev + +RUN cpanm --notest \ + File::Pairtree \ + URI::Escape \ + CGI::PSGI \ + IP::Geolocation::MMDB \ + UUID + +WORKDIR /htapps/babel/geoip +ADD --chmod=644 https://github.com/maxmind/MaxMind-DB/blob/main/test-data/GeoIP2-Country-Test.mmdb?raw=true GeoIP2-Country.mmdb + +RUN ln -s /tmp /ram + +RUN mkdir -p /l/local/bin +RUN ln -s /usr/bin/unzip /l/local/bin/unzip +RUN ln -s /usr/bin/convert /l/local/bin/convert +RUN ln -s /usr/bin/plackup /l/local/bin/plackup +RUN /bin/bash -c 'for cmd in pamflip jpegtopnm tifftopnm bmptopnm pngtopam ppmmake pamcomp pnmscalefixed pamscale pnmrotate pnmpad pamtotiff pnmtotiff pnmtojpeg pamrgbatopng ppmtopgm pnmtopng; do ln -s /usr/bin/$cmd /l/local/bin; done' + +WORKDIR /htapps/babel/imgsrv + +RUN mkdir /htapps/babel/cache +RUN chmod 4777 /htapps/babel/cache + +RUN mkdir /htapps/babel/logs +RUN chmod 4777 /htapps/babel/logs + +RUN ln -s /htapps/babel /htapps/test.babel +RUN cd /htapps/babel + +COPY . 
/htapps/babel/imgsrv +RUN ln -s imgsrv/vendor/common-lib/lib ../mdp-lib +RUN ln -s imgsrv/web/common-web ../mdp-web + +CMD ["/htapps/babel/imgsrv/bin/startup_imgsrv"] diff --git a/setup.sh b/setup.sh index 47c461038..8c900a5a1 100755 --- a/setup.sh +++ b/setup.sh @@ -1,19 +1,36 @@ #!/bin/bash -echo "Checking out into $PWD - enter to continue, ctrl-C to abort" +cat < Date: Tue, 28 Mar 2023 13:58:45 -0400 Subject: [PATCH 14/28] use branches for pt, ssd, imgsrv --- setup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.sh b/setup.sh index 8c900a5a1..7b4ec505d 100755 --- a/setup.sh +++ b/setup.sh @@ -21,12 +21,12 @@ fi echo $GIT_BASE exit -git clone --recurse-submodules -b DEV-663-geoip $GIT_BASE/imgsrv +git clone --recurse-submodules -b DEV-667-remove-docker-compose $GIT_BASE/imgsrv git clone --recurse-submodules $GIT_BASE/imgsrv-sample-data ./sample-data git clone --recurse-submodules $GIT_BASE/catalog git clone --recurse-submodules $GIT_BASE/common -git clone --recurse-submodules $GIT_BASE/pt -git clone --recurse-submodules $GIT_BASE/ssd +git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/pt +git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/ssd git clone --recurse-submodules $GIT_BASE/hathitrust_catalog_indexer git clone --recurse-submodules -b DEV-667-stage-item $GIT_BASE/ht-pairtree git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/slip From 98d9d81f7727def1dff846cf815ef7108b384dc7 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Tue, 28 Mar 2023 15:11:37 -0400 Subject: [PATCH 15/28] fixup over-aggressive search & replace in setup.sh --- setup.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/setup.sh b/setup.sh index 7b4ec505d..dabbafbd3 100755 --- a/setup.sh +++ b/setup.sh @@ -5,7 +5,7 @@ Checking out into $PWD What should the git URL be? [1] HTTPS: https://github.com/hathitrust (default) - [2] SSH: $GIT_BASE + [2] SSH: git@github.com:hathitrust Enter 1, 2, or ctrl-C to abort. 
EOT @@ -15,12 +15,9 @@ read proto GIT_BASE="https://github.com/hathitrust" if [[ "$proto" == "2" ]]; then - GIT_BASE="$GIT_BASE" + GIT_BASE="git@github.com:hathitrust" fi -echo $GIT_BASE -exit - git clone --recurse-submodules -b DEV-667-remove-docker-compose $GIT_BASE/imgsrv git clone --recurse-submodules $GIT_BASE/imgsrv-sample-data ./sample-data git clone --recurse-submodules $GIT_BASE/catalog From 01877a9ff4234b52241972f3336eb93cf9ac35c4 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Tue, 28 Mar 2023 16:00:04 -0400 Subject: [PATCH 16/28] Fixes for getting everything under this directory --- .gitignore | 15 +++++++++++++++ README.md | 6 +++--- docker-compose.yml | 2 +- docker/{ => apache-cgi}/000-default.conf | 0 docker/apache-cgi/Dockerfile | 4 ++-- docker/apache/Dockerfile | 21 --------------------- stage-item/Gemfile | 2 +- stage-item/Gemfile.lock | 4 ++-- stage-item/stage_item.rb | 6 +++--- 9 files changed, 27 insertions(+), 33 deletions(-) rename docker/{ => apache-cgi}/000-default.conf (100%) delete mode 100644 docker/apache/Dockerfile diff --git a/.gitignore b/.gitignore index 8f1a82ca6..a98d75eab 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,18 @@ vendor .bundle stage-item/*.xml stage-item/*.zip + +# other repositories + +catalog/ +common/ +hathitrust_catalog_indexer/ +ht-pairtree/ +imgsrv-sample-data/ +imgsrv/ +lss_solr_configs/ +pt/ +sample-data/ +slip/ +ssd/ + diff --git a/README.md b/README.md index 3395cf785..3222a3555 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ There's a lot, because we're replicating running on the dev servers with In your workdir: ``` -docker-compose -f ./babel-local-dev/docker-compose.yml build +docker-compose build ``` ## Step 4: run `babel-local-dev`: @@ -37,7 +37,7 @@ docker-compose -f ./babel-local-dev/docker-compose.yml build In your workdir: ``` -docker-compose -f ./babel-local-dev/docker-compose.yml up +docker-compose up ``` In your browser: @@ -98,7 +98,7 @@ Then, install dependencies for the 
`stage-item` script and run it with the downloaded zip and METS: ```bash -cd babel-local-dev/stage-item +cd stage-item bundle config set --local path 'vendor/bundle' bundle install bundle exec ruby stage_item.rb uc2.ark:/13960/t4mk66f1d ark+=13960=t4mk66f1d.zip ark+=13960=t4mk66f1d.mets.xml diff --git a/docker-compose.yml b/docker-compose.yml index 42c1271e2..6df30aafe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,7 +40,7 @@ services: apache-cgi: build: ./docker/apache-cgi volumes: - - "./docker/000-default.conf:/etc/apache2/sites-enabled/000-default.conf" + - "./docker/apache-cgi/000-default.conf:/etc/apache2/sites-enabled/000-default.conf" - ./imgsrv:/htapps/babel/imgsrv - ./pt:/htapps/babel/pt - ./ssd:/htapps/babel/ssd diff --git a/docker/000-default.conf b/docker/apache-cgi/000-default.conf similarity index 100% rename from docker/000-default.conf rename to docker/apache-cgi/000-default.conf diff --git a/docker/apache-cgi/Dockerfile b/docker/apache-cgi/Dockerfile index 6fe68efe8..8b65b3929 100644 --- a/docker/apache-cgi/Dockerfile +++ b/docker/apache-cgi/Dockerfile @@ -13,9 +13,9 @@ RUN a2enmod proxy_fcgi RUN a2enmod proxy_http RUN a2enmod cgi -COPY ./docker/000-default.conf /etc/apache2/sites-enabled +COPY 000-default.conf /etc/apache2/sites-enabled STOPSIGNAL SIGWINCH -COPY docker/apache.sh / +COPY apache.sh / RUN chmod +x /apache.sh ENTRYPOINT ["/apache.sh"] diff --git a/docker/apache/Dockerfile b/docker/apache/Dockerfile deleted file mode 100644 index 89dc74eaf..000000000 --- a/docker/apache/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -FROM hathitrust-imgsrv - -RUN apt-get -y install apache2 libapache2-mod-fcgid -RUN rm /etc/apache2/sites-available/* - -RUN /usr/sbin/a2dismod 'mpm_*' -RUN a2enmod headers -RUN a2enmod env -RUN a2enmod mpm_prefork -RUN a2enmod rewrite -RUN a2enmod proxy -RUN a2enmod proxy_fcgi -RUN a2enmod proxy_http -RUN a2enmod cgi - -COPY ./docker/000-default.conf /etc/apache2/sites-enabled -STOPSIGNAL SIGWINCH - -COPY 
docker/apache.sh / -RUN chmod +x /apache.sh -ENTRYPOINT ["/apache.sh"] diff --git a/stage-item/Gemfile b/stage-item/Gemfile index d3f44df92..b26ef897f 100644 --- a/stage-item/Gemfile +++ b/stage-item/Gemfile @@ -9,7 +9,7 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } gem "marc", "~> 1.2" gem "faraday", "~> 2.7" gem "faraday-follow_redirects" -gem "ht-pairtree", git: "../../ht-pairtree" +gem "ht-pairtree", git: "../ht-pairtree" gem "mysql2" gem "sequel" diff --git a/stage-item/Gemfile.lock b/stage-item/Gemfile.lock index 4fa2a9417..5beaeb1d7 100644 --- a/stage-item/Gemfile.lock +++ b/stage-item/Gemfile.lock @@ -1,6 +1,6 @@ GIT - remote: ../../ht-pairtree - revision: 7021c21c07d7cc018f23b3dfd7f3f3954a109f57 + remote: ../ht-pairtree + revision: b03be8927efc93a3c8c28a300b4927b24972daa2 specs: ht-pairtree (0.1.0) rpairtree diff --git a/stage-item/stage_item.rb b/stage-item/stage_item.rb index 59bfbeab2..b99cd3dc5 100644 --- a/stage-item/stage_item.rb +++ b/stage-item/stage_item.rb @@ -9,10 +9,10 @@ require "tempfile" # The parent of babel-local-dev where all the HathiTrust repos are checked out -HTDEV_ROOT = ENV["HTDEV_ROOT"] || File.realpath(File.join(__dir__,"..","..")) +HTDEV_ROOT = ENV["HTDEV_ROOT"] || File.realpath(File.join(__dir__,"..")) -METADATA_ROOT = ENV["METADATA_ROOT"] || File.join(HTDEV_ROOT,"imgsrv-sample-data","metadata") -SDRDATAROOT = ENV["SDRDATAROOT"] || File.join(HTDEV_ROOT,"imgsrv-sample-data","sdr1") +METADATA_ROOT = ENV["METADATA_ROOT"] || File.join(HTDEV_ROOT,"sample-data","metadata") +SDRDATAROOT = ENV["SDRDATAROOT"] || File.join(HTDEV_ROOT,"sample-data","sdr1") CATALOG_BASE = ENV["CATALOG_BASE"] || "https://catalog.hathitrust.org" MYSQL_URL = ENV["MYSQL_URL"] || "mysql2://mdp-admin:mdp-admin@127.0.0.1:3307/ht" CATALOG_SOLR = ENV["CATALOG_SOLR"] || "http://localhost:9033/solr/catalog" From 6fe655e337f5d4399fb6a8ae3c4c116d01ae80d2 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 29 Mar 2023 14:07:32 -0400 Subject: 
[PATCH 17/28] Use released version of ht-pairtree --- setup.sh | 1 - stage-item/Gemfile | 2 +- stage-item/Gemfile.lock | 9 +++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/setup.sh b/setup.sh index dabbafbd3..b513bce14 100755 --- a/setup.sh +++ b/setup.sh @@ -25,7 +25,6 @@ git clone --recurse-submodules $GIT_BASE/common git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/pt git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/ssd git clone --recurse-submodules $GIT_BASE/hathitrust_catalog_indexer -git clone --recurse-submodules -b DEV-667-stage-item $GIT_BASE/ht-pairtree git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/slip git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/lss_solr_configs diff --git a/stage-item/Gemfile b/stage-item/Gemfile index b26ef897f..04cc7d17a 100644 --- a/stage-item/Gemfile +++ b/stage-item/Gemfile @@ -9,7 +9,7 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } gem "marc", "~> 1.2" gem "faraday", "~> 2.7" gem "faraday-follow_redirects" -gem "ht-pairtree", git: "../ht-pairtree" +gem "ht-pairtree", github: "hathitrust/ht-pairtree", tag: 'v0.1.0' gem "mysql2" gem "sequel" diff --git a/stage-item/Gemfile.lock b/stage-item/Gemfile.lock index 5beaeb1d7..68f602c2b 100644 --- a/stage-item/Gemfile.lock +++ b/stage-item/Gemfile.lock @@ -1,9 +1,10 @@ GIT - remote: ../ht-pairtree - revision: b03be8927efc93a3c8c28a300b4927b24972daa2 + remote: https://github.com/hathitrust/ht-pairtree + revision: a8620c464438c8de7ad9a492b1fd5bd7f0756079 + tag: v0.1.0 specs: ht-pairtree (0.1.0) - rpairtree + pairtree (~> 0.3) GEM remote: https://rubygems.org/ @@ -22,6 +23,7 @@ GEM unf method_source (1.0.0) mysql2 (0.5.5) + pairtree (0.3.0) pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) @@ -29,7 +31,6 @@ GEM byebug (~> 11.0) pry (>= 0.13, < 0.15) rexml (3.2.5) - rpairtree (0.2.0) ruby2_keywords (0.0.5) scrub_rb (1.0.1) sequel (5.66.0) From a4a8b710fcc0aa9fd8fdbffbfc5580c6056388da Mon Sep 17 
00:00:00 2001 From: Aaron Elkiss Date: Wed, 29 Mar 2023 14:26:45 -0400 Subject: [PATCH 18/28] Remove branches from slip & imgsrv --- setup.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.sh b/setup.sh index b513bce14..ad9da9227 100755 --- a/setup.sh +++ b/setup.sh @@ -18,14 +18,14 @@ if [[ "$proto" == "2" ]]; then GIT_BASE="git@github.com:hathitrust" fi -git clone --recurse-submodules -b DEV-667-remove-docker-compose $GIT_BASE/imgsrv +git clone --recurse-submodules $GIT_BASE/imgsrv git clone --recurse-submodules $GIT_BASE/imgsrv-sample-data ./sample-data git clone --recurse-submodules $GIT_BASE/catalog git clone --recurse-submodules $GIT_BASE/common git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/pt git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/ssd git clone --recurse-submodules $GIT_BASE/hathitrust_catalog_indexer -git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/slip +git clone --recurse-submodules $GIT_BASE/slip git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/lss_solr_configs # Directories the web server needs to write to under /htapps/babel From 4d69341d7210e3c9dfe66166f417c52a576d758a Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 29 Mar 2023 14:39:32 -0400 Subject: [PATCH 19/28] Try to run solr as current user - ensures data dir is owned by current user - mount log dir outside We could try to use a Docker volume for this, but the problem is that it still wouldn't be owned by the solr user by default. If we were using a Dockerfile instead of mounting config directories in, we would have some other options. There might be other ways to work around this in the future, but this works for now. 
--- .gitignore | 4 +++- docker-compose.yml | 2 ++ logs/.keep | 0 logs/solr/.keep | 0 setup.sh | 10 +--------- 5 files changed, 6 insertions(+), 10 deletions(-) create mode 100644 logs/.keep create mode 100644 logs/solr/.keep diff --git a/.gitignore b/.gitignore index a98d75eab..11c996036 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ vendor .bundle +.env stage-item/*.xml stage-item/*.zip @@ -16,4 +17,5 @@ pt/ sample-data/ slip/ ssd/ - +logs/ +cache/ diff --git a/docker-compose.yml b/docker-compose.yml index 6df30aafe..16613f240 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -78,11 +78,13 @@ services: image: solr:6 ports: - "8983:8983" + user: ${CURRENT_USER} volumes: - ./lss_solr_configs/lss-dev/core-x:/opt/solr/server/solr/core-x - ./lss_solr_configs/lss-dev/core-y:/opt/solr/server/solr/core-y - ./lss_solr_configs:/opt/lss_solr_configs - ./lss_solr_configs/lib:/opt/solr/server/solr/lib + - ./logs/solr:/opt/solr/server/logs mysql-sdr: image: ghcr.io/hathitrust/db-image:latest diff --git a/logs/.keep b/logs/.keep new file mode 100644 index 000000000..e69de29bb diff --git a/logs/solr/.keep b/logs/solr/.keep new file mode 100644 index 000000000..e69de29bb diff --git a/setup.sh b/setup.sh index ad9da9227..556d5d684 100755 --- a/setup.sh +++ b/setup.sh @@ -28,15 +28,7 @@ git clone --recurse-submodules $GIT_BASE/hathitrust_catalog_indexer git clone --recurse-submodules $GIT_BASE/slip git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/lss_solr_configs -# Directories the web server needs to write to under /htapps/babel -mkdir cache logs -chmod a+w cache logs - -# Directory solr needs to write to -chmod a+w lss_solr_configs/lss-dev/core-x/data - -# Not yet covered in the apache config although maybe it was before -# git clone $GIT_BASE/pt.git +echo "CURRENT_USER=$(id -u):$(id -g)" >> .env # Do we need these separately? 
# git clone $GIT_BASE/mdp-lib.git From 278c28abf84192c43f78b378559a848de926102d Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 29 Mar 2023 16:25:03 -0400 Subject: [PATCH 20/28] Run apache as current user Avoids issues with permissions w/ cache, logs, etc --- docker-compose.yml | 11 ++++++++++- docker/apache-cgi/000-default.conf | 4 ++++ docker/apache-cgi/Dockerfile | 20 ++++++++++---------- docker/apache-cgi/apache.sh | 16 ++++++++++++---- setup.sh | 2 ++ 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 16613f240..b46848dc8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,9 +17,12 @@ services: imgsrv: build: ./docker/babel-base + user: ${CURRENT_USER} image: hathitrust-babel-base volumes: - ./imgsrv:/htapps/babel/imgsrv + - ./logs:/htapps/babel/logs + - ./cache:/htapps/babel/cache - "./sample-data/sdr1:/sdr1" - "./sample-data/etc:/htapps/babel/etc" - "./sample-data/watermarks:/htapps/babel/watermarks" @@ -30,6 +33,7 @@ services: - HT_DEV=docker - REMOTE_ADDR=127.0.0.1 - HTTP_HOST=127.0.0.1 + - APACHE_LOG_DIR=/tmp depends_on: - mysql-sdr - solr-sdr-catalog @@ -39,9 +43,12 @@ services: apache-cgi: build: ./docker/apache-cgi + user: ${CURRENT_USER} volumes: - "./docker/apache-cgi/000-default.conf:/etc/apache2/sites-enabled/000-default.conf" - ./imgsrv:/htapps/babel/imgsrv + - ./logs:/htapps/babel/logs + - ./cache:/htapps/babel/cache - ./pt:/htapps/babel/pt - ./ssd:/htapps/babel/ssd - ./common:/htapps/babel/common @@ -106,10 +113,11 @@ services: slip: build: ./slip image: hathitrust-slip + user: ${CURRENT_USER} volumes: - ./slip:/htapps/babel/slip # this is where docs-j saves output - - ./slip/sample:/htapps/babel/logs/tmp + - ./logs:/htapps/babel/logs - "./sample-data/sdr1:/sdr1" - "./sample-data/etc:/htapps/babel/etc" - "./sample-data:/tmp/sample-data" @@ -126,6 +134,7 @@ services: traject: image: ghcr.io/hathitrust/catalog-indexer-unstable + user: ${CURRENT_USER} environment: - 
SOLR_URL=http://solr-sdr-catalog:9033/solr/catalog - redirect_file=/dev/null diff --git a/docker/apache-cgi/000-default.conf b/docker/apache-cgi/000-default.conf index a0b2c1288..318f32a94 100644 --- a/docker/apache-cgi/000-default.conf +++ b/docker/apache-cgi/000-default.conf @@ -1,4 +1,8 @@ +ServerName apache-cgi +ErrorLog /dev/stdout +CustomLog /dev/stdout combined Listen 41028 + ServerAdmin hathitrust@localhost DocumentRoot /htapps/babel diff --git a/docker/apache-cgi/Dockerfile b/docker/apache-cgi/Dockerfile index 8b65b3929..d9d5529c6 100644 --- a/docker/apache-cgi/Dockerfile +++ b/docker/apache-cgi/Dockerfile @@ -1,17 +1,17 @@ FROM hathitrust-babel-base RUN apt-get -y install apache2 libapache2-mod-fcgid -RUN rm /etc/apache2/sites-available/* -RUN /usr/sbin/a2dismod 'mpm_*' -RUN a2enmod headers -RUN a2enmod env -RUN a2enmod mpm_prefork -RUN a2enmod rewrite -RUN a2enmod proxy -RUN a2enmod proxy_fcgi -RUN a2enmod proxy_http -RUN a2enmod cgi +RUN a2dissite '*' +RUN a2disconf other-vhosts-access-log +RUN a2dismod 'mpm_*' +RUN a2enmod headers \ + mpm_prefork \ + rewrite \ + proxy \ + proxy_fcgi \ + proxy_http \ + cgi COPY 000-default.conf /etc/apache2/sites-enabled STOPSIGNAL SIGWINCH diff --git a/docker/apache-cgi/apache.sh b/docker/apache-cgi/apache.sh index 4fa3b70f3..a42d1f1ba 100755 --- a/docker/apache-cgi/apache.sh +++ b/docker/apache-cgi/apache.sh @@ -1,13 +1,21 @@ #! /bin/bash # Apache gets grumpy about PID files pre-existing -if [ ! -d /var/run/apache2 ] +if [ ! 
-d /tmp/apache2 ] then - mkdir -p /var/run/apache2 + mkdir -p /tmp/apache2/{run,lock,log} fi -rm -f /var/run/apache2/apache2*.pid +rm -f /tmp/apache2/apache2*.pid -source /etc/apache2/envvars +export APACHE_PID_FILE=/tmp/apache2/run/apache2.pid +export APACHE_RUN_DIR=/tmp/apache2/run +export APACHE_LOCK_DIR=/tmp/apache2/lock +export APACHE_LOG_DIR=/tmp/apache2/log + +# Won't be effective if we pass user from docker-compose; that's OK - hence +# shenanigans above +export APACHE_RUN_USER=www-data +export APACHE_RUN_GROUP=www-data exec apache2 -DFOREGROUND diff --git a/setup.sh b/setup.sh index 556d5d684..43611704a 100755 --- a/setup.sh +++ b/setup.sh @@ -29,6 +29,8 @@ git clone --recurse-submodules $GIT_BASE/slip git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/lss_solr_configs echo "CURRENT_USER=$(id -u):$(id -g)" >> .env +echo "APACHE_RUN_USER=$(id -u)" >> .env +echo "APACHE_RUN_GROUP=$(id -g)" >> .env # Do we need these separately? # git clone $GIT_BASE/mdp-lib.git From b831a1c1756f2468e56ccc96478fd855ee060b82 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 12:09:00 -0400 Subject: [PATCH 21/28] Indicate pt is available and ls is not --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3222a3555..ef4f8c472 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,9 @@ In your browser: * catalog solr: `http://localhost:9033` * full-text solr: `http://localhost:8983` -imgsrv: +PageZTurner & imgsrv: +* `http://localhost:8888/cgi/pt?id=test.pd_open` * `http://localhost:8888/cgi/imgsrv/cover?id=test.pd_open` * `http://localhost:8888/cgi/imgsrv/image?id=test.pd_open&seq=1` * `http://localhost:8888/cgi/imgsrv/html?id=test.pd_open&seq=1` @@ -63,10 +64,11 @@ mysql -h 127.0.0.1 -p 3307 -u mdp-admin -p Huzzah! 
Not yet configured: -* `http://localhost:8888/cgi/pt?id=test.pd_open` * `http://localhost:8888/cgi/mb` +* `http://localhost:8888/cgi/ls` * `http://localhost:8888/cgi/whoami` * `http://localhost:8888/cgi/ping` +* etc ## How this works (for now) From 871ef4b6e46364b54e58370426877e4834b918fb Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 12:10:27 -0400 Subject: [PATCH 22/28] correct port in README --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ef4f8c472..8ca3fa7f5 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,13 @@ In your browser: * catalog solr: `http://localhost:9033` * full-text solr: `http://localhost:8983` -PageZTurner & imgsrv: +PageTurner & imgsrv: -* `http://localhost:8888/cgi/pt?id=test.pd_open` -* `http://localhost:8888/cgi/imgsrv/cover?id=test.pd_open` -* `http://localhost:8888/cgi/imgsrv/image?id=test.pd_open&seq=1` -* `http://localhost:8888/cgi/imgsrv/html?id=test.pd_open&seq=1` -* `http://localhost:8888/cgi/imgsrv/download/pdf?id=test.pd_open&seq=1&attachment=0` +* `http://localhost:8080/cgi/pt?id=test.pd_open` +* `http://localhost:8080/cgi/imgsrv/cover?id=test.pd_open` +* `http://localhost:8080/cgi/imgsrv/image?id=test.pd_open&seq=1` +* `http://localhost:8080/cgi/imgsrv/html?id=test.pd_open&seq=1` +* `http://localhost:8080/cgi/imgsrv/download/pdf?id=test.pd_open&seq=1&attachment=0` mysql is exposed at 127.0.0.1:3307. The default username & password with write access is `mdp-admin` / `mdp-admin` (needless to say, do not use this image in @@ -64,10 +64,10 @@ mysql -h 127.0.0.1 -p 3307 -u mdp-admin -p Huzzah! 
Not yet configured: -* `http://localhost:8888/cgi/mb` -* `http://localhost:8888/cgi/ls` -* `http://localhost:8888/cgi/whoami` -* `http://localhost:8888/cgi/ping` +* `http://localhost:8080/cgi/mb` +* `http://localhost:8080/cgi/ls` +* `http://localhost:8080/cgi/whoami` +* `http://localhost:8080/cgi/ping` * etc ## How this works (for now) From 580d9b6da0e93f253799a825df72d38e9ecb8c73 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 12:11:49 -0400 Subject: [PATCH 23/28] ensure cache directory is present the web server now runs as the user running setup.sh, so cache needs to be writable by that user - it was being created & owned by root --- cache/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cache/.keep diff --git a/cache/.keep b/cache/.keep new file mode 100644 index 000000000..e69de29bb From 9d0d5b4564a3ddc90d48fb8cc53414e2b80e5e20 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 12:28:26 -0400 Subject: [PATCH 24/28] Update instructions for sample item * Fix mount for slip output --- README.md | 4 ++++ docker-compose.yml | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ca3fa7f5..16c7fcb55 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,10 @@ bundle install bundle exec ruby stage_item.rb uc2.ark:/13960/t4mk66f1d ark+=13960=t4mk66f1d.zip ark+=13960=t4mk66f1d.mets.xml ``` +To enhance later: The zip and METS must be named as they are in the actual +repository -- if you name them "foo.zip" or "foo.xml" they will not be renamed, +and full-text indexing and PageTurner will not be able to find the item. 
+ ## TODO - [ ] adding `pt` requires filling out more of the `ht_web` tables (namely `mb_*`) diff --git a/docker-compose.yml b/docker-compose.yml index b46848dc8..e275b55a0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -116,8 +116,9 @@ services: user: ${CURRENT_USER} volumes: - ./slip:/htapps/babel/slip - # this is where docs-j saves output - ./logs:/htapps/babel/logs + # this is where docs-j saves output + - ./slip/sample:/htapps/babel/logs/tmp - "./sample-data/sdr1:/sdr1" - "./sample-data/etc:/htapps/babel/etc" - "./sample-data:/tmp/sample-data" From 5bf43443129c490acc7fe72ac6bddeb892397dbd Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 13:03:34 -0400 Subject: [PATCH 25/28] link parent dir env file --- stage-item/.env | 1 + 1 file changed, 1 insertion(+) create mode 120000 stage-item/.env diff --git a/stage-item/.env b/stage-item/.env new file mode 120000 index 000000000..4a82335f5 --- /dev/null +++ b/stage-item/.env @@ -0,0 +1 @@ +../.env \ No newline at end of file From 718cb4261a91c215c140c6f102c4353537578370 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 13:07:16 -0400 Subject: [PATCH 26/28] update TODO --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16c7fcb55..8236deef4 100644 --- a/README.md +++ b/README.md @@ -106,11 +106,12 @@ bundle install bundle exec ruby stage_item.rb uc2.ark:/13960/t4mk66f1d ark+=13960=t4mk66f1d.zip ark+=13960=t4mk66f1d.mets.xml ``` -To enhance later: The zip and METS must be named as they are in the actual +Note that the zip and METS must be named as they are in the actual repository -- if you name them "foo.zip" or "foo.xml" they will not be renamed, and full-text indexing and PageTurner will not be able to find the item. 
## TODO -- [ ] adding `pt` requires filling out more of the `ht_web` tables (namely `mb_*`) +- [ ] add `mb` and `ls` - [ ] easy mechanism to generate placeholder volumes in `imgsrv-sample-data` that correspond to the records in the catalog +- [ ] make it easier to fetch real volumes From 5719d799120e5178252196a37e7fb0eabc6e3eb6 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 30 Mar 2023 15:07:50 -0400 Subject: [PATCH 27/28] Add additional TODOs to README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8236deef4..362176a98 100644 --- a/README.md +++ b/README.md @@ -113,5 +113,7 @@ and full-text indexing and PageTurner will not be able to find the item. ## TODO - [ ] add `mb` and `ls` +- [ ] ensure database user can write to relevant tables +- [ ] link to documentation for important tasks - e.g. running apps under debugging, updating css/js, etc - [ ] easy mechanism to generate placeholder volumes in `imgsrv-sample-data` that correspond to the records in the catalog - [ ] make it easier to fetch real volumes From 3c3e05bb8dbc0a6e7449ebc957eb0b5e086d65d8 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Fri, 31 Mar 2023 11:02:14 -0400 Subject: [PATCH 28/28] continue taking stuff off of branches - build indexer rather than using image (which may not exist) - take pt & ssd off of branches --- docker-compose.yml | 2 +- setup.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index e275b55a0..3ceeb1753 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -134,7 +134,7 @@ services: - indexing traject: - image: ghcr.io/hathitrust/catalog-indexer-unstable + build: ./hathitrust_catalog_indexer user: ${CURRENT_USER} environment: - SOLR_URL=http://solr-sdr-catalog:9033/solr/catalog diff --git a/setup.sh b/setup.sh index 43611704a..c398a5b03 100755 --- a/setup.sh +++ b/setup.sh @@ -22,8 +22,8 @@ git clone --recurse-submodules $GIT_BASE/imgsrv git clone 
--recurse-submodules $GIT_BASE/imgsrv-sample-data ./sample-data git clone --recurse-submodules $GIT_BASE/catalog git clone --recurse-submodules $GIT_BASE/common -git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/pt -git clone --recurse-submodules -b DEV-663-docker $GIT_BASE/ssd +git clone --recurse-submodules $GIT_BASE/pt +git clone --recurse-submodules $GIT_BASE/ssd git clone --recurse-submodules $GIT_BASE/hathitrust_catalog_indexer git clone --recurse-submodules $GIT_BASE/slip git clone --recurse-submodules -b DEV-661-docker $GIT_BASE/lss_solr_configs