
feat: cache eviction strategy #76

Merged · 4 commits · Aug 19, 2024
8 changes: 8 additions & 0 deletions apps/proxy/.env.example
@@ -3,6 +3,14 @@ AWS_ENDPOINT_URL_S3=http://host.docker.internal:54321/storage/v1/s3
AWS_S3_BUCKET=s3fs
AWS_SECRET_ACCESS_KEY=850181e4652dd023b7a98c58ae0d2d34bd487ee0cc3254aed6eda37307425907
AWS_REGION=local
# Cache disk usage threshold, as a percentage of total disk space
CACHE_DISK_USAGE_THRESHOLD=90
CACHE_PATH=/var/lib/postgres-new/cache
# Minimum interval between cache eviction runs, in hours
CACHE_SCHEDULE_INTERVAL=1
CACHE_TIMESTAMP_FILE=/var/lib/postgres-new/delete_cache_last_run
# Cache time-to-live in hours
CACHE_TTL=24
S3FS_MOUNT=/mnt/s3
SUPABASE_SERVICE_ROLE_KEY="<service-role-key>"
SUPABASE_URL="<supabase-url>"
9 changes: 9 additions & 0 deletions apps/proxy/fly.toml
@@ -6,6 +6,15 @@ primary_region = 'iad'
dockerfile = "Dockerfile"
ignorefile = ".dockerignore"

[env]
CACHE_DISK_USAGE_THRESHOLD = "90"
CACHE_SCHEDULE_INTERVAL = "1"
CACHE_TIMESTAMP_FILE = "/var/lib/postgres-new/delete_cache_last_run"
CACHE_TTL = "24"
CACHE_PATH = "/var/lib/postgres-new/cache"
S3FS_MOUNT = "/mnt/s3"
WILDCARD_DOMAIN = "db.postgres.new"

[[services]]
internal_port = 5432
protocol = "tcp"
3 changes: 2 additions & 1 deletion apps/proxy/package.json
@@ -14,7 +14,8 @@
"@supabase/supabase-js": "^2.45.1",
"find-up": "^7.0.0",
"pg-gateway": "0.3.0-alpha.6",
"tar": "^7.4.3"
"tar": "^7.4.3",
"zod": "^3.23.8"
},
"devDependencies": {
"@postgres-new/supabase": "*",
110 changes: 110 additions & 0 deletions apps/proxy/src/delete-cache.ts
@@ -0,0 +1,110 @@
import * as fs from 'node:fs/promises'
import * as path from 'node:path'
import { exec } from 'node:child_process'
import { promisify } from 'node:util'
import { env } from './env.js'

const execAsync = promisify(exec)

async function deleteOldFolders() {
  const now = Date.now()
  const ttlInMillis = env.CACHE_TTL * 60 * 60 * 1000

  try {
    const folders = await fs.readdir(env.CACHE_PATH)
    for (const folder of folders) {
      const folderPath = path.join(env.CACHE_PATH, folder)
      const stats = await fs.stat(folderPath)

      if (stats.isDirectory() && now - stats.mtimeMs > ttlInMillis) {
        await fs.rm(folderPath, { recursive: true, force: true })
        console.log(`Deleted folder: ${folderPath}`)
      }
    }
  } catch (err) {
    console.error('Failed to delete old folders:', err)
  }
}

async function scriptAlreadyRan() {
  try {
    // The timestamp file stores seconds since the epoch, so the interval
    // (hours) must be converted to seconds for the comparison
    const lastRun = parseInt(await fs.readFile(env.CACHE_TIMESTAMP_FILE, 'utf8'), 10)
    const now = Math.floor(Date.now() / 1000)
    const diff = now - lastRun
    return diff < env.CACHE_SCHEDULE_INTERVAL * 60 * 60
  } catch (err) {
    // File does not exist: the script has never run
    if (err instanceof Error && 'code' in err && err.code === 'ENOENT') {
      return false
    }
    throw err
  }
}

async function updateTimestampFile() {
  const now = Math.floor(Date.now() / 1000).toString()
  await fs.writeFile(env.CACHE_TIMESTAMP_FILE, now)
}

/**
 * Get the disk usage of the root filesystem as a percentage.
 */
async function getDiskUsage() {
  // awk 'NR==2 {print $5}' takes the 5th column of the second line of `df /`,
  // which is the "Use%" value; sed 's/%//' strips the trailing %
  const command = `df / | awk 'NR==2 {print $5}' | sed 's/%//'`
  const { stdout } = await execAsync(command)
  return parseInt(stdout.trim(), 10)
}

async function getFoldersByModificationTime() {
  const folders = await fs.readdir(env.CACHE_PATH, { withFileTypes: true })
  const folderStats = await Promise.all(
    folders
      .filter((dirent) => dirent.isDirectory())
      .map(async (dirent) => {
        const fullPath = path.join(env.CACHE_PATH, dirent.name)
        const stats = await fs.stat(fullPath)
        return { path: fullPath, mtime: stats.mtime.getTime() }
      })
  )
  // Oldest first, so eviction starts with the least recently modified folders
  return folderStats.sort((a, b) => a.mtime - b.mtime).map((folder) => folder.path)
}

export async function deleteCache() {
  if (await scriptAlreadyRan()) {
    console.log(`Script already ran in the last ${env.CACHE_SCHEDULE_INTERVAL} hour(s), skipping.`)
    return
  }

  await updateTimestampFile()

  // Always delete old folders based on TTL
  await deleteOldFolders()

  let diskUsage = await getDiskUsage()

  // If disk usage exceeds the threshold, delete additional old folders
  if (diskUsage >= env.CACHE_DISK_USAGE_THRESHOLD) {
    console.log(
      `Disk usage is at ${diskUsage}%, which is above the threshold of ${env.CACHE_DISK_USAGE_THRESHOLD}%.`
    )

    const folders = await getFoldersByModificationTime()

    // Delete folders one by one until disk usage drops below the threshold
    for (const folder of folders) {
      console.log(`Deleting folder: ${folder}`)
      await fs.rm(folder, { recursive: true, force: true })

      diskUsage = await getDiskUsage()
      if (diskUsage < env.CACHE_DISK_USAGE_THRESHOLD) {
        console.log(`Disk usage is now at ${diskUsage}%, which is below the threshold.`)
        break
      }
    }
  } else {
    console.log(
      `Disk usage is at ${diskUsage}%, which is below the threshold of ${env.CACHE_DISK_USAGE_THRESHOLD}%.`
    )
  }
}
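
For local testing, the eviction pass can also be exercised on its own. A minimal sketch of a one-off runner (hypothetical, not part of this PR; it assumes the CACHE_* variables from .env.example are set in the environment):

// run-delete-cache.ts (hypothetical helper, not in this PR)
// Triggers a single eviction pass; the CACHE_TIMESTAMP_FILE throttle still applies.
import { deleteCache } from './delete-cache.js'

await deleteCache()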
21 changes: 21 additions & 0 deletions apps/proxy/src/env.ts
@@ -0,0 +1,21 @@
import { z } from 'zod'

export const env = z
  .object({
    AWS_ACCESS_KEY_ID: z.string(),
    AWS_ENDPOINT_URL_S3: z.string(),
    AWS_S3_BUCKET: z.string(),
    AWS_SECRET_ACCESS_KEY: z.string(),
    AWS_REGION: z.string(),
    CACHE_DISK_USAGE_THRESHOLD: z.string().transform((val) => parseInt(val, 10)),
    CACHE_PATH: z.string(),
    CACHE_SCHEDULE_INTERVAL: z.string().transform((val) => parseInt(val, 10)),
    CACHE_TIMESTAMP_FILE: z.string(),
    CACHE_TTL: z.string().transform((val) => parseInt(val, 10)),
    DATA_MOUNT: z.string(),
    S3FS_MOUNT: z.string(),
    SUPABASE_SERVICE_ROLE_KEY: z.string(),
    SUPABASE_URL: z.string(),
    WILDCARD_DOMAIN: z.string(),
  })
  .parse(process.env)
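
Note that z.string().transform((val) => parseInt(val, 10)) yields NaN for a malformed value rather than failing at startup, and DATA_MOUNT remains required here even though index.ts no longer reads it. A stricter variant for the numeric settings (a sketch using zod's z.coerce, an assumption rather than what this PR does) could look like:

// Hypothetical stricter schema for the numeric settings (not in this PR)
const cacheNumbers = z.object({
  CACHE_DISK_USAGE_THRESHOLD: z.coerce.number().int().min(0).max(100),
  CACHE_SCHEDULE_INTERVAL: z.coerce.number().int().positive(),
  CACHE_TTL: z.coerce.number().int().positive(),
})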
24 changes: 15 additions & 9 deletions apps/proxy/src/index.ts
@@ -10,12 +10,15 @@ import { PostgresConnection, ScramSha256Data, TlsOptions } from 'pg-gateway'
import { createClient } from '@supabase/supabase-js'
import type { Database } from '@postgres-new/supabase'
import { findUp } from 'find-up'
import { env } from './env.js'
import { deleteCache } from './delete-cache.js'
import path from 'node:path'

-const supabaseUrl = process.env.SUPABASE_URL ?? 'http://127.0.0.1:54321'
-const supabaseKey = process.env.SUPABASE_SERVICE_ROLE_KEY ?? ''
-const dataMount = process.env.DATA_MOUNT ?? './data'
-const s3fsMount = process.env.S3FS_MOUNT ?? './s3'
-const wildcardDomain = process.env.WILDCARD_DOMAIN ?? 'db.example.com'
+const supabaseUrl = env.SUPABASE_URL
+const supabaseKey = env.SUPABASE_SERVICE_ROLE_KEY
+const s3fsMount = env.S3FS_MOUNT
+const wildcardDomain = env.WILDCARD_DOMAIN
const packageLockJsonPath = await findUp('package-lock.json')
if (!packageLockJsonPath) {
throw new Error('package-lock.json not found')
@@ -31,10 +34,9 @@ const pgliteVersion = `(PGlite ${packageLockJson.packages['node_modules/@electri

const dumpDir = `${s3fsMount}/dbs`
const tlsDir = `${s3fsMount}/tls`
-const dbDir = `${dataMount}/dbs`

await mkdir(dumpDir, { recursive: true })
-await mkdir(dbDir, { recursive: true })
+await mkdir(env.CACHE_PATH, { recursive: true })
await mkdir(tlsDir, { recursive: true })

const tls: TlsOptions = {
@@ -77,6 +79,10 @@ const supabase = createClient<Database>(supabaseUrl, supabaseKey)
const server = net.createServer((socket) => {
  let db: PGliteInterface

  // Kick off cache eviction opportunistically on each new connection;
  // delete-cache.ts throttles runs to once per CACHE_SCHEDULE_INTERVAL hours
  deleteCache().catch((err) => {
    console.error(`Error deleting cache: ${err}`)
  })

  const connection = new PostgresConnection(socket, {
    serverVersion: async () => {
      const {
@@ -161,12 +167,12 @@ const server = net.createServer((socket) => {

    console.log(`Serving database '${databaseId}'`)

-    const dbPath = `${dbDir}/${databaseId}`
+    const dbPath = path.join(env.CACHE_PATH, databaseId)

    if (!(await fileExists(dbPath))) {
      console.log(`Database '${databaseId}' is not cached, downloading...`)

-      const dumpPath = `${dumpDir}/${databaseId}.tar.gz`
+      const dumpPath = path.join(dumpDir, `${databaseId}.tar.gz`)

      if (!(await fileExists(dumpPath))) {
        connection.sendError({
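The cache-miss branch above calls a fileExists helper that sits outside the changed lines, so it does not appear in this diff. It presumably looks something like this sketch (assumed shape, not confirmed by the PR):

// Sketch of the fileExists helper referenced above (assumed, not part of this diff)
import { access } from 'node:fs/promises'

async function fileExists(path: string): Promise<boolean> {
  try {
    await access(path)
    return true
  } catch {
    return false
  }
}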
3 changes: 2 additions & 1 deletion package-lock.json

(Generated lockfile; diff not rendered.)