From 10de798ec7041840c3ab51d72cb7332fd457e0e5 Mon Sep 17 00:00:00 2001 From: msukkari Date: Wed, 18 Sep 2024 17:27:17 -0700 Subject: [PATCH 1/7] remove page view provider --- src/app/layout.tsx | 5 ----- src/app/posthogPageView.tsx | 28 ---------------------------- 2 files changed, 33 deletions(-) delete mode 100644 src/app/posthogPageView.tsx diff --git a/src/app/layout.tsx b/src/app/layout.tsx index d698c6ec..c807ad82 100644 --- a/src/app/layout.tsx +++ b/src/app/layout.tsx @@ -9,10 +9,6 @@ import dynamic from "next/dynamic"; const inter = Inter({ subsets: ["latin"] }); -const PostHogPageView = dynamic(() => import('./posthogPageView'), { - ssr: false, - }) - export const metadata: Metadata = { title: "Sourcebot", description: "Sourcebot", @@ -31,7 +27,6 @@ export default function RootLayout({ > - { - // Track pageviews - if (pathname && posthog) { - let url = window.origin + pathname - if (searchParams.toString()) { - url = url + `?${searchParams.toString()}` - } - posthog.capture( - '$pageview', - { - '$current_url': url, - } - ) - } - }, [pathname, searchParams, posthog]) - - return null -} \ No newline at end of file From 7ff02ebdd20d7d4826ce852494fac9e4a7ff1ff7 Mon Sep 17 00:00:00 2001 From: msukkari Date: Wed, 18 Sep 2024 19:29:14 -0700 Subject: [PATCH 2/7] sanatize current_url and ip properties in all posthog events --- src/app/posthogProvider.tsx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/app/posthogProvider.tsx b/src/app/posthogProvider.tsx index 22ffc745..cc741890 100644 --- a/src/app/posthogProvider.tsx +++ b/src/app/posthogProvider.tsx @@ -9,7 +9,19 @@ if (typeof window !== 'undefined') { api_host: "/ingest", ui_host: NEXT_PUBLIC_POSTHOG_UI_HOST, person_profiles: 'identified_only', - capture_pageview: false, // Disable automatic pageview capture, as we capture manually + capture_pageview: false, // Disable automatic pageview capture + autocapture: false, // Disable automatic event capture + sanitize_properties: (properties: Record, _event: string) => { + if (properties['$current_url']) { + properties['$current_url'] = null; + } + if (properties['$ip']) { + properties['$ip'] = "null"; + } + + + return properties; + } }); } else { console.log("PostHog telemetry disabled"); From d08a50f06f269dfc05cc9eb3bac521fc9c12cb7c Mon Sep 17 00:00:00 2001 From: msukkari Date: Wed, 18 Sep 2024 19:38:11 -0700 Subject: [PATCH 3/7] add posthog usage info in README --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b644a67..b8c34cf3 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,9 @@ The zoekt binaries and web dependencies are placed into `bin` and `node_modules` ## Disabling Telemetry -By default, Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). You can disable this by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command: +By default, Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). We [sanatize all events](https://github.com/TaqlaAI/sourcebot/blob/main/src/app/posthogProvider.tsx) to ensure that no information about your code base leaves your machine. We don't include any information about your codebase in [any of the events we send](https://github.com/search?q=repo%3ATaqlaAI%2Fsourcebot++captureEvent&type=code). We send these events purely to track how many people our using Sourcebot and to ensure its health through query metadata (ex. search time, crashes, etc). + +You can disable this by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command: ```sh docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 ...stuff... ghcr.io/taqlaai/sourcebot:main ``` From ba9d5a6b7edf61252f758c3ef3745feab642df23 Mon Sep 17 00:00:00 2001 From: msukkari Date: Wed, 18 Sep 2024 20:53:46 -0700 Subject: [PATCH 4/7] remove unneccessary ip property removal since we disable ip collection on posthog side --- src/app/posthogProvider.tsx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/app/posthogProvider.tsx b/src/app/posthogProvider.tsx index cc741890..6f9bdea4 100644 --- a/src/app/posthogProvider.tsx +++ b/src/app/posthogProvider.tsx @@ -15,13 +15,9 @@ if (typeof window !== 'undefined') { if (properties['$current_url']) { properties['$current_url'] = null; } - if (properties['$ip']) { - properties['$ip'] = "null"; - } - return properties; - } + } }); } else { console.log("PostHog telemetry disabled"); From 43d35b8be71cb0d7020501eca8ad8182c9352eea Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 19 Sep 2024 13:14:33 -0700 Subject: [PATCH 5/7] add back ip sanitization on client side (we disabled it on server side but may as well also clear it on client) and revise README on telemetry --- README.md | 10 +++++----- src/app/posthogProvider.tsx | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b8c34cf3..c3a7ef99 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ The zoekt binaries and web dependencies are placed into `bin` and `node_modules` TODO -
+
**** BitBucket TODO
@@ -185,13 +185,13 @@ The zoekt binaries and web dependencies are placed into `bin` and `node_modules` 6. Go to `http://localhost:3000` - once a index has been created, you can start searching. -## Disabling Telemetry +## Telemetry -By default, Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). We [sanatize all events](https://github.com/TaqlaAI/sourcebot/blob/main/src/app/posthogProvider.tsx) to ensure that no information about your code base leaves your machine. We don't include any information about your codebase in [any of the events we send](https://github.com/search?q=repo%3ATaqlaAI%2Fsourcebot++captureEvent&type=code). We send these events purely to track how many people our using Sourcebot and to ensure its health through query metadata (ex. search time, crashes, etc). +By default, Sourcebot collects anonymized usage data through [PostHog](https://posthog.com/) to help us improve the performance and reliability of our tool. We do not collect or transmit [any information related to your codebase](https://github.com/search?q=repo:TaqlaAI/sourcebot++captureEvent&type=code). All events are [sanitized](https://github.com/TaqlaAI/sourcebot/blob/main/src/app/posthogProvider.tsx) to ensure that no sensitive or identifying details leave your machine. The data we collect includes general usage statistics and metadata such as query performance (e.g., search duration, error rates) to monitor the application's health and functionality. This information helps us better understand how Sourcebot is used and where improvements can be made :) -You can disable this by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command: +If you'd like to disable all telemetry, you can do so by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command: ```sh -docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 ...stuff... ghcr.io/taqlaai/sourcebot:main +docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 /* additional args */ ghcr.io/taqlaai/sourcebot:main ``` Or if you are building locally, add the following to your [.env](./.env) file: diff --git a/src/app/posthogProvider.tsx b/src/app/posthogProvider.tsx index 6f9bdea4..8c146ae2 100644 --- a/src/app/posthogProvider.tsx +++ b/src/app/posthogProvider.tsx @@ -12,9 +12,13 @@ if (typeof window !== 'undefined') { capture_pageview: false, // Disable automatic pageview capture autocapture: false, // Disable automatic event capture sanitize_properties: (properties: Record, _event: string) => { + // https://posthog.com/docs/libraries/js#config if (properties['$current_url']) { properties['$current_url'] = null; } + if (properties['$ip']) { + properties['$ip'] = null; + } return properties; } From 1b3e7308fbf29039b3fe1a50a8752923bab8e189 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 19 Sep 2024 13:16:24 -0700 Subject: [PATCH 6/7] add typo with asterisks in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c3a7ef99..cef6bb9f 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ The zoekt binaries and web dependencies are placed into `bin` and `node_modules` TODO
-
**** +
BitBucket TODO
From 4fcd1f2549eaa10ee53bcfbdb83f3d1d8e8171b9 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 19 Sep 2024 13:17:27 -0700 Subject: [PATCH 7/7] small grammar fix in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cef6bb9f..39eadf40 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ Sourcebot also supports indexing GitLab & BitBucket. Checkout the [index.json](. zoekt will now index your repositories (at `HEAD`). By default, it will re-index existing repositories every hour, and discover new repositories every 24 hours. -4. Go to `http://localhost:3000` - once a index has been created, you can start searching. +4. Go to `http://localhost:3000` - once an index has been created, you can start searching. ## Building Sourcebot @@ -182,7 +182,7 @@ The zoekt binaries and web dependencies are placed into `bin` and `node_modules` A `.sourcebot` directory will be created and zoekt will begin to index the repositories found given `config.json`. -6. Go to `http://localhost:3000` - once a index has been created, you can start searching. +6. Go to `http://localhost:3000` - once an index has been created, you can start searching. ## Telemetry