diff --git a/delphi/latents/constructors.py b/delphi/latents/constructors.py
index 95aee37c..fa857bef 100644
--- a/delphi/latents/constructors.py
+++ b/delphi/latents/constructors.py
@@ -47,7 +47,6 @@ def prepare_non_activating_examples(
         NonActivatingExample(
             tokens=toks,
             activations=acts,
-            normalized_activations=None,
             distance=distance,
             str_tokens=tokenizer.batch_decode(toks),
         )
@@ -281,7 +280,6 @@ def constructor(
         ActivatingExample(
             tokens=toks,
             activations=acts,
-            normalized_activations=None,
         )
         for toks, acts in zip(token_windows, act_windows)
     ]
diff --git a/delphi/latents/latents.py b/delphi/latents/latents.py
index 5e5611c2..91a4b176 100644
--- a/delphi/latents/latents.py
+++ b/delphi/latents/latents.py
@@ -75,12 +75,6 @@ class Example:
     activations: Float[Tensor, "ctx_len"]
     """Activation values for the input sequence."""
 
-    str_tokens: list[str] | None = None
-    """Tokenized input sequence as strings."""
-
-    normalized_activations: Optional[Float[Tensor, "ctx_len"]] = None
-    """Activations quantized to integers in [0, 10]."""
-
     @property
     def max_activation(self) -> float:
         """
@@ -98,6 +92,12 @@ class ActivatingExample(Example):
     An example of a latent that activates a model.
     """
 
+    normalized_activations: Optional[Float[Tensor, "ctx_len"]] = None
+    """Activations quantized to integers in [0, 10]."""
+
+    str_tokens: Optional[list[str]] = None
+    """Tokenized input sequence as strings."""
+
     quantile: int = 0
     """The quantile of the activating example."""
 
@@ -108,6 +108,9 @@ class NonActivatingExample(Example):
     An example of a latent that does not activate a model.
     """
 
+    str_tokens: list[str]
+    """Tokenized input sequence as strings."""
+
     distance: float = 0.0
     """
     The distance from the neighbouring latent.
@@ -125,7 +128,7 @@ class LatentRecord:
     """The latent associated with the record."""
 
     examples: list[ActivatingExample] = field(default_factory=list)
-    """Example sequences where the latent activations, assumed to be sorted in
+    """Example sequences where the latent activates, assumed to be sorted in
     descending order by max activation."""
 
     not_active: list[NonActivatingExample] = field(default_factory=list)
diff --git a/delphi/scorers/embedding/embedding.py b/delphi/scorers/embedding/embedding.py
index 2de89874..ed911866 100644
--- a/delphi/scorers/embedding/embedding.py
+++ b/delphi/scorers/embedding/embedding.py
@@ -1,9 +1,9 @@
 import asyncio
 import random
 from dataclasses import dataclass
-from typing import NamedTuple
+from typing import NamedTuple, Sequence
 
-from transformers import PreTrainedTokenizer
+from delphi.latents.latents import ActivatingExample, NonActivatingExample
 
 from ...latents import Example, LatentRecord
 from ..scorer import Scorer, ScorerResult
@@ -33,56 +33,53 @@ class EmbeddingScorer(Scorer):
     def __init__(
         self,
         model,
-        tokenizer: PreTrainedTokenizer | None = None,
         verbose: bool = False,
         **generation_kwargs,
     ):
         self.model = model
         self.verbose = verbose
-        self.tokenizer = tokenizer
         self.generation_kwargs = generation_kwargs
 
-    async def __call__(  # type: ignore
-        self,  # type: ignore
-        record: LatentRecord,  # type: ignore
-    ) -> ScorerResult:  # type: ignore
+    async def __call__(
+        self,
+        record: LatentRecord,
+    ) -> ScorerResult:
         samples = self._prepare(record)
         random.shuffle(samples)
 
         results = self._query(
             record.explanation,
-            samples,  # type: ignore
+            samples,
         )
 
         return ScorerResult(record=record, score=results)
 
-    def call_sync(self, record: LatentRecord) -> list[EmbeddingOutput]:
-        return asyncio.run(self.__call__(record))  # type: ignore
+    def call_sync(self, record: LatentRecord) -> ScorerResult:
+        return asyncio.run(self.__call__(record))
 
-    def _prepare(self, record: LatentRecord) -> list[list[Sample]]:
+    def _prepare(self, record: LatentRecord) -> list[Sample]:
         """
         Prepare and shuffle a list of samples for classification.
         """
+        samples = []
+
+        assert (
+            record.extra_examples is not None
+        ), "Extra (non-activating) examples need to be provided"
 
-        defaults = {
-            "tokenizer": self.tokenizer,
-        }
-        samples = examples_to_samples(
-            record.extra_examples,  # type: ignore
-            distance=-1,
-            **defaults,  # type: ignore
+        samples.extend(
+            examples_to_samples(
+                record.extra_examples,
+            )
         )
 
-        for i, examples in enumerate(record.test):
-            samples.extend(
-                examples_to_samples(
-                    examples,  # type: ignore
-                    distance=i + 1,
-                    **defaults,  # type: ignore
-                )
+        samples.extend(
+            examples_to_samples(
+                record.test,
             )
+        )
 
-        return samples  # type: ignore
+        return samples
@@ -93,38 +90,39 @@ def _query(self, explanation: str, samples: list[Sample]) -> list[EmbeddingOutpu
         explanation_string = (
         query_embeding = self.model.encode(explanation_prompt)
         samples_text = [sample.text for sample in samples]
-        # # Temporary batching
-        # sample_embedings = []
-        # for i in range(0, len(samples_text), 10):
-        #     sample_embedings.extend(self.model.encode(samples_text[i:i+10]))
         sample_embedings = self.model.encode(samples_text)
         similarity = self.model.similarity(query_embeding, sample_embedings)[0]
         results = []
         for i in range(len(samples)):
-            # print(i)
             samples[i].data.similarity = similarity[i].item()
             results.append(samples[i].data)
         return results
 
 
 def examples_to_samples(
-    examples: list[Example],
-    tokenizer: PreTrainedTokenizer,
-    **sample_kwargs,
+    examples: Sequence[Example],
 ) -> list[Sample]:
     samples = []
     for example in examples:
-        if tokenizer is not None:
-            text = "".join(tokenizer.batch_decode(example.tokens))
-        else:
-            text = "".join(example.tokens)
+        assert isinstance(example, ActivatingExample) or isinstance(
+            example, NonActivatingExample
+        )
+        assert example.str_tokens is not None
+        text = "".join(str(token) for token in example.str_tokens)
         activations = example.activations.tolist()
         samples.append(
             Sample(
                 text=text,
                 activations=activations,
-                data=EmbeddingOutput(text=text, **sample_kwargs),
+                data=EmbeddingOutput(
+                    text=text,
+                    distance=(
+                        example.quantile
+                        if isinstance(example, ActivatingExample)
+                        else example.distance
+                    ),
+                ),
             )
         )
diff --git a/delphi/scorers/surprisal/surprisal.py b/delphi/scorers/surprisal/surprisal.py
index ee92b1c1..7f42be04 100644
--- a/delphi/scorers/surprisal/surprisal.py
+++ b/delphi/scorers/surprisal/surprisal.py
@@ -1,15 +1,17 @@
 import random
 from dataclasses import dataclass
-from typing import NamedTuple
+from typing import NamedTuple, Sequence
 
 import torch
 from simple_parsing import field
 from torch.nn.functional import cross_entropy
-from transformers import PreTrainedTokenizer
-
-from delphi.utils import assert_type
-
-from ...latents import ActivatingExample, Example, LatentRecord
+from ...latents import (
+    ActivatingExample,
+    Example,
+    LatentRecord,
+    NonActivatingExample,
+)
 from ..scorer import Scorer, ScorerResult
 from .prompts import BASEPROMPT as base_prompt
@@ -44,21 +46,19 @@ class SurprisalScorer(Scorer):
     def __init__(
         self,
         model,
-        tokenizer,
         verbose: bool,
         batch_size: int,
         **generation_kwargs,
     ):
         self.model = model
         self.verbose = verbose
-        self.tokenizer = tokenizer
         self.batch_size = batch_size
         self.generation_kwargs = generation_kwargs
 
-    async def __call__(  # type: ignore
-        self,  # type: ignore
-        record: LatentRecord,  # type: ignore
-    ) -> ScorerResult:  # type: ignore
+    async def __call__(
+        self,
+        record: LatentRecord,
+    ) -> ScorerResult:
         samples = self._prepare(record)
 
         random.shuffle(samples)
@@ -74,35 +74,25 @@ def _prepare(self, record: LatentRecord) -> list[Sample]:
         Prepare and shuffle a list of samples for classification.
         """
 
-        defaults = {
-            "tokenizer": self.tokenizer,
-        }
-
         assert record.extra_examples is not None, "No extra examples provided"
         samples = examples_to_samples(
            record.extra_examples,
-            distance=-1,
-            **defaults,
         )
 
-        for i, examples in enumerate(record.test):
-            examples = assert_type(list, examples)
-            samples.extend(
-                examples_to_samples(
-                    examples,
-                    distance=i + 1,
-                    **defaults,
-                )
+        samples.extend(
+            examples_to_samples(
+                record.test,
             )
+        )
 
         return samples
 
     def compute_loss_with_kv_cache(
         self, explanation: str, samples: list[Sample], batch_size=2
     ):
-        # print(explanation_prompt)
         model = self.model
         tokenizer = self.model.tokenizer
+        assert tokenizer is not None, "Tokenizer is not set in model.tokenizer"
         # Tokenize explanation
         tokenizer.padding_side = "right"
         tokenizer.pad_token = tokenizer.eos_token
@@ -187,20 +177,28 @@ def _query(self, explanation: str, samples: list[Sample]) -> list[SurprisalOutpu
 
 def examples_to_samples(
-    examples: list[Example] | list[ActivatingExample],
-    tokenizer: PreTrainedTokenizer,
-    **sample_kwargs,
+    examples: Sequence[Example],
 ) -> list[Sample]:
     samples = []
     for example in examples:
-        text = "".join(tokenizer.batch_decode(example.tokens))
+        assert isinstance(example, ActivatingExample) or isinstance(
+            example, NonActivatingExample
+        )
+        assert example.str_tokens is not None
+        text = "".join(str(token) for token in example.str_tokens)
         activations = example.activations.tolist()
         samples.append(
             Sample(
                 text=text,
                 activations=activations,
                 data=SurprisalOutput(
-                    activations=activations, text=text, **sample_kwargs
+                    activations=activations,
+                    text=text,
+                    distance=(
+                        example.quantile
+                        if isinstance(example, ActivatingExample)
+                        else example.distance
+                    ),
                 ),
             )
        )
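A minimal usage sketch (not part of the patch itself) of the contract these changes establish: scorers no longer hold a tokenizer, so examples must arrive with str_tokens already decoded, and examples_to_samples fills the output's distance field from quantile (activating) or distance (non-activating) on its own. Token ids, strings, and activation values below are illustrative assumptions, as is the exact import path of examples_to_samples.

import torch

from delphi.latents import ActivatingExample
from delphi.scorers.embedding.embedding import examples_to_samples

# str_tokens must be populated when the example is built (as the
# constructors above now do); examples_to_samples() asserts this
# instead of decoding token ids itself.
example = ActivatingExample(
    tokens=torch.tensor([17, 42, 7]),           # token ids (illustrative)
    activations=torch.tensor([0.0, 3.2, 1.1]),  # per-token activations
    str_tokens=["The", " latent", " fires"],    # decoded ahead of time
    quantile=1,
)

samples = examples_to_samples([example])
assert samples[0].text == "The latent fires"
# For activating examples the quantile is recorded as the distance:
assert samples[0].data.distance == 1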