@@ -107,8 +107,8 @@ def parse_top_logprobs(top_logprobs: dict[str, float]) -> OrderedDict[int, float
     """
     probabilities_by_distribution_value = OrderedDict()
     for token, contents in top_logprobs.items():
-        logprob = contents.logprob
-        decoded_token = contents.decoded_token
+        logprob = contents.logprob  # type: ignore
+        decoded_token = contents.decoded_token  # type: ignore
         if decoded_token in VALID_ACTIVATION_TOKENS:
             token_as_int = int(decoded_token)
             probabilities_by_distribution_value[token_as_int] = np.exp(logprob)
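Aside, not part of the commit: the hunk above turns each kept logprob into an unnormalized probability with np.exp. Below is a minimal sketch of how those per-value probabilities could be reduced to a single expected activation, in the spirit of the compute_predicted_activation_stats_for_token helper named in the next hunk; the function name and the normalization details are assumptions for illustration, not the repository's code.

from collections import OrderedDict

import numpy as np


def expected_activation(probs_by_value: "OrderedDict[int, float]") -> float:
    # probs_by_value maps an integer activation value (e.g. 0..10) to exp(logprob),
    # the same structure parse_top_logprobs builds in the hunk above.
    total = sum(probs_by_value.values())
    if total == 0.0:
        return 0.0
    # Normalize the masses, then take the probability-weighted mean of the values.
    return float(sum(value * (p / total) for value, p in probs_by_value.items()))


# Example: top logprobs observed for the activation tokens "0", "2", and "10".
probs = OrderedDict(
    (value, np.exp(logprob)) for value, logprob in [(0, -0.1), (2, -2.3), (10, -4.0)]
)
print(expected_activation(probs))  # probability-weighted mean, between 0 and 10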
@@ -134,7 +134,7 @@ def compute_predicted_activation_stats_for_token(


 def parse_simulation_response(
-    response: dict[str, Any],
+    response: Any,
     tokenized_prompt: list[int],
     tab_token: int,
     tokens: Sequence[str],
@@ -250,11 +250,11 @@ async def simulate(
         else:
             assert isinstance(prompt, str)

-            response = await self.client.generate(prompt, **sampling_params)
-            tokenized_prompt = self.client.tokenizer.apply_chat_template(
+            response = await self.client.generate(prompt, **sampling_params)  # type: ignore
+            tokenized_prompt = self.client.tokenizer.apply_chat_template(  # type: ignore
                 prompt, add_generation_prompt=True
             )
-            tab_token = self.client.tokenizer.encode("\t")[1]
+            tab_token = self.client.tokenizer.encode("\t")[1]  # type: ignore
         logger.debug("response in score_explanation_by_activations is %s", response)
         try:
             result = parse_simulation_response(
@@ -287,7 +287,7 @@ def make_simulation_prompt(
         # Consider reconciling them.
         prompt_builder = PromptBuilder()
         prompt_builder.add_message(
-            "system",
+            "system",  # type: ignore
             """We're studying neurons in a neural network.
 Each neuron looks for some particular thing in a short document.
 Look at summary of what the neuron does, and try to predict how it will fire on each token.
@@ -299,7 +299,7 @@ def make_simulation_prompt(
         few_shot_examples = self.few_shot_example_set.get_examples()
         for i, example in enumerate(few_shot_examples):
             prompt_builder.add_message(
-                "user",
+                "user",  # type: ignore
                 f"\n\nNeuron {i + 1}\nExplanation of neuron {i + 1} behavior: {EXPLANATION_PREFIX} "
                 f"{example.explanation}",
             )
@@ -309,17 +309,17 @@ def make_simulation_prompt(
                 start_indices=example.first_revealed_activation_indices,
             )
             prompt_builder.add_message(
-                "assistant", f"\nActivations: {formatted_activation_records}\n"
+                "assistant", f"\nActivations: {formatted_activation_records}\n"  # type: ignore
             )

         prompt_builder.add_message(
-            "user",
+            "user",  # type: ignore
             f"\n\nNeuron {len(few_shot_examples) + 1}\nExplanation of neuron "
             f"{len(few_shot_examples) + 1} behavior: {EXPLANATION_PREFIX} "
             f"{self.explanation.strip()}",
         )
         prompt_builder.add_message(
-            "assistant",
+            "assistant",  # type: ignore
             f"\nActivations: {format_sequences_for_simulation([tokens])}",
         )
         return prompt_builder.build(self.prompt_format)
@@ -595,7 +595,7 @@ async def simulate(self, tokens: Sequence[str]) -> SequenceSimulation:

         result = SequenceSimulation(
             activation_scale=ActivationScale.SIMULATED_NORMALIZED_ACTIVATIONS,
-            expected_activations=predicted_activations,
+            expected_activations=predicted_activations,  # type: ignore
             # Since the predicted activation is just a sampled token, we don't have a distribution.
             distribution_values=[],
             distribution_probabilities=[],
@@ -614,7 +614,7 @@ def _make_simulation_prompt_json(
     assert explanation != ""
     prompt_builder = PromptBuilder()
     prompt_builder.add_message(
-        "system",
+        "system",  # type: ignore
         """We're studying neurons in a neural network. Each neuron looks for certain things in a short document. Your task is to read the explanation of what the neuron does, and predict the neuron's activations for each token in the document.

 For each document, you will see the full text of the document, then the tokens in the document with the activation left blank. You will print, in valid json, the exact same tokens verbatim, but with the activation values filled in according to the explanation. Pay special attention to the explanation's description of the context and order of tokens or words.
@@ -638,7 +638,7 @@ def _make_simulation_prompt_json(
 }
 """
         prompt_builder.add_message(
-            "user",
+            "user",  # type: ignore
             _format_record_for_logprob_free_simulation_json(
                 explanation=example.explanation,
                 activation_record=example.activation_records[0],
@@ -658,7 +658,7 @@ def _make_simulation_prompt_json(
 }
 """
         prompt_builder.add_message(
-            "assistant",
+            "assistant",  # type: ignore
             _format_record_for_logprob_free_simulation_json(
                 explanation=example.explanation,
                 activation_record=example.activation_records[0],
@@ -678,10 +678,10 @@ def _make_simulation_prompt_json(
 }
 """
     prompt_builder.add_message(
-        "user",
+        "user",  # type: ignore
         _format_record_for_logprob_free_simulation_json(
             explanation=explanation,
-            activation_record=ActivationRecord(tokens=tokens, activations=[]),
+            activation_record=ActivationRecord(tokens=tokens, activations=[]),  # type: ignore
             include_activations=False,
         ),
     )
@@ -698,7 +698,7 @@ def _make_simulation_prompt(
     assert explanation != ""
     prompt_builder = PromptBuilder()
     prompt_builder.add_message(
-        "system",
+        "system",  # type: ignore
         """We're studying neurons in a neural network. Each neuron looks for some particular thing in a short document. Look at an explanation of what the neuron does, and try to predict its activations on a particular token.

 The activation format is token<tab>activation, and activations range from 0 to 10. Most activations will be 0.
@@ -716,7 +716,7 @@ def _make_simulation_prompt(
             example.activation_records[0], include_activations=False
         )
         prompt_builder.add_message(
-            "user",
+            "user",  # type: ignore
             f"Neuron {i + 1}\nExplanation of neuron {i + 1} behavior: {EXPLANATION_PREFIX} "
             f"{example.explanation}\n\n"
             f"Sequence 1 Tokens without Activations:\n{tokens_without_activations}\n\n"
@@ -728,7 +728,7 @@ def _make_simulation_prompt(
             max_activation=few_shot_example_max_activation,
         )
         prompt_builder.add_message(
-            "assistant",
+            "assistant",  # type: ignore
             f"{tokens_with_activations}\n\n",
         )

@@ -737,7 +737,7 @@ def _make_simulation_prompt(
                 record, include_activations=False
             )
             prompt_builder.add_message(
-                "user",
+                "user",  # type: ignore
                 f"Sequence {record_index + 2} Tokens without Activations:\n{tks_without}\n\n"
                 f"Sequence {record_index + 2} Tokens with Activations:\n",
             )
@@ -747,16 +747,16 @@ def _make_simulation_prompt(
                 max_activation=few_shot_example_max_activation,
             )
             prompt_builder.add_message(
-                "assistant",
+                "assistant",  # type: ignore
                 f"{tokens_with_activations}\n\n",
             )

     neuron_index = len(few_shot_examples) + 1
     tokens_without_activations = _format_record_for_logprob_free_simulation(
-        ActivationRecord(tokens=tokens, activations=[]), include_activations=False
+        ActivationRecord(tokens=tokens, activations=[]), include_activations=False  # type: ignore
     )
     prompt_builder.add_message(
-        "user",
+        "user",  # type: ignore
         f"Neuron {neuron_index}\nExplanation of neuron {neuron_index} behavior: {EXPLANATION_PREFIX} "
         f"{explanation}\n\n"
         f"Sequence 1 Tokens without Activations:\n{tokens_without_activations}\n\n"
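Aside, not part of the commit: every prompt-building hunk above routes through PromptBuilder.add_message with the string roles "system", "user", and "assistant" that this commit annotates with # type: ignore. The repository's PromptBuilder itself is not shown in the diff, so the following is only a hedged sketch of the role/message pattern those calls imply; the class name and the build() behavior are assumptions for illustration, not the project's implementation.

from dataclasses import dataclass, field


@dataclass
class SketchPromptBuilder:
    """Illustrative stand-in for the PromptBuilder used in the diff (assumption)."""

    messages: list = field(default_factory=list)

    def add_message(self, role: str, content: str) -> None:
        # Mirrors the add_message("system" / "user" / "assistant", ...) calls above.
        self.messages.append({"role": role, "content": content})

    def build(self) -> list:
        # The real builder also takes a prompt format; this sketch just returns
        # chat-style messages.
        return list(self.messages)


builder = SketchPromptBuilder()
builder.add_message("system", "We're studying neurons in a neural network.")
builder.add_message("user", "Neuron 1\nExplanation of neuron 1 behavior: ...")
builder.add_message("assistant", "\nActivations: ...")
print(builder.build())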