3 changes: 2 additions & 1 deletion CONTRIBUTING.md
@@ -29,7 +29,7 @@ You can always update the name later. If you want to cancel the prompt, select
1. **Write the prompt**. In the box labeled "Template," enter a Jinja expression.
See the [getting started guide](#getting-started-using-jinja-to-write-prompts)
and [cookbook](#jinja-cookbook) for details on how to write templates.
1. **Fill in metadata**. Fill in the metadata for the current prompt: reference, original task, choices in templates, and answer choices.
1. **Fill in metadata**. Fill in the metadata for the current prompt: reference, original task, choices in templates, metrics, languages, and answer choices.
See [Metadata](#metadata) for more details about these fields.
1. **Save the prompt**. Hit the "Save" button. The output of the prompt
applied to the current example will appear in the right sidebar.
@@ -124,6 +124,7 @@ to generate a question for a given answer would not.
the options for the possible outputs (regardless of whether `answer_choices` is used).
* **Metrics.** Use the multiselect widget to select all metrics commonly used to evaluate
this task. Choose “Other” if there is one that is not included in the list.
* **Languages.** Use the multiselect widget to select all languages used in the prompt. This is independent of what languages are used in the underlying dataset. For example, you could have an English prompt for a Spanish dataset.
* **Answer Choices.** If the prompt has a small set of possible outputs (e.g., Yes/No,
class labels, entailment judgements, etc.), then the prompt should define and use answer
choices as follows. This allows evaluation to consider just the possible targets for
Binary file modified assets/promptsource_app.png
51 changes: 28 additions & 23 deletions promptsource/app.py
@@ -17,7 +17,7 @@

from promptsource import DEFAULT_PROMPTSOURCE_CACHE_HOME
from promptsource.session import _get_state
from promptsource.templates import INCLUDED_USERS, DatasetTemplates, Template, TemplateCollection
from promptsource.templates import INCLUDED_USERS, LANGUAGES, METRICS, DatasetTemplates, Template, TemplateCollection
from promptsource.utils import (
    get_dataset,
    get_dataset_confs,
@@ -57,6 +57,17 @@ def get_infos(all_infos, d_name):
    all_infos[d_name] = infos_dict


def format_language(tag):
    """
    Formats a language tag for display in the UI.

    For example, if the tag is "en", then the function returns "en (English)"
    :param tag: language tag
    :return: formatted language name
    """
    return tag + " (" + LANGUAGES[tag] + ")"


# add an argument for read-only
# At the moment, streamlit does not handle python script arguments gracefully.
# Thus, for read-only mode, you have to type one of the below two:
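For illustration, the new format_language helper simply pairs an ISO 639-1 tag with its English name from the LANGUAGES table added in templates.py below; a doctest-style sketch (outputs assume that table):

    >>> format_language("en")
    'en (English)'
    >>> format_language("es")
    'es (Spanish, Castilian)'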
@@ -421,6 +432,11 @@ def show_text(t, width=WIDTH, with_markdown=False):
st.text(template.metadata.choices_in_prompt)
st.markdown("##### Metrics")
st.text(", ".join(template.metadata.metrics) if template.metadata.metrics else None)
st.markdown("##### Prompt Languages")
if template.metadata.languages:
    st.text(", ".join([format_language(tag) for tag in template.metadata.languages]))
else:
    st.text(None)
st.markdown("##### Answer Choices")
if template.get_answer_choices_expr() is not None:
    show_jinja(template.get_answer_choices_expr())
@@ -559,35 +575,24 @@ def show_text(t, width=WIDTH, with_markdown=False):
    help="Prompt explicitly lists choices in the template for the output.",
)

# Metrics from here:
# https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
metrics_choices = [
    "BLEU",
    "ROUGE",
    "Squad",
    "Trivia QA",
    "Accuracy",
    "Pearson Correlation",
    "Spearman Correlation",
    "MultiRC",
    "AUC",
    "COQA F1",
    "Edit Distance",
]
# Add mean reciprocal rank
metrics_choices.append("Mean Reciprocal Rank")
# Add generic other
metrics_choices.append("Other")
# Sort alphabetically
metrics_choices = sorted(metrics_choices)
state.metadata.metrics = st.multiselect(
    "Metrics",
    sorted(METRICS),
    default=template.metadata.metrics,
    help="Select all metrics that are commonly used (or should "
    "be used if a new task) to evaluate this prompt.",
)

state.metadata.languages = st.multiselect(
    "Prompt Languages",
    sorted(LANGUAGES.keys()),
    default=template.metadata.languages,
    format_func=format_language,
    help="Select all languages used in this prompt. "
    "This annotation is independent from the language(s) "
    "of the dataset.",
)

# Answer choices
if template.get_answer_choices_expr() is not None:
    answer_choices = template.get_answer_choices_expr()
211 changes: 211 additions & 0 deletions promptsource/templates.py
@@ -29,6 +29,212 @@

INCLUDED_USERS = {"Zaid", "craffel", "GEM", "aps", "khalidalt", "shanya", "rbawden", "BigScienceBiasEval", "gsarti"}

# These are the metrics with which templates can be tagged
METRICS = {
    "BLEU",
    "ROUGE",
    "Squad",
    "Trivia QA",
    "Accuracy",
    "Pearson Correlation",
    "Spearman Correlation",
    "MultiRC",
    "AUC",
    "COQA F1",
    "Edit Distance",
    "Mean Reciprocal Rank",
    "Other",
}

# These are the languages with which templates can be tagged. Keys are ISO 639-1
# tags, which are the actual tags we use. Values are English names shown in the
# UI for convenience.
LANGUAGES = {
    "ab": "Abkhazian",
    "aa": "Afar",
    "af": "Afrikaans",
    "ak": "Akan",
    "sq": "Albanian",
    "am": "Amharic",
    "ar": "Arabic",
    "an": "Aragonese",
    "hy": "Armenian",
    "as": "Assamese",
    "av": "Avaric",
    "ae": "Avestan",
    "ay": "Aymara",
    "az": "Azerbaijani",
    "bm": "Bambara",
    "ba": "Bashkir",
    "eu": "Basque",
    "be": "Belarusian",
    "bn": "Bengali",
    "bi": "Bislama",
    "bs": "Bosnian",
    "br": "Breton",
    "bg": "Bulgarian",
    "my": "Burmese",
    "ca": "Catalan, Valencian",
    "ch": "Chamorro",
    "ce": "Chechen",
    "ny": "Chichewa, Chewa, Nyanja",
    "zh": "Chinese",
    "cu": "Church Slavic, Old Slavonic, Church Slavonic, Old Bulgarian, Old Church Slavonic",
    "cv": "Chuvash",
    "kw": "Cornish",
    "co": "Corsican",
    "cr": "Cree",
    "hr": "Croatian",
    "cs": "Czech",
    "da": "Danish",
    "dv": "Divehi, Dhivehi, Maldivian",
    "nl": "Dutch, Flemish",
    "dz": "Dzongkha",
    "en": "English",
    "eo": "Esperanto",
    "et": "Estonian",
    "ee": "Ewe",
    "fo": "Faroese",
    "fj": "Fijian",
    "fi": "Finnish",
    "fr": "French",
    "fy": "Western Frisian",
    "ff": "Fulah",
    "gd": "Gaelic, Scottish Gaelic",
    "gl": "Galician",
    "lg": "Ganda",
    "ka": "Georgian",
    "de": "German",
    "el": "Greek, Modern (1453–)",
    "kl": "Kalaallisut, Greenlandic",
    "gn": "Guarani",
    "gu": "Gujarati",
    "ht": "Haitian, Haitian Creole",
    "ha": "Hausa",
    "he": "Hebrew",
    "hz": "Herero",
    "hi": "Hindi",
    "ho": "Hiri Motu",
    "hu": "Hungarian",
    "is": "Icelandic",
    "io": "Ido",
    "ig": "Igbo",
    "id": "Indonesian",
    "ia": "Interlingua (International Auxiliary Language Association)",
    "ie": "Interlingue, Occidental",
    "iu": "Inuktitut",
    "ik": "Inupiaq",
    "ga": "Irish",
    "it": "Italian",
    "ja": "Japanese",
    "jv": "Javanese",
    "kn": "Kannada",
    "kr": "Kanuri",
    "ks": "Kashmiri",
    "kk": "Kazakh",
    "km": "Central Khmer",
    "ki": "Kikuyu, Gikuyu",
    "rw": "Kinyarwanda",
    "ky": "Kirghiz, Kyrgyz",
    "kv": "Komi",
    "kg": "Kongo",
    "ko": "Korean",
    "kj": "Kuanyama, Kwanyama",
    "ku": "Kurdish",
    "lo": "Lao",
    "la": "Latin",
    "lv": "Latvian",
    "li": "Limburgan, Limburger, Limburgish",
    "ln": "Lingala",
    "lt": "Lithuanian",
    "lu": "Luba-Katanga",
    "lb": "Luxembourgish, Letzeburgesch",
    "mk": "Macedonian",
    "mg": "Malagasy",
    "ms": "Malay",
    "ml": "Malayalam",
    "mt": "Maltese",
    "gv": "Manx",
    "mi": "Maori",
    "mr": "Marathi",
    "mh": "Marshallese",
    "mn": "Mongolian",
    "na": "Nauru",
    "nv": "Navajo, Navaho",
    "nd": "North Ndebele",
    "nr": "South Ndebele",
    "ng": "Ndonga",
    "ne": "Nepali",
    "no": "Norwegian",
    "nb": "Norwegian Bokmål",
    "nn": "Norwegian Nynorsk",
    "ii": "Sichuan Yi, Nuosu",
    "oc": "Occitan",
    "oj": "Ojibwa",
    "or": "Oriya",
    "om": "Oromo",
    "os": "Ossetian, Ossetic",
    "pi": "Pali",
    "ps": "Pashto, Pushto",
    "fa": "Persian",
    "pl": "Polish",
    "pt": "Portuguese",
    "pa": "Punjabi, Panjabi",
    "qu": "Quechua",
    "ro": "Romanian, Moldavian, Moldovan",
    "rm": "Romansh",
    "rn": "Rundi",
    "ru": "Russian",
    "se": "Northern Sami",
    "sm": "Samoan",
    "sg": "Sango",
    "sa": "Sanskrit",
    "sc": "Sardinian",
    "sr": "Serbian",
    "sn": "Shona",
    "sd": "Sindhi",
    "si": "Sinhala, Sinhalese",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "st": "Southern Sotho",
    "es": "Spanish, Castilian",
    "su": "Sundanese",
    "sw": "Swahili",
    "ss": "Swati",
    "sv": "Swedish",
    "tl": "Tagalog",
    "ty": "Tahitian",
    "tg": "Tajik",
    "ta": "Tamil",
    "tt": "Tatar",
    "te": "Telugu",
    "th": "Thai",
    "bo": "Tibetan",
    "ti": "Tigrinya",
    "to": "Tonga (Tonga Islands)",
    "ts": "Tsonga",
    "tn": "Tswana",
    "tr": "Turkish",
    "tk": "Turkmen",
    "tw": "Twi",
    "ug": "Uighur, Uyghur",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "uz": "Uzbek",
    "ve": "Venda",
    "vi": "Vietnamese",
    "vo": "Volapük",
    "wa": "Walloon",
    "cy": "Welsh",
    "wo": "Wolof",
    "xh": "Xhosa",
    "yi": "Yiddish",
    "yo": "Yoruba",
    "za": "Zhuang, Chuang",
    "zu": "Zulu",
}


def highlight(input):
return "<span style='color: #F08080'>" + input + "</span>"
@@ -229,6 +435,7 @@ def __init__(
    original_task: Optional[bool] = None,
    choices_in_prompt: Optional[bool] = None,
    metrics: Optional[List[str]] = None,
    languages: Optional[List[str]] = None,
):
    """
    Initializes template metadata.
@@ -242,10 +449,12 @@ def __init__(
    :param choices_in_prompt: If True, the answer choices are included in the templates such that models
        see those choices in the input. Only applicable to classification tasks.
    :param metrics: List of strings denoting metrics to use for evaluation
    :param languages: List of strings denoting languages used in the prompt (not the associated dataset!)
    """
    self.original_task = original_task
    self.choices_in_prompt = choices_in_prompt
    self.metrics = metrics
    self.languages = languages


class TemplateCollection:
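For context, constructing metadata with the new field might look like this (a sketch assuming Metadata is the nested Template class whose __init__ is shown above; values are illustrative):

    metadata = Template.Metadata(
        original_task=True,
        choices_in_prompt=False,
        metrics=["Accuracy"],
        languages=["en"],  # language(s) of the prompt, independent of the dataset
    )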
@@ -505,6 +714,7 @@ def get_templates_data_frame():
    "original_task": [],
    "choices_in_prompt": [],
    "metrics": [],
    "languages": [],
    "answer_choices": [],
    "jinja": [],
}
@@ -523,6 +733,7 @@ def get_templates_data_frame():
data["original_task"].append(template.metadata.original_task)
data["choices_in_prompt"].append(template.metadata.choices_in_prompt)
data["metrics"].append(template.metadata.metrics)
data["languages"].append(template.metadata.languages)
data["answer_choices"].append(template.get_answer_choices_expr())
data["jinja"].append(template.jinja)
