diff --git a/packages/tasks/scripts/inference-codegen.ts b/packages/tasks/scripts/inference-codegen.ts
index a041821689..37b389efba 100644
--- a/packages/tasks/scripts/inference-codegen.ts
+++ b/packages/tasks/scripts/inference-codegen.ts
@@ -57,7 +57,7 @@ async function generateTypescript(inputData: InputData): Promise<SerializedRende
 		indentation: "\t",
 		rendererOptions: {
 			"just-types": true,
-			"nice-property-names": true,
+			"nice-property-names": false,
 			"prefer-unions": true,
 			"prefer-const-values": true,
 			"prefer-unknown": true,
diff --git a/packages/tasks/src/tasks/audio-classification/inference.ts b/packages/tasks/src/tasks/audio-classification/inference.ts
index ae37f29acf..2e4f062878 100644
--- a/packages/tasks/src/tasks/audio-classification/inference.ts
+++ b/packages/tasks/src/tasks/audio-classification/inference.ts
@@ -23,11 +23,11 @@ export interface AudioClassificationInput {
  * Additional inference parameters for Audio Classification
  */
 export interface AudioClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
@@ -40,7 +40,7 @@ export type AudioClassificationOutput = AudioClassificationOutputElement[];
  */
 export interface AudioClassificationOutputElement {
 	/**
-	 * The predicted class label (model specific).
+	 * The predicted class label.
 	 */
 	label: string;
 	/**
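
Callers now send the snake_case keys verbatim. A hedged sketch of a request payload (the relative import assumes a module next to the generated inference.ts; "softmax" assumes ClassificationOutputTransform allows it):

```ts
import type { AudioClassificationInput } from "./inference";

const input: AudioClassificationInput = {
	inputs: "<base64-encoded audio>", // placeholder payload
	parameters: {
		function_to_apply: "softmax", // previously functionToApply
		top_k: 3, // previously topK
	},
};
```
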
diff --git a/packages/tasks/src/tasks/audio-classification/spec/input.json b/packages/tasks/src/tasks/audio-classification/spec/input.json
index cfd5a54a6f..d3bfb44ac4 100644
--- a/packages/tasks/src/tasks/audio-classification/spec/input.json
+++ b/packages/tasks/src/tasks/audio-classification/spec/input.json
@@ -19,11 +19,11 @@
 			"description": "Additional inference parameters for Audio Classification",
 			"type": "object",
 			"properties": {
-				"functionToApply": {
+				"function_to_apply": {
 					"title": "AudioClassificationOutputTransform",
 					"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "When specified, limits the output to the top K most probable classes."
 				}
diff --git a/packages/tasks/src/tasks/audio-classification/spec/output.json b/packages/tasks/src/tasks/audio-classification/spec/output.json
index 83e7abe71d..dac7a92256 100644
--- a/packages/tasks/src/tasks/audio-classification/spec/output.json
+++ b/packages/tasks/src/tasks/audio-classification/spec/output.json
@@ -5,17 +5,6 @@
 	"description": "Outputs for Audio Classification inference",
 	"type": "array",
 	"items": {
-		"type": "object",
-		"properties": {
-			"label": {
-				"type": "string",
-				"description": "The predicted class label (model specific)."
-			},
-			"score": {
-				"type": "number",
-				"description": "The corresponding probability."
-			}
-		},
-		"required": ["label", "score"]
+		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
 	}
 }
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
index d9e2adc859..ce35c6db71 100644
--- a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
+++ b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Automatic Speech Recognition inference
  */
@@ -17,6 +18,7 @@ export interface AutomaticSpeechRecognitionInput {
 	parameters?: AutomaticSpeechRecognitionParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -30,9 +32,10 @@ export interface AutomaticSpeechRecognitionParameters {
 	/**
 	 * Whether to output corresponding timestamps with the generated text
 	 */
-	returnTimestamps?: boolean;
+	return_timestamps?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
  *
@@ -42,18 +45,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -63,37 +66,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -101,12 +104,12 @@ export interface GenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -114,33 +117,23 @@ export interface GenerationParameters {
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Controls the stopping condition for beam-based methods.
  */
 export type EarlyStoppingUnion = boolean | "never";
-export interface AutomaticSpeechRecognitionOutputChunk {
-	/**
-	 * A chunk of text identified by the model
-	 */
-	text: string;
-	/**
-	 * The start and end timestamps corresponding with the text
-	 */
-	timestamps: number[];
-	[property: string]: unknown;
-}
-export type AutomaticSpeechRecognitionOutput = AutomaticSpeechRecognitionOutputElement[];
+
 /**
  * Outputs of inference for the Automatic Speech Recognition task
  */
-export interface AutomaticSpeechRecognitionOutputElement {
+export interface AutomaticSpeechRecognitionOutput {
 	/**
 	 * When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
 	 * the model.
@@ -152,3 +145,15 @@ export interface AutomaticSpeechRecognitionOutputElement {
 	text: string;
 	[property: string]: unknown;
 }
+
+export interface AutomaticSpeechRecognitionOutputChunk {
+	/**
+	 * A chunk of text identified by the model
+	 */
+	text: string;
+	/**
+	 * The start and end timestamps corresponding with the text
+	 */
+	timestamps: number[];
+	[property: string]: unknown;
+}
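
The output type is now a single object rather than an array of elements, and each chunk carries a `[start, end]` timestamp pair. A hedged consumer sketch (import path assumed as above):

```ts
import type { AutomaticSpeechRecognitionOutput } from "./inference";

function render(output: AutomaticSpeechRecognitionOutput): string {
	// chunks is only present when return_timestamps was requested.
	if (!output.chunks) return output.text;
	return output.chunks
		.map((chunk) => `[${chunk.timestamps[0]}s-${chunk.timestamps[1]}s] ${chunk.text}`)
		.join("\n");
}
```
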
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
index 2d31957ed2..49602aa462 100644
--- a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
+++ b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
@@ -19,7 +19,7 @@
 			"description": "Additional inference parameters for Automatic Speech Recognition",
 			"type": "object",
 			"properties": {
-				"returnTimestamps": {
+				"return_timestamps": {
 					"type": "boolean",
 					"description": "Whether to output corresponding timestamps with the generated text"
 				},
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json
index 217f210b15..db8a1cf241 100644
--- a/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json
+++ b/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json
@@ -3,34 +3,36 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Automatic Speech Recognition task",
 	"title": "AutomaticSpeechRecognitionOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"text": {
-				"type": "string",
-				"description": "The recognized text."
-			},
-			"chunks": {
-				"type": "array",
-				"description": "When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model.",
-				"items": {
-					"type": "object",
-					"title": "AutomaticSpeechRecognitionOutputChunk",
-					"properties": {
-						"text": { "type": "string", "description": "A chunk of text identified by the model" },
-						"timestamps": {
-							"type": "array",
-							"description": "The start and end timestamps corresponding with the text",
-							"items": { "type": "number" },
-							"minLength": 2,
-							"maxLength": 2
-						}
+	"type": "object",
+	"properties": {
+		"text": {
+			"type": "string",
+			"description": "The recognized text."
+		},
+		"chunks": {
+			"type": "array",
+			"description": "When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model.",
+			"items": {
+				"type": "object",
+				"title": "AutomaticSpeechRecognitionOutputChunk",
+				"properties": {
+					"text": {
+						"type": "string",
+						"description": "A chunk of text identified by the model"
 					},
-					"required": ["text", "timestamps"]
-				}
+					"timestamps": {
+						"type": "array",
+						"description": "The start and end timestamps corresponding with the text",
+						"items": {
+							"type": "number"
+						},
+						"minLength": 2,
+						"maxLength": 2
+					}
+				},
+				"required": ["text", "timestamps"]
 			}
-		},
-		"required": ["text"]
-	}
+		}
+	},
+	"required": ["text"]
 }
diff --git a/packages/tasks/src/tasks/common-definitions.json b/packages/tasks/src/tasks/common-definitions.json
index 6e0ec532d4..f78d3d9e47 100644
--- a/packages/tasks/src/tasks/common-definitions.json
+++ b/packages/tasks/src/tasks/common-definitions.json
@@ -43,63 +43,71 @@
 					"type": "number",
 					"description": "The value used to modulate the next token probabilities."
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 				},
-				"topP": {
+				"top_p": {
 					"type": "number",
 					"description": "If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation."
 				},
-				"typicalP": {
+				"typical_p": {
 					"type": "number",
 					"description": " Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details."
 				},
-				"epsilonCutoff": {
+				"epsilon_cutoff": {
 					"type": "number",
 					"description": "If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
 				},
-				"etaCutoff": {
+				"eta_cutoff": {
 					"type": "number",
 					"description": "Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
 				},
-				"maxLength": {
+				"max_length": {
 					"type": "integer",
 					"description": "The maximum length (in tokens) of the generated text, including the input."
 				},
-				"maxNewTokens": {
+				"max_new_tokens": {
 					"type": "integer",
 					"description": "The maximum number of tokens to generate. Takes precedence over maxLength."
 				},
-				"minLength": {
+				"min_length": {
 					"type": "integer",
 					"description": "The minimum length (in tokens) of the generated text, including the input."
 				},
-				"minNewTokens": {
+				"min_new_tokens": {
 					"type": "integer",
 					"description": "The minimum number of tokens to generate. Takes precedence over maxLength."
 				},
-				"doSample": {
+				"do_sample": {
 					"type": "boolean",
 					"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
 				},
-				"earlyStopping": {
+				"early_stopping": {
 					"description": "Controls the stopping condition for beam-based methods.",
-					"oneOf": [{ "type": "boolean" }, { "const": "never", "type": "string" }]
-				},
-				"numBeams": {
+					"oneOf": [
+						{
+							"type": "boolean"
+						},
+						{
+							"const": "never",
+							"type": "string"
+						}
+					]
+				},
+				"num_beams": {
 					"type": "integer",
 					"description": "Number of beams to use for beam search."
 				},
-				"numBeamGroups": {
+				"num_beam_groups": {
 					"type": "integer",
 					"description": "Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details."
 				},
-				"penaltyAlpha": {
+				"penalty_alpha": {
 					"type": "number",
 					"description": "The value balances the model confidence and the degeneration penalty in contrastive search decoding."
 				},
-				"useCache": {
+				"use_cache": {
 					"type": "boolean",
 					"description": "Whether the model should use the past last key/values attentions to speed up decoding"
 				}
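
All of the shared generation knobs now use snake_case. One illustrative object covering the renamed fields (values are arbitrary, not recommendations):

```ts
const generate = {
	do_sample: true,
	early_stopping: "never" as boolean | "never", // the oneOf above
	epsilon_cutoff: 3e-4,
	eta_cutoff: 1e-3,
	max_length: 256,
	max_new_tokens: 64,
	min_length: 0,
	min_new_tokens: 8,
	num_beams: 4,
	num_beam_groups: 2,
	penalty_alpha: 0.6,
	temperature: 0.7,
	top_k: 50,
	top_p: 0.9,
	typical_p: 0.95,
	use_cache: true,
};
```
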
diff --git a/packages/tasks/src/tasks/depth-estimation/inference.ts b/packages/tasks/src/tasks/depth-estimation/inference.ts
index 2eb17c74e3..4126e87188 100644
--- a/packages/tasks/src/tasks/depth-estimation/inference.ts
+++ b/packages/tasks/src/tasks/depth-estimation/inference.ts
@@ -30,6 +30,6 @@ export interface DepthEstimationOutput {
 	/**
 	 * The predicted depth as a tensor
 	 */
-	predictedDepth?: unknown;
+	predicted_depth?: unknown;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/document-question-answering/inference.ts b/packages/tasks/src/tasks/document-question-answering/inference.ts
index cd2ab54051..e1284fde3e 100644
--- a/packages/tasks/src/tasks/document-question-answering/inference.ts
+++ b/packages/tasks/src/tasks/document-question-answering/inference.ts
@@ -42,11 +42,11 @@ export interface DocumentQuestionAnsweringParameters {
 	 * be split in several chunks with some overlap. This argument controls the size of that
 	 * overlap.
 	 */
-	docStride?: number;
+	doc_stride?: number;
 	/**
 	 * Whether to accept impossible as an answer
 	 */
-	handleImpossibleAnswer?: boolean;
+	handle_impossible_answer?: boolean;
 	/**
 	 * Language to use while running OCR. Defaults to english.
 	 */
@@ -55,27 +55,27 @@ export interface DocumentQuestionAnsweringParameters {
 	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
 	 * considered).
 	 */
-	maxAnswerLen?: number;
+	max_answer_len?: number;
 	/**
 	 * The maximum length of the question after tokenization. It will be truncated if needed.
 	 */
-	maxQuestionLen?: number;
+	max_question_len?: number;
 	/**
 	 * The maximum length of the total sentence (context + question) in tokens of each chunk
 	 * passed to the model. The context will be split in several chunks (using doc_stride as
 	 * overlap) if needed.
 	 */
-	maxSeqLen?: number;
+	max_seq_len?: number;
 	/**
 	 * The number of answers to return (will be chosen by order of likelihood). Can return less
 	 * than top_k answers if there are not enough options available within the context.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
 	 * skip the OCR step and use the provided bounding boxes instead.
 	 */
-	wordBoxes?: WordBox[];
+	word_boxes?: WordBox[];
 	[property: string]: unknown;
 }
 export type WordBox = number[] | string;
@@ -88,11 +88,19 @@ export interface DocumentQuestionAnsweringOutputElement {
 	 * The answer to the question.
 	 */
 	answer: string;
+	/**
+	 * The end word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	end: number;
 	/**
 	 * The probability associated to the answer.
 	 */
 	score: number;
+	/**
+	 * The start word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	start: number;
 	/**
 	 * The index of each word/box pair that is in the answer
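
A hedged sketch of the renamed parameters plus a read of the newly documented start/end word indices (import path assumed; `word_boxes` mixes bare words and `[x0, y0, x1, y1]` boxes per the `WordBox` union above; values are illustrative):

```ts
import type { DocumentQuestionAnsweringParameters } from "./inference";

const parameters: DocumentQuestionAnsweringParameters = {
	doc_stride: 128,
	handle_impossible_answer: false,
	max_answer_len: 30,
	max_question_len: 64,
	max_seq_len: 384,
	top_k: 1,
	word_boxes: ["invoice", [12, 8, 110, 24]], // skips the OCR step when provided
};

const answer = { answer: "42.00", score: 0.91, start: 17, end: 18, words: [17, 18] };
console.log(`"${answer.answer}" covers word indices ${answer.start} to ${answer.end}`);
```
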
diff --git a/packages/tasks/src/tasks/document-question-answering/spec/input.json b/packages/tasks/src/tasks/document-question-answering/spec/input.json
index a607735e74..c669a92644 100644
--- a/packages/tasks/src/tasks/document-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/document-question-answering/spec/input.json
@@ -31,11 +31,11 @@
 			"description": "Additional inference parameters for Document Question Answering",
 			"type": "object",
 			"properties": {
-				"docStride": {
+				"doc_stride": {
 					"type": "integer",
 					"description": "If the words in the document are too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 				},
-				"handleImpossibleAnswer": {
+				"handle_impossible_answer": {
 					"type": "boolean",
 					"description": "Whether to accept impossible as an answer"
 				},
@@ -43,23 +43,23 @@
 					"type": "string",
 					"description": "Language to use while running OCR. Defaults to english."
 				},
-				"maxAnswerLen": {
+				"max_answer_len": {
 					"type": "integer",
 					"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 				},
-				"maxSeqLen": {
+				"max_seq_len": {
 					"type": "integer",
 					"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using doc_stride as overlap) if needed."
 				},
-				"maxQuestionLen": {
+				"max_question_len": {
 					"type": "integer",
 					"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "The number of answers to return (will be chosen by order of likelihood). Can return less than top_k answers if there are not enough options available within the context."
 				},
-				"wordBoxes": {
+				"word_boxes": {
 					"type": "array",
 					"description": "A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR step and use the provided bounding boxes instead.",
 					"items": {
diff --git a/packages/tasks/src/tasks/document-question-answering/spec/output.json b/packages/tasks/src/tasks/document-question-answering/spec/output.json
index 9f69584ae8..4fda3771a6 100644
--- a/packages/tasks/src/tasks/document-question-answering/spec/output.json
+++ b/packages/tasks/src/tasks/document-question-answering/spec/output.json
@@ -17,11 +17,11 @@
 			},
 			"start": {
 				"type": "integer",
-				"descrtiption": "The start word index of the answer (in the OCR’d version of the input or provided word boxes)."
+				"description": "The start word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 			},
 			"end": {
 				"type": "integer",
-				"descrtiption": "The end word index of the answer (in the OCR’d version of the input or provided word boxes)."
+				"description": "The end word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 			},
 			"words": {
 				"type": "array",
diff --git a/packages/tasks/src/tasks/fill-mask/inference.ts b/packages/tasks/src/tasks/fill-mask/inference.ts
index b80383da64..d81592aed2 100644
--- a/packages/tasks/src/tasks/fill-mask/inference.ts
+++ b/packages/tasks/src/tasks/fill-mask/inference.ts
@@ -33,7 +33,7 @@ export interface FillMaskParameters {
 	/**
 	 * When passed, overrides the number of predictions to return.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type FillMaskOutput = FillMaskOutputElement[];
@@ -53,9 +53,9 @@ export interface FillMaskOutputElement {
 	 * The predicted token id (to replace the masked one).
 	 */
 	token: number;
 	/**
 	 * The predicted token (to replace the masked one).
 	 */
-	tokenStr: string;
+	token_str: string;
 	[property: string]: unknown;
 }
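
A hedged sketch of reading predictions through the renamed `token_str` field (import path assumed as above):

```ts
import type { FillMaskOutput } from "./inference";

function bestToken(output: FillMaskOutput): string {
	// Defensive sort by score; elements already arrive ranked in practice.
	const [top] = [...output].sort((a, b) => b.score - a.score);
	return top.token_str; // previously tokenStr
}
```
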
diff --git a/packages/tasks/src/tasks/fill-mask/spec/input.json b/packages/tasks/src/tasks/fill-mask/spec/input.json
index 00def602ef..aa08d0f2a5 100644
--- a/packages/tasks/src/tasks/fill-mask/spec/input.json
+++ b/packages/tasks/src/tasks/fill-mask/spec/input.json
@@ -20,7 +20,7 @@
 			"description": "Additional inference parameters for Fill Mask",
 			"type": "object",
 			"properties": {
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "When passed, overrides the number of predictions to return."
 				},
diff --git a/packages/tasks/src/tasks/fill-mask/spec/output.json b/packages/tasks/src/tasks/fill-mask/spec/output.json
index f8e91aeeaa..0b613382e7 100644
--- a/packages/tasks/src/tasks/fill-mask/spec/output.json
+++ b/packages/tasks/src/tasks/fill-mask/spec/output.json
@@ -19,7 +19,7 @@
 				"type": "integer",
 				"description": "The predicted token id (to replace the masked one)."
 			},
-			"tokenStr": {
+			"token_str": {
 				"type": "string",
 				"description": "The predicted token (to replace the masked one)."
 			}
diff --git a/packages/tasks/src/tasks/image-classification/inference.ts b/packages/tasks/src/tasks/image-classification/inference.ts
index 7138a50735..50eb895fbb 100644
--- a/packages/tasks/src/tasks/image-classification/inference.ts
+++ b/packages/tasks/src/tasks/image-classification/inference.ts
@@ -23,11 +23,11 @@ export interface ImageClassificationInput {
  * Additional inference parameters for Image Classification
  */
 export interface ImageClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/image-classification/spec/input.json b/packages/tasks/src/tasks/image-classification/spec/input.json
index 362c0d5171..c21bcad54b 100644
--- a/packages/tasks/src/tasks/image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/image-classification/spec/input.json
@@ -19,11 +19,11 @@
 			"description": "Additional inference parameters for Image Classification",
 			"type": "object",
 			"properties": {
-				"functionToApply": {
+				"function_to_apply": {
 					"title": "ImageClassificationOutputTransform",
 					"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "When specified, limits the output to the top K most probable classes."
 				}
diff --git a/packages/tasks/src/tasks/image-segmentation/inference.ts b/packages/tasks/src/tasks/image-segmentation/inference.ts
index b316715f54..ff458b2cfd 100644
--- a/packages/tasks/src/tasks/image-segmentation/inference.ts
+++ b/packages/tasks/src/tasks/image-segmentation/inference.ts
@@ -26,11 +26,11 @@ export interface ImageSegmentationParameters {
 	/**
 	 * Threshold to use when turning the predicted masks into binary values.
 	 */
-	maskThreshold?: number;
+	mask_threshold?: number;
 	/**
 	 * Mask overlap threshold to eliminate small, disconnected segments.
 	 */
-	overlapMaskAreaThreshold?: number;
+	overlap_mask_area_threshold?: number;
 	/**
 	 * Segmentation task to be performed, depending on model capabilities.
 	 */
diff --git a/packages/tasks/src/tasks/image-segmentation/spec/input.json b/packages/tasks/src/tasks/image-segmentation/spec/input.json
index ae4adc70e9..8105941c5f 100644
--- a/packages/tasks/src/tasks/image-segmentation/spec/input.json
+++ b/packages/tasks/src/tasks/image-segmentation/spec/input.json
@@ -19,11 +19,11 @@
 			"description": "Additional inference parameters for Image Segmentation",
 			"type": "object",
 			"properties": {
-				"maskThreshold": {
+				"mask_threshold": {
 					"type": "number",
 					"description": "Threshold to use when turning the predicted masks into binary values."
 				},
-				"overlapMaskAreaThreshold": {
+				"overlap_mask_area_threshold": {
 					"type": "number",
 					"description": "Mask overlap threshold to eliminate small, disconnected segments."
 				},
diff --git a/packages/tasks/src/tasks/image-to-image/inference.ts b/packages/tasks/src/tasks/image-to-image/inference.ts
index 96a532b252..b08d0709ff 100644
--- a/packages/tasks/src/tasks/image-to-image/inference.ts
+++ b/packages/tasks/src/tasks/image-to-image/inference.ts
@@ -29,20 +29,20 @@ export interface ImageToImageParameters {
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
 
diff --git a/packages/tasks/src/tasks/image-to-image/spec/input.json b/packages/tasks/src/tasks/image-to-image/spec/input.json
index 11d4bee8af..c812774d68 100644
--- a/packages/tasks/src/tasks/image-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-image/spec/input.json
@@ -19,20 +19,22 @@
 			"description": "Additional inference parameters for Image To Image",
 			"type": "object",
 			"properties": {
-				"guidanceScale": {
+				"guidance_scale": {
 					"type": "number",
 					"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 				},
-				"negativePrompt": {
+				"negative_prompt": {
 					"type": "array",
-					"items": { "type": "string" },
+					"items": {
+						"type": "string"
+					},
 					"description": "One or several prompt to guide what NOT to include in image generation."
 				},
-				"numInferenceSteps": {
+				"num_inference_steps": {
 					"type": "integer",
 					"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
 				},
-				"targetSize": {
+				"target_size": {
 					"type": "object",
 					"description": "The size in pixel of the output image",
 					"properties": {
diff --git a/packages/tasks/src/tasks/image-to-text/inference.ts b/packages/tasks/src/tasks/image-to-text/inference.ts
index 19bb147e2d..330b299633 100644
--- a/packages/tasks/src/tasks/image-to-text/inference.ts
+++ b/packages/tasks/src/tasks/image-to-text/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Image To Text inference
  */
@@ -17,6 +18,7 @@ export interface ImageToTextInput {
 	parameters?: ImageToTextParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -30,9 +32,10 @@ export interface ImageToTextParameters {
 	/**
 	 * The amount of maximum tokens to generate.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
  *
@@ -42,18 +45,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -63,37 +66,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -101,12 +104,12 @@ export interface GenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -114,25 +117,26 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Controls the stopping condition for beam-based methods.
  */
 export type EarlyStoppingUnion = boolean | "never";
-export type ImageToTextOutput = ImageToTextOutputElement[];
+
 /**
  * Outputs of inference for the Image To Text task
  */
-export interface ImageToTextOutputElement {
+export interface ImageToTextOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
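
The output collapses from an array of elements to one object with a snake_case field. A hedged migration sketch (import path assumed as above):

```ts
import type { ImageToTextOutput } from "./inference";

function caption(output: ImageToTextOutput): string {
	return output.generated_text; // previously output[0].generatedText
}
```
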
diff --git a/packages/tasks/src/tasks/image-to-text/spec/input.json b/packages/tasks/src/tasks/image-to-text/spec/input.json
index 0ef8ba1dc5..d68c9a8bc8 100644
--- a/packages/tasks/src/tasks/image-to-text/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-text/spec/input.json
@@ -19,7 +19,7 @@
 			"description": "Additional inference parameters for Image To Text",
 			"type": "object",
 			"properties": {
-				"maxNewTokens": {
+				"max_new_tokens": {
 					"type": "integer",
 					"description": "The amount of maximum tokens to generate."
 				},
diff --git a/packages/tasks/src/tasks/image-to-text/spec/output.json b/packages/tasks/src/tasks/image-to-text/spec/output.json
index e3283e34f7..388c3456f4 100644
--- a/packages/tasks/src/tasks/image-to-text/spec/output.json
+++ b/packages/tasks/src/tasks/image-to-text/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Image To Text task",
 	"title": "ImageToTextOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generatedText"]
 }
diff --git a/packages/tasks/src/tasks/placeholder/spec/input.json b/packages/tasks/src/tasks/placeholder/spec/input.json
index 5c206baef3..0d2ba2e2a0 100644
--- a/packages/tasks/src/tasks/placeholder/spec/input.json
+++ b/packages/tasks/src/tasks/placeholder/spec/input.json
@@ -20,11 +20,11 @@
 			"description": "TODO: describe additional parameters here.",
 			"type": "object",
 			"properties": {
-				"dummyParameterName": {
+				"dummy_parameter_name": {
 					"type": "boolean",
 					"description": "TODO: describe the parameter here"
 				},
-				"dummyParameterName2": {
+				"dummy_parameter_name2": {
 					"type": "integer",
 					"description": "TODO: describe the parameter here"
 				}
diff --git a/packages/tasks/src/tasks/placeholder/spec/output.json b/packages/tasks/src/tasks/placeholder/spec/output.json
index 8e3e132941..697c6e2672 100644
--- a/packages/tasks/src/tasks/placeholder/spec/output.json
+++ b/packages/tasks/src/tasks/placeholder/spec/output.json
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"meaningfulOutputName": {
+			"meaningful_output_name": {
 				"type": "string",
 				"description": "TODO: Describe what is outputed by the inference here"
 			}
diff --git a/packages/tasks/src/tasks/question-answering/inference.ts b/packages/tasks/src/tasks/question-answering/inference.ts
index bffc71cc6e..d7bda23f49 100644
--- a/packages/tasks/src/tasks/question-answering/inference.ts
+++ b/packages/tasks/src/tasks/question-answering/inference.ts
@@ -41,37 +41,37 @@ export interface QuestionAnsweringParameters {
 	 * Attempts to align the answer to real words. Improves quality on space separated
 	 * languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
 	 */
-	alignToWords?: boolean;
+	align_to_words?: boolean;
 	/**
 	 * If the context is too long to fit with the question for the model, it will be split in
 	 * several chunks with some overlap. This argument controls the size of that overlap.
 	 */
-	docStride?: number;
+	doc_stride?: number;
 	/**
 	 * Whether to accept impossible as an answer.
 	 */
-	handleImpossibleAnswer?: boolean;
+	handle_impossible_answer?: boolean;
 	/**
 	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
 	 * considered).
 	 */
-	maxAnswerLen?: number;
+	max_answer_len?: number;
 	/**
 	 * The maximum length of the question after tokenization. It will be truncated if needed.
 	 */
-	maxQuestionLen?: number;
+	max_question_len?: number;
 	/**
 	 * The maximum length of the total sentence (context + question) in tokens of each chunk
 	 * passed to the model. The context will be split in several chunks (using docStride as
 	 * overlap) if needed.
 	 */
-	maxSeqLen?: number;
+	max_seq_len?: number;
 	/**
 	 * The number of answers to return (will be chosen by order of likelihood). Note that we
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
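
A hedged sketch of the renamed extractive QA parameters (import path assumed; values are illustrative):

```ts
import type { QuestionAnsweringParameters } from "./inference";

const parameters: QuestionAnsweringParameters = {
	align_to_words: true, // helps space-separated languages; may hurt e.g. Japanese
	doc_stride: 128,
	handle_impossible_answer: false,
	max_answer_len: 30,
	max_question_len: 64,
	max_seq_len: 384,
	top_k: 3,
};
```
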
diff --git a/packages/tasks/src/tasks/question-answering/spec/input.json b/packages/tasks/src/tasks/question-answering/spec/input.json
index 088e77200b..ec23cee641 100644
--- a/packages/tasks/src/tasks/question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/question-answering/spec/input.json
@@ -32,31 +32,31 @@
 			"description": "Additional inference parameters for Question Answering",
 			"type": "object",
 			"properties": {
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
 				},
-				"docStride": {
+				"doc_stride": {
 					"type": "integer",
 					"description": "If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 				},
-				"maxAnswerLen": {
+				"max_answer_len": {
 					"type": "integer",
 					"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 				},
-				"maxSeqLen": {
+				"max_seq_len": {
 					"type": "integer",
 					"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed."
 				},
-				"maxQuestionLen": {
+				"max_question_len": {
 					"type": "integer",
 					"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 				},
-				"handleImpossibleAnswer": {
+				"handle_impossible_answer": {
 					"type": "boolean",
 					"description": "Whether to accept impossible as an answer."
 				},
-				"alignToWords": {
+				"align_to_words": {
 					"type": "boolean",
 					"description": "Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese)"
 				}
diff --git a/packages/tasks/src/tasks/summarization/inference.ts b/packages/tasks/src/tasks/summarization/inference.ts
index 16d30cf7a1..0870cf7ba9 100644
--- a/packages/tasks/src/tasks/summarization/inference.ts
+++ b/packages/tasks/src/tasks/summarization/inference.ts
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: { [key: string]: unknown };
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
@@ -50,9 +50,9 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
  * Outputs of inference for the Text2text Generation task
  */
 export interface SummarizationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/table-question-answering/spec/input.json b/packages/tasks/src/tasks/table-question-answering/spec/input.json
index 6309cf1f36..59d170d99a 100644
--- a/packages/tasks/src/tasks/table-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/table-question-answering/spec/input.json
@@ -13,7 +13,12 @@
 				"table": {
 					"description": "The table to serve as context for the questions",
 					"type": "object",
-					"additionalProperties": { "type": "array", "items": { "type": "string" } }
+					"additionalProperties": {
+						"type": "array",
+						"items": {
+							"type": "string"
+						}
+					}
 				},
 				"question": {
 					"description": "The question to be answered about the table",
diff --git a/packages/tasks/src/tasks/text-classification/inference.ts b/packages/tasks/src/tasks/text-classification/inference.ts
index 9bc728a50c..3e8439ebcb 100644
--- a/packages/tasks/src/tasks/text-classification/inference.ts
+++ b/packages/tasks/src/tasks/text-classification/inference.ts
@@ -23,11 +23,11 @@ export interface TextClassificationInput {
  * Additional inference parameters for Text Classification
  */
 export interface TextClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/text-classification/spec/input.json b/packages/tasks/src/tasks/text-classification/spec/input.json
index 6ae6f1c39c..32a6fe210f 100644
--- a/packages/tasks/src/tasks/text-classification/spec/input.json
+++ b/packages/tasks/src/tasks/text-classification/spec/input.json
@@ -20,11 +20,11 @@
 			"description": "Additional inference parameters for Text Classification",
 			"type": "object",
 			"properties": {
-				"functionToApply": {
+				"function_to_apply": {
 					"title": "TextClassificationOutputTransform",
 					"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "When specified, limits the output to the top K most probable classes."
 				}
diff --git a/packages/tasks/src/tasks/text-generation/inference.ts b/packages/tasks/src/tasks/text-generation/inference.ts
index 94279336c8..326bd01358 100644
--- a/packages/tasks/src/tasks/text-generation/inference.ts
+++ b/packages/tasks/src/tasks/text-generation/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text Generation inference
  */
@@ -17,6 +18,7 @@ export interface TextGenerationInput {
 	parameters?: TextGenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -26,24 +28,24 @@ export interface TextGenerationParameters {
 	/**
 	 * Whether to use logit sampling (true) or greedy search (false).
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Maximum number of generated tokens.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
 	 * paper](https://hf.co/papers/1909.05858) for more details.
 	 */
-	repetitionPenalty?: number;
+	repetition_penalty?: number;
 	/**
 	 * Whether to prepend the prompt to the generated text.
 	 */
-	returnFullText?: boolean;
+	return_full_text?: boolean;
 	/**
 	 * Stop generating tokens if a member of `stop_sequences` is generated.
 	 */
-	stopSequences?: string[];
+	stop_sequences?: string[];
 	/**
 	 * The value used to modulate the logits distribution.
 	 */
@@ -51,12 +53,12 @@ export interface TextGenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
 	 * up to `top_p` or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Truncate input tokens to the given size.
 	 */
@@ -65,21 +67,21 @@ export interface TextGenerationParameters {
 	 * Typical Decoding mass. See [Typical Decoding for Natural Language
 	 * Generation](https://hf.co/papers/2202.00666) for more information
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
 	 */
 	watermark?: boolean;
 	[property: string]: unknown;
 }
-export type TextGenerationOutput = TextGenerationOutputElement[];
+
 /**
  * Outputs for Text Generation inference
  */
-export interface TextGenerationOutputElement {
+export interface TextGenerationOutput {
 	/**
 	 * The generated text
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
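
A hedged sketch of a full request with the renamed parameters, and of the output now typed as a single object (import path assumed; values are illustrative):

```ts
import type { TextGenerationInput, TextGenerationOutput } from "./inference";

const request: TextGenerationInput = {
	inputs: "Once upon a time",
	parameters: {
		do_sample: true,
		max_new_tokens: 50,
		repetition_penalty: 1.1,
		return_full_text: false,
		stop_sequences: ["\n\n"],
		top_k: 50,
		top_p: 0.95,
		typical_p: 0.9,
	},
};

// The output is one object now, not an array of elements:
const response: TextGenerationOutput = { generated_text: ", there was a diff." };
```
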
diff --git a/packages/tasks/src/tasks/text-generation/spec/input.json b/packages/tasks/src/tasks/text-generation/spec/input.json
index 2235616913..6caf359bf6 100644
--- a/packages/tasks/src/tasks/text-generation/spec/input.json
+++ b/packages/tasks/src/tasks/text-generation/spec/input.json
@@ -20,23 +20,23 @@
 			"description": "Additional inference parameters for Text Generation",
 			"type": "object",
 			"properties": {
-				"doSample": {
+				"do_sample": {
 					"type": "boolean",
 					"description": "Whether to use logit sampling (true) or greedy search (false)."
 				},
-				"maxNewTokens": {
+				"max_new_tokens": {
 					"type": "integer",
 					"description": "Maximum number of generated tokens."
 				},
-				"repetitionPenalty": {
+				"repetition_penalty": {
 					"type": "number",
 					"description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details."
 				},
-				"returnFullText": {
+				"return_full_text": {
 					"type": "boolean",
 					"description": "Whether to prepend the prompt to the generated text."
 				},
-				"stopSequences": {
+				"stop_sequences": {
 					"type": "array",
 					"items": {
 						"type": "string"
@@ -47,11 +47,11 @@
 					"type": "number",
 					"description": "The value used to modulate the logits distribution."
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 				},
-				"topP": {
+				"top_p": {
 					"type": "number",
 					"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 				},
@@ -59,7 +59,7 @@
 					"type": "integer",
 					"description": "Truncate input tokens to the given size."
 				},
-				"typicalP": {
+				"typical_p": {
 					"type": "number",
 					"description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information"
 				},
diff --git a/packages/tasks/src/tasks/text-generation/spec/output.json b/packages/tasks/src/tasks/text-generation/spec/output.json
index eacb907e2c..b38bc8be30 100644
--- a/packages/tasks/src/tasks/text-generation/spec/output.json
+++ b/packages/tasks/src/tasks/text-generation/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs for Text Generation inference",
 	"title": "TextGenerationOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text"
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text"
+		}
+	},
+	"required": ["generatedText"]
 }
diff --git a/packages/tasks/src/tasks/text-to-audio/inference.ts b/packages/tasks/src/tasks/text-to-audio/inference.ts
index 14c484bf2f..e149a551b9 100644
--- a/packages/tasks/src/tasks/text-to-audio/inference.ts
+++ b/packages/tasks/src/tasks/text-to-audio/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text To Audio inference
  */
@@ -17,6 +18,7 @@ export interface TextToAudioInput {
 	parameters?: TextToAudioParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -29,6 +31,7 @@ export interface TextToAudioParameters {
 	generate?: GenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
  *
@@ -38,18 +41,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -59,37 +62,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -97,12 +100,12 @@ export interface GenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -110,29 +113,30 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
  * Controls the stopping condition for beam-based methods.
  */
 export type EarlyStoppingUnion = boolean | "never";
-export type TextToAudioOutput = TextToAudioOutputElement[];
+
 /**
  * Outputs of inference for the Text To Audio task
  */
-export interface TextToAudioOutputElement {
+export interface TextToAudioOutput {
 	/**
 	 * The generated audio waveform.
 	 */
 	audio: unknown;
 	/**
 	 * The sampling rate of the generated audio waveform.
 	 */
-	samplingRate: number;
+	sampling_rate: number;
 	[property: string]: unknown;
 }
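
A hedged sketch of consuming the output through the renamed `sampling_rate` field (import path assumed; `audio` stays opaque in the generated types):

```ts
import type { TextToAudioOutput } from "./inference";

function describe(output: TextToAudioOutput): string {
	return `waveform sampled at ${output.sampling_rate} Hz`; // previously samplingRate
}
```
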
diff --git a/packages/tasks/src/tasks/text-to-audio/spec/output.json b/packages/tasks/src/tasks/text-to-audio/spec/output.json
index b0a25bd9ad..c171d62bff 100644
--- a/packages/tasks/src/tasks/text-to-audio/spec/output.json
+++ b/packages/tasks/src/tasks/text-to-audio/spec/output.json
@@ -3,18 +3,15 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text To Audio task",
 	"title": "TextToAudioOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"audio": {
-				"description": "The generated audio waveform."
-			},
-			"samplingRate": {
-				"type": "number",
-				"description": "The sampling rate of the generated audio waveform."
-			}
+	"type": "object",
+	"properties": {
+		"audio": {
+			"description": "The generated audio waveform."
 		},
-		"required": ["audio", "samplingRate"]
-	}
+		"sampling_rate": {
+			"type": "number",
+			"description": "The sampling rate of the generated audio waveform."
+		}
+	},
+	"required": ["audio", "samplingRate"]
 }
diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts
index c25031b29e..d9f0b2efec 100644
--- a/packages/tasks/src/tasks/text-to-image/inference.ts
+++ b/packages/tasks/src/tasks/text-to-image/inference.ts
@@ -29,16 +29,16 @@ export interface TextToImageParameters {
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * For diffusion models. Override the scheduler with a compatible one
 	 */
@@ -46,7 +46,7 @@ export interface TextToImageParameters {
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
 
@@ -62,9 +62,7 @@ export interface TargetSize {
 /**
  * Outputs of inference for the Text To Image task
  */
-export type TextToImageOutput = unknown[] | boolean | number | number | null | TextToImageOutputObject | string;
-
-export interface TextToImageOutputObject {
+export interface TextToImageOutput {
 	/**
 	 * The generated image
 	 */
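
The renamed text-to-image parameters now match the keyword arguments a Python backend (e.g. a diffusers pipeline) expects, so payloads can be forwarded without key conversion. A sketch with the interfaces mirrored locally; the width/height fields of TargetSize are an assumption, since that part of the interface sits outside the hunks shown here.

```ts
// Local mirrors of the generated interfaces; TargetSize fields assumed to be width/height.
interface TargetSize {
	width: number;
	height: number;
}

interface TextToImageParameters {
	guidance_scale?: number;
	negative_prompt?: string[];
	num_inference_steps?: number;
	target_size?: TargetSize;
	[property: string]: unknown;
}

interface TextToImageOutput {
	image: unknown;
	[property: string]: unknown;
}

const parameters: TextToImageParameters = {
	guidance_scale: 7.5,
	negative_prompt: ["blurry", "low quality"],
	num_inference_steps: 30,
	target_size: { width: 512, height: 512 },
};

// The output is now a single object holding one image, not an array or a primitive union.
const output: TextToImageOutput = { image: "<binary image payload>" };
console.log(parameters, output);
```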
diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json
index cb1e1c6cf6..2cb5e8ad08 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/input.json
@@ -20,20 +20,22 @@
 			"description": "Additional inference parameters for Text To Image",
 			"type": "object",
 			"properties": {
-				"guidanceScale": {
+				"guidance_scale": {
 					"type": "number",
 					"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 				},
-				"negativePrompt": {
+				"negative_prompt": {
 					"type": "array",
-					"items": { "type": "string" },
+					"items": {
+						"type": "string"
+					},
 					"description": "One or several prompt to guide what NOT to include in image generation."
 				},
-				"numInferenceSteps": {
+				"num_inference_steps": {
 					"type": "integer",
 					"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
 				},
-				"targetSize": {
+				"target_size": {
 					"type": "object",
 					"description": "The size in pixel of the output image",
 					"properties": {
diff --git a/packages/tasks/src/tasks/text-to-image/spec/output.json b/packages/tasks/src/tasks/text-to-image/spec/output.json
index 5ab3ee7879..ff952a3a36 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/output.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/output.json
@@ -3,13 +3,11 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text To Image task",
 	"title": "TextToImageOutput",
-	"type": "array",
-	"items": {
-		"properties": {
-			"image": {
-				"description": "The generated image"
-			}
-		},
-		"required": ["image"]
-	}
+	"type": "object",
+	"properties": {
+		"image": {
+			"description": "The generated image"
+		}
+	},
+	"required": ["image"]
 }
diff --git a/packages/tasks/src/tasks/text-to-speech/inference.ts b/packages/tasks/src/tasks/text-to-speech/inference.ts
index f67e03652a..84e28a9e9a 100644
--- a/packages/tasks/src/tasks/text-to-speech/inference.ts
+++ b/packages/tasks/src/tasks/text-to-speech/inference.ts
@@ -43,18 +43,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -64,37 +64,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -102,12 +102,12 @@ export interface GenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -115,11 +115,11 @@ export interface GenerationParameters {
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
 
@@ -138,9 +138,9 @@ export interface TextToSpeechOutput {
 	 * The generated audio waveform.
 	 */
 	audio: unknown;
 	/**
 	 * The sampling rate of the generated audio waveform.
 	 */
-	samplingRate: number;
+	sampling_rate: number;
 	[property: string]: unknown;
 }
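
With the rename, a generation config built on the client side can be passed through verbatim as transformers-style generation kwargs. A sketch using a local subset of the fields shown in this hunk:

```ts
// Illustrative subset of the generated GenerationParameters interface.
interface GenerationParameters {
	do_sample?: boolean;
	early_stopping?: boolean | "never";
	max_new_tokens?: number;
	num_beams?: number;
	top_k?: number;
	top_p?: number;
	use_cache?: boolean;
	[property: string]: unknown;
}

// Nucleus sampling with a top-k cap; no beam search (num_beams left at its default).
const generate: GenerationParameters = {
	do_sample: true,
	top_k: 50,
	top_p: 0.95,
	max_new_tokens: 256,
	use_cache: true,
};
console.log(JSON.stringify(generate));
```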
diff --git a/packages/tasks/src/tasks/text2text-generation/inference.ts b/packages/tasks/src/tasks/text2text-generation/inference.ts
index 788845dd24..0fa4376d6a 100644
--- a/packages/tasks/src/tasks/text2text-generation/inference.ts
+++ b/packages/tasks/src/tasks/text2text-generation/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text2text Generation inference
  */
@@ -17,6 +18,7 @@ export interface Text2TextGenerationInput {
 	parameters?: Text2TextGenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
  *
@@ -26,28 +28,27 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: {
-		[key: string]: unknown;
-	};
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
 	truncation?: Text2TextGenerationTruncationStrategy;
 	[property: string]: unknown;
 }
+
 export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
-export type Text2TextGenerationOutput = Text2TextGenerationOutputElement[];
+
 /**
  * Outputs of inference for the Text2text Generation task
  */
-export interface Text2TextGenerationOutputElement {
+export interface Text2TextGenerationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
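
Downstream code now reads one object keyed by generated_text instead of indexing into an array. A small accessor sketch with the interface mirrored locally:

```ts
// Local mirror of the generated Text2TextGenerationOutput interface.
interface Text2TextGenerationOutput {
	generated_text: string;
	[property: string]: unknown;
}

function readGeneratedText(output: Text2TextGenerationOutput): string {
	return output.generated_text;
}

console.log(readGeneratedText({ generated_text: "Bonjour le monde" }));
```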
diff --git a/packages/tasks/src/tasks/text2text-generation/spec/input.json b/packages/tasks/src/tasks/text2text-generation/spec/input.json
index a00ae575fc..7a17bbe160 100644
--- a/packages/tasks/src/tasks/text2text-generation/spec/input.json
+++ b/packages/tasks/src/tasks/text2text-generation/spec/input.json
@@ -20,7 +20,7 @@
 			"description": "Additional inference parameters for Text2text Generation",
 			"type": "object",
 			"properties": {
-				"cleanUpTokenizationSpaces": {
+				"clean_up_tokenization_spaces": {
 					"type": "boolean",
 					"description": "Whether to clean up the potential extra spaces in the text output."
 				},
@@ -43,7 +43,7 @@
 						}
 					]
 				},
-				"generateParameters": {
+				"generate_parameters": {
 					"title": "generateParameters",
 					"type": "object",
 					"description": "Additional parametrization of the text generation algorithm"
diff --git a/packages/tasks/src/tasks/text2text-generation/spec/output.json b/packages/tasks/src/tasks/text2text-generation/spec/output.json
index f60ba8933e..0da61f103d 100644
--- a/packages/tasks/src/tasks/text2text-generation/spec/output.json
+++ b/packages/tasks/src/tasks/text2text-generation/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text2text Generation task",
 	"title": "Text2TextGenerationOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/token-classification/inference.ts b/packages/tasks/src/tasks/token-classification/inference.ts
index 7a8da8dcfc..1278cbe38a 100644
--- a/packages/tasks/src/tasks/token-classification/inference.ts
+++ b/packages/tasks/src/tasks/token-classification/inference.ts
@@ -26,11 +26,11 @@ export interface TokenClassificationParameters {
 	/**
 	 * The strategy used to fuse tokens based on model predictions
 	 */
-	aggregationStrategy?: TokenClassificationAggregationStrategy;
+	aggregation_strategy?: TokenClassificationAggregationStrategy;
 	/**
 	 * A list of labels to ignore
 	 */
-	ignoreLabels?: string[];
+	ignore_labels?: string[];
 	/**
 	 * The number of overlapping tokens between chunks when splitting the input text.
 	 */
@@ -64,7 +64,7 @@ export interface TokenClassificationOutputElement {
 	/**
 	 * The predicted label for that group of tokens
 	 */
-	entityGroup?: string;
+	entity_group?: string;
 	label: unknown;
 	/**
 	 * The associated score / probability
diff --git a/packages/tasks/src/tasks/token-classification/spec/input.json b/packages/tasks/src/tasks/token-classification/spec/input.json
index 2fd89ce34c..fde3066404 100644
--- a/packages/tasks/src/tasks/token-classification/spec/input.json
+++ b/packages/tasks/src/tasks/token-classification/spec/input.json
@@ -20,7 +20,7 @@
 			"description": "Additional inference parameters for Token Classification",
 			"type": "object",
 			"properties": {
-				"ignoreLabels": {
+				"ignore_labels": {
 					"type": "array",
 					"items": {
 						"type": "string"
@@ -31,7 +31,7 @@
 					"type": "integer",
 					"description": "The number of overlapping tokens between chunks when splitting the input text."
 				},
-				"aggregationStrategy": {
+				"aggregation_strategy": {
 					"title": "TokenClassificationAggregationStrategy",
 					"type": "string",
 					"description": "The strategy used to fuse tokens based on model predictions",
diff --git a/packages/tasks/src/tasks/token-classification/spec/output.json b/packages/tasks/src/tasks/token-classification/spec/output.json
index 8522d972a2..95bdc06f53 100644
--- a/packages/tasks/src/tasks/token-classification/spec/output.json
+++ b/packages/tasks/src/tasks/token-classification/spec/output.json
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"entityGroup": {
+			"entity_group": {
 				"type": "string",
 				"description": "The predicted label for that group of tokens"
 			},
diff --git a/packages/tasks/src/tasks/translation/inference.ts b/packages/tasks/src/tasks/translation/inference.ts
index c932617a40..8f232a736d 100644
--- a/packages/tasks/src/tasks/translation/inference.ts
+++ b/packages/tasks/src/tasks/translation/inference.ts
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: { [key: string]: unknown };
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
@@ -50,9 +50,9 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
  * Outputs of inference for the Text2text Generation task
  */
 export interface TranslationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/video-classification/inference.ts b/packages/tasks/src/tasks/video-classification/inference.ts
index 1f765160f3..557222ceb3 100644
--- a/packages/tasks/src/tasks/video-classification/inference.ts
+++ b/packages/tasks/src/tasks/video-classification/inference.ts
@@ -26,16 +26,16 @@ export interface VideoClassificationParameters {
 	/**
 	 * The sampling rate used to select frames from the video.
 	 */
-	frameSamplingRate?: number;
-	functionToApply?: ClassificationOutputTransform;
+	frame_sampling_rate?: number;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * The number of sampled frames to consider for classification.
 	 */
-	numFrames?: number;
+	num_frames?: number;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/video-classification/spec/input.json b/packages/tasks/src/tasks/video-classification/spec/input.json
index 984670953b..78e312883c 100644
--- a/packages/tasks/src/tasks/video-classification/spec/input.json
+++ b/packages/tasks/src/tasks/video-classification/spec/input.json
@@ -19,19 +19,19 @@
 			"description": "Additional inference parameters for Video Classification",
 			"type": "object",
 			"properties": {
-				"functionToApply": {
+				"function_to_apply": {
 					"title": "TextClassificationOutputTransform",
 					"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 				},
-				"numFrames": {
+				"num_frames": {
 					"type": "integer",
 					"description": "The number of sampled frames to consider for classification."
 				},
-				"frameSamplingRate": {
+				"frame_sampling_rate": {
 					"type": "integer",
 					"description": "The sampling rate used to select frames from the video."
 				},
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "When specified, limits the output to the top K most probable classes."
 				}
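
A video-classification parameter payload with the renamed fields. The literal values of ClassificationOutputTransform are taken as "sigmoid" | "softmax" | "none", which is an assumption here since the common definition is not part of this diff.

```ts
// Local mirror of the generated VideoClassificationParameters interface;
// the transform union is assumed, see the note above.
type ClassificationOutputTransform = "sigmoid" | "softmax" | "none";

interface VideoClassificationParameters {
	frame_sampling_rate?: number;
	function_to_apply?: ClassificationOutputTransform;
	num_frames?: number;
	top_k?: number;
	[property: string]: unknown;
}

// Sample 16 frames, one every 4 video frames, and keep the 5 most probable classes.
const parameters: VideoClassificationParameters = {
	num_frames: 16,
	frame_sampling_rate: 4,
	top_k: 5,
	function_to_apply: "softmax",
};
console.log(parameters);
```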
diff --git a/packages/tasks/src/tasks/visual-question-answering/inference.ts b/packages/tasks/src/tasks/visual-question-answering/inference.ts
index 0eb513ebf6..bc1b410ad5 100644
--- a/packages/tasks/src/tasks/visual-question-answering/inference.ts
+++ b/packages/tasks/src/tasks/visual-question-answering/inference.ts
@@ -42,7 +42,7 @@ export interface VisualQuestionAnsweringParameters {
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type VisualQuestionAnsweringOutput = VisualQuestionAnsweringOutputElement[];
diff --git a/packages/tasks/src/tasks/visual-question-answering/spec/input.json b/packages/tasks/src/tasks/visual-question-answering/spec/input.json
index b6cb0e123c..2523ae7f27 100644
--- a/packages/tasks/src/tasks/visual-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/visual-question-answering/spec/input.json
@@ -30,7 +30,7 @@
 			"description": "Additional inference parameters for Visual Question Answering",
 			"type": "object",
 			"properties": {
-				"topK": {
+				"top_k": {
 					"type": "integer",
 					"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
 				}
diff --git a/packages/tasks/src/tasks/zero-shot-classification/inference.ts b/packages/tasks/src/tasks/zero-shot-classification/inference.ts
index e0b43ec70b..ba79520e84 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/inference.ts
+++ b/packages/tasks/src/tasks/zero-shot-classification/inference.ts
@@ -41,13 +41,13 @@ export interface ZeroShotClassificationParameters {
 	 * The sentence used in conjunction with candidateLabels to attempt the text classification
 	 * by replacing the placeholder with the candidate labels.
 	 */
-	hypothesisTemplate?: string;
+	hypothesis_template?: string;
 	/**
 	 * Whether multiple candidate labels can be true. If false, the scores are normalized such
 	 * that the sum of the label likelihoods for each sequence is 1. If true, the labels are
 	 * considered independent and probabilities are normalized for each candidate.
 	 */
-	multiLabel?: boolean;
+	multi_label?: boolean;
 	[property: string]: unknown;
 }
 export type ZeroShotClassificationOutput = ZeroShotClassificationOutputElement[];
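
A zero-shot classification parameter payload with the renamed fields; the `{}` placeholder in hypothesis_template is replaced by each candidate label at inference time.

```ts
// Local mirror of the generated ZeroShotClassificationParameters interface.
interface ZeroShotClassificationParameters {
	hypothesis_template?: string;
	multi_label?: boolean;
	[property: string]: unknown;
}

// multi_label: true scores every candidate label independently instead of
// normalizing the label scores of each sequence to sum to 1.
const parameters: ZeroShotClassificationParameters = {
	hypothesis_template: "This example is about {}.",
	multi_label: true,
};
console.log(parameters);
```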
diff --git a/packages/tasks/src/tasks/zero-shot-classification/spec/input.json b/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
index 689c22769c..c64db564fc 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
+++ b/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
@@ -35,11 +35,11 @@
 			"description": "Additional inference parameters for Zero Shot Classification",
 			"type": "object",
 			"properties": {
-				"hypothesisTemplate": {
+				"hypothesis_template": {
 					"type": "string",
 					"description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
 				},
-				"multiLabel": {
+				"multi_label": {
 					"type": "boolean",
 					"description": "Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. If true, the labels are considered independent and probabilities are normalized for each candidate."
 				}
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts b/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
index 2bea5436b8..1fedd98216 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
@@ -41,7 +41,7 @@ export interface ZeroShotImageClassificationParameters {
 	 * The sentence used in conjunction with candidateLabels to attempt the text classification
 	 * by replacing the placeholder with the candidate labels.
 	 */
-	hypothesisTemplate?: string;
+	hypothesis_template?: string;
 	[property: string]: unknown;
 }
 export type ZeroShotImageClassificationOutput = ZeroShotImageClassificationOutputElement[];
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json b/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
index d5b212918f..f0068fd80b 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
@@ -34,7 +34,7 @@
 			"description": "Additional inference parameters for Zero Shot Image Classification",
 			"type": "object",
 			"properties": {
-				"hypothesisTemplate": {
+				"hypothesis_template": {
 					"type": "string",
 					"description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
 				}