diff --git a/.github/README.jp.md b/.github/README.jp.md index 9d64e559d..a160eebf7 100644 --- a/.github/README.jp.md +++ b/.github/README.jp.md @@ -148,6 +148,7 @@ AIゲートウェイは、すべての他のプロバイダーとモデルに対 | | [AI21](https://portkey.ai/docs/welcome/integration-guides) | ✅ | ✅ | | | [Stability AI](https://portkey.ai/docs/welcome/integration-guides/stability-ai) | ✅ | ✅ | | | [DeepInfra](https://portkey.ai/docs/welcome/integration-guides) | ✅ | ✅ | +| HPC-AI | [HPC-AI](https://www.hpc-ai.com/) | ✅ | ✅ | | | [Ollama](https://portkey.ai/docs/welcome/integration-guides/ollama) | ✅ | ✅ | | | Novita AI | ✅ | ✅ | `/chat/completions`, `/completions` | diff --git a/README.md b/README.md index 199b744ca..4e618f38b 100644 --- a/README.md +++ b/README.md @@ -260,11 +260,15 @@ Explore Gateway integrations with [45+ providers](https://portkey.wiki/gh-59) an | | [AI21](https://portkey.wiki/gh-91) | ✅ | ✅ | | | [Stability AI](https://portkey.wiki/gh-71) | ✅ | ✅ | | | [DeepInfra](https://portkey.sh/gh-92) | ✅ | ✅ | +| HPC-AI | [HPC-AI](https://www.hpc-ai.com/) | ✅ | ✅ | | | [Ollama](https://portkey.wiki/gh-72) | ✅ | ✅ | | | [Novita AI](https://portkey.wiki/gh-73) | ✅ | ✅ | `/chat/completions`, `/completions` | > [View the complete list of 200+ supported models here](https://portkey.wiki/gh-74) + +**HPC-AI:** Use `provider: "hpc-ai"` with your HPC-AI API key. By default the gateway targets `https://api.hpc-ai.com/inference/v1` (OpenAI-compatible chat completions). To override the base URL, set `x-portkey-custom-host` (or `custom_host` in config); when no custom host is given, the gateway uses the `HPC_AI_BASE_URL` environment variable if it is set, and only then falls back to the default. Example models: `minimax/minimax-m2.5`, `moonshotai/kimi-k2.5`. Local integration tests read the API key from `HPC_AI_API_KEY` (set it in your environment or `.env` file as appropriate). +

diff --git a/src/data/models.json b/src/data/models.json
index 6373438e0..e85c8742e 100644
--- a/src/data/models.json
+++ b/src/data/models.json
@@ -26,6 +26,22 @@
       },
       "name": "Axon Code"
     },
+    {
+      "id": "minimax/minimax-m2.5",
+      "object": "model",
+      "provider": {
+        "id": "hpc-ai"
+      },
+      "name": "MiniMax M2.5"
+    },
+    {
+      "id": "moonshotai/kimi-k2.5",
+      "object": "model",
+      "provider": {
+        "id": "hpc-ai"
+      },
+      "name": "Kimi K2.5"
+    },
     {
       "id": "meta-llama/llama-3.1-70b-instruct/fp-8",
       "object": "model",
diff --git a/src/globals.ts b/src/globals.ts
index 4d6e327e4..583be9a86 100644
--- a/src/globals.ts
+++ b/src/globals.ts
@@ -97,6 +97,7 @@ export const LEPTON: string = 'lepton';
 export const KLUSTER_AI: string = 'kluster-ai';
 export const NSCALE: string = 'nscale';
 export const HYPERBOLIC: string = 'hyperbolic';
+export const HPC_AI: string = 'hpc-ai';
 export const BYTEZ: string = 'bytez';
 export const FEATHERLESS_AI: string = 'featherless-ai';
 export const KRUTRIM: string = 'krutrim';
@@ -173,6 +174,7 @@ export const VALID_PROVIDERS = [
   KLUSTER_AI,
   NSCALE,
   HYPERBOLIC,
+  HPC_AI,
   BYTEZ,
   FEATHERLESS_AI,
   KRUTRIM,
diff --git a/src/providers/hpc-ai/api.ts b/src/providers/hpc-ai/api.ts
new file mode 100644
index 000000000..588e1155d
--- /dev/null
+++ b/src/providers/hpc-ai/api.ts
@@ -0,0 +1,40 @@
+import { ProviderAPIConfig } from '../types';
+
+/** Default OpenAI-compatible inference endpoint for HPC-AI. */
+const DEFAULT_HPC_AI_BASE_URL = 'https://api.hpc-ai.com/inference/v1';
+
+/**
+ * Request routing for the HPC-AI provider: base URL, auth header and
+ * per-function endpoint paths.
+ */
+const HpcAiApiConfig: ProviderAPIConfig = {
+  // Precedence: explicit custom host, then the HPC_AI_BASE_URL environment
+  // variable, then the built-in default.
+  getBaseURL: ({ providerOptions }) => {
+    // Guard the lookup so a runtime without a global `process` does not throw.
+    const fromEnv =
+      typeof process !== 'undefined' && process.env?.HPC_AI_BASE_URL
+        ? process.env.HPC_AI_BASE_URL
+        : '';
+    return providerOptions.customHost || fromEnv || DEFAULT_HPC_AI_BASE_URL;
+  },
+  // Requests carry the caller's API key as a bearer token.
+  headers: ({ providerOptions }) => {
+    return {
+      Authorization: `Bearer ${providerOptions.apiKey}`,
+    };
+  },
+  // Only chat completions (streaming and non-streaming) are mapped; any other
+  // function resolves to an empty path.
+  getEndpoint: ({ fn }) => {
+    switch (fn) {
+      case 'chatComplete':
+      case 'stream-chatComplete':
+        return '/chat/completions';
+      default:
+        return '';
+    }
+  },
+};
+
+export default HpcAiApiConfig;
diff --git a/src/providers/hpc-ai/chatComplete.ts b/src/providers/hpc-ai/chatComplete.ts
new file mode 100644
index 000000000..c58c62d89
--- /dev/null
+++ b/src/providers/hpc-ai/chatComplete.ts
@@ -0,0 +1,270 @@
+import { HPC_AI } from '../../globals';
+import { Params } from '../../types/requestBody';
+import {
+  ChatCompletionResponse,
+  ErrorResponse,
+  ProviderConfig,
+} from '../types';
+import {
+  generateErrorResponse,
+  generateInvalidProviderResponseError,
+} from '../utils';
+
+/**
+ * Request parameter mapping for HPC-AI chat completions: which incoming
+ * parameters are forwarded, under which name, with which defaults and bounds.
+ */
+export const HpcAiChatCompleteConfig: ProviderConfig = {
+  model: {
+    param: 'model',
+    required: true,
+    default: 'minimax/minimax-m2.5',
+  },
+  messages: {
+    param: 'messages',
+    required: true,
+    default: [],
+    // The `developer` role is rewritten to `system`; every other message is
+    // forwarded unchanged.
+    transform: (params: Params) => {
+      return params.messages?.map((message) => {
+        if (message.role === 'developer') return { ...message, role: 'system' };
+        return message;
+      });
+    },
+  },
+  frequency_penalty: {
+    param: 'frequency_penalty',
+    default: 0,
+    min: -2,
+    max: 2,
+  },
+  max_tokens: {
+    param: 'max_tokens',
+    default: 100,
+    min: 1,
+  },
+  // `max_completion_tokens` is sent upstream under the `max_tokens` name.
+  max_completion_tokens: {
+    param: 'max_tokens',
+    default: 100,
+    min: 1,
+  },
+  // `n` is pinned to a single choice (min and max are both 1).
+  n: {
+    param: 'n',
+    default: 1,
+    min: 1,
+    max: 1,
+  },
+  presence_penalty: {
+    param: 'presence_penalty',
+    min: -2,
+    max: 2,
+    default: 0,
+  },
+  temperature: {
+    param: 'temperature',
+    default: 1,
+    min: 0,
+    max: 2,
+  },
+  top_p: {
+    param: 'top_p',
+    default: 1,
+    min: 0,
+    max: 1,
+  },
+  stop: {
+    param: 'stop',
+    default: null,
+  },
+  stream: {
+    param: 'stream',
+    default: false,
+  },
+};
+
+/** Successful non-streaming chat completion payload returned by HPC-AI. */
+interface HpcAiChatCompleteResponse extends ChatCompletionResponse {
+  id: string;
+  object: string;
+  created: number;
+  model: string;
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+/** Error payload carrying a `detail` array of per-field validation entries. */
+export interface HpcAiDetailErrorResponse {
+  detail: {
+    loc: string[];
+    msg: string;
+    type: string;
+  }[];
+}
+
+/** One parsed JSON chunk of a streamed chat completion. */
+interface HpcAiStreamChunk {
+  id: string;
+  object: string;
+  created: number;
+  model: string;
+  choices: {
+    delta: {
+      role?: string | null;
+      content?: string;
+    };
+    index: number;
+    finish_reason: string | null;
+  }[];
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+/**
+ * Converts a non-streaming HPC-AI response into the gateway's chat completion
+ * or error shape.
+ *
+ * For non-200 statuses two upstream error formats are handled: an `error`
+ * object and a non-empty `detail` array. A payload with `choices` is mapped
+ * to a chat completion; anything else is reported as an invalid provider
+ * response.
+ *
+ * @param response - Parsed JSON body returned by HPC-AI.
+ * @param responseStatus - HTTP status code of the upstream response.
+ * @returns The normalized completion, or an error response for `hpc-ai`.
+ */
+export const HpcAiChatCompleteResponseTransform: (
+  response:
+    | HpcAiChatCompleteResponse
+    | HpcAiDetailErrorResponse
+    | ErrorResponse,
+  responseStatus: number
+) => ChatCompletionResponse | ErrorResponse = (response, responseStatus) => {
+  if (responseStatus !== 200 && 'error' in response && response.error) {
+    return generateErrorResponse(
+      {
+        ...response.error,
+      },
+      HPC_AI
+    );
+  }
+
+  if (
+    'detail' in response &&
+    responseStatus !== 200 &&
+    Array.isArray(response.detail) &&
+    response.detail.length
+  ) {
+    // Only the first entry is surfaced, prefixed with its dot-joined location
+    // when one is present.
+    const [firstError] = response.detail;
+    const errorField = firstError.loc?.join('.') ?? '';
+
+    return generateErrorResponse(
+      {
+        message: `${errorField ? `${errorField}: ` : ''}${firstError.msg}`,
+        type: firstError.type,
+        param: null,
+        code: null,
+      },
+      HPC_AI
+    );
+  }
+
+  if ('choices' in response) {
+    return {
+      id: response.id,
+      object: response.object,
+      created: response.created,
+      model: response.model,
+      provider: HPC_AI,
+      choices: response.choices.map((c) => ({
+        index: c.index,
+        message: {
+          role: c.message.role,
+          content: c.message.content,
+        },
+        finish_reason: c.finish_reason,
+      })),
+      usage: {
+        prompt_tokens: response.usage?.prompt_tokens,
+        completion_tokens: response.usage?.completion_tokens,
+        total_tokens: response.usage?.total_tokens,
+      },
+    };
+  }
+
+  return generateInvalidProviderResponseError(response, HPC_AI);
+};
+
+/**
+ * Matches keep-alive comment lines of the form
+ * `: ping - YYYY-MM-DD HH:MM:SS.ffffff+HH:MM`.
+ */
+const HPC_AI_PING_PATTERN =
+  /^:\s*ping\s*-\s*\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6}\+\d{2}:\d{2}$/;
+
+/**
+ * Rewrites one streamed HPC-AI chunk into a normalized `data:` frame.
+ *
+ * Ping comment lines are dropped, the `[DONE]` sentinel is passed through,
+ * and JSON chunks are re-serialized with the provider id attached.
+ *
+ * @param responseChunk - One raw chunk from the upstream stream, with or
+ *   without the `data: ` prefix.
+ * @returns A `data: ...` frame terminated by a blank line, or `''` for pings.
+ * @throws SyntaxError when the chunk is not a ping, `[DONE]`, or valid JSON.
+ */
+export const HpcAiChatCompleteStreamChunkTransform: (
+  response: string
+) => string = (responseChunk) => {
+  let chunk = responseChunk.trim();
+  // Match pings on the trimmed text so surrounding whitespace cannot make a
+  // keep-alive line fall through to JSON.parse.
+  if (HPC_AI_PING_PATTERN.test(chunk)) {
+    return '';
+  }
+
+  chunk = chunk.replace(/^data: /, '');
+  chunk = chunk.trim();
+  if (chunk === '[DONE]') {
+    return `data: ${chunk}\n\n`;
+  }
+  const parsedChunk: HpcAiStreamChunk = JSON.parse(chunk);
+  // A chunk may carry an empty `choices` array (e.g. a trailing usage-only
+  // chunk); forward no choices then instead of dereferencing `choices[0]`.
+  const firstChoice = parsedChunk.choices?.[0];
+  return (
+    `data: ${JSON.stringify({
+      id: parsedChunk.id,
+      object: parsedChunk.object,
+      created: parsedChunk.created,
+      model: parsedChunk.model,
+      provider: HPC_AI,
+      choices: firstChoice
+        ? [
+            {
+              index: firstChoice.index,
+              delta: firstChoice.delta,
+              finish_reason: firstChoice.finish_reason,
+            },
+          ]
+        : [],
+      usage: parsedChunk.usage
+        ? {
+            prompt_tokens: parsedChunk.usage.prompt_tokens,
+            completion_tokens: parsedChunk.usage.completion_tokens,
+            total_tokens: parsedChunk.usage.total_tokens,
+          }
+        : undefined,
+    })}` + '\n\n'
+  );
+};
diff --git a/src/providers/hpc-ai/index.ts b/src/providers/hpc-ai/index.ts
new file mode 100644
index 000000000..a9e976f3d
--- /dev/null
+++ b/src/providers/hpc-ai/index.ts
@@ -0,0 +1,22 @@
+import { ProviderConfigs } from '../types';
+import HpcAiApiConfig from './api';
+import {
+  HpcAiChatCompleteConfig,
+  HpcAiChatCompleteResponseTransform,
+  HpcAiChatCompleteStreamChunkTransform,
+} from './chatComplete';
+
+/**
+ * HPC-AI provider bundle: chat completion request config, API routing and
+ * response transforms for streaming and non-streaming calls.
+ */
+const HpcAiConfig: ProviderConfigs = {
+  chatComplete: HpcAiChatCompleteConfig,
+  api: HpcAiApiConfig,
+  responseTransforms: {
+    chatComplete: HpcAiChatCompleteResponseTransform,
+    'stream-chatComplete': HpcAiChatCompleteStreamChunkTransform,
+  },
+};
+
+export default HpcAiConfig;
diff --git a/src/providers/index.ts b/src/providers/index.ts
index 2cd5355f8..f244a7194 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -60,6 +60,7 @@ import LeptonConfig from './lepton';
 import KlusterAIConfig from './kluster-ai';
 import NscaleConfig from './nscale';
 import HyperbolicConfig from './hyperbolic';
+import HpcAiConfig from './hpc-ai';
 import { FeatherlessAIConfig } from './featherless-ai';
 import KrutrimConfig from './krutrim';
 import AI302Config from './302ai';
@@ -133,6 +134,7 @@ const Providers: { [key: string]: ProviderConfigs } = {
   'kluster-ai': KlusterAIConfig,
   nscale: NscaleConfig,
   hyperbolic: HyperbolicConfig,
+  'hpc-ai': HpcAiConfig,
   bytez: BytezConfig,
   'featherless-ai': FeatherlessAIConfig,
   krutrim: KrutrimConfig,
diff --git a/src/public/index.html b/src/public/index.html
index 9bd7e77e2..2c5bc7786 100644
--- a/src/public/index.html
+++ b/src/public/index.html
@@ -1109,6 +1109,7 @@

Select Provider

+ @@ -1460,6 +1461,7 @@

Enter API Key

"openai": "gpt-4o-mini", "anthropic": "claude-3-5-sonnet-20240620", "groq": "llama3-70b-8192", + "hpc-ai": "minimax/minimax-m2.5", "bedrock": "anthropic.claude-3-sonnet-20240229-v1:0", "azure-openai": "gpt-4o-mini", "cohere": "command-r-plus", @@ -1474,6 +1476,7 @@

Enter API Key

"openai": "https://portkey.ai/docs/integrations/llms/openai", "anthropic": "https://portkey.ai/docs/integrations/llms/anthropic", "groq": "https://portkey.ai/docs/integrations/llms/groq", + "hpc-ai": "https://portkey.ai/docs/integrations/llms/hpc-ai", "bedrock": "https://portkey.ai/docs/integrations/llms/aws-bedrock", "azure-openai": "https://portkey.ai/docs/integrations/llms/azure-openai", "cohere": "https://portkey.ai/docs/integrations/llms/cohere", diff --git a/src/tests/resources/testVariables.ts b/src/tests/resources/testVariables.ts index 2c5047dc7..b270819b2 100644 --- a/src/tests/resources/testVariables.ts +++ b/src/tests/resources/testVariables.ts @@ -62,6 +62,10 @@ const testVariables: TestVariables = { apiKey: process.env.DEEPINFRA_API_KEY, chatCompletions: { model: 'meta-llama/Meta-Llama-3-8B-Instruct' }, }, + 'hpc-ai': { + apiKey: process.env.HPC_AI_API_KEY, + chatCompletions: { model: 'minimax/minimax-m2.5' }, + }, 'stability-ai': { apiKey: process.env.STABILITY_AI_API_KEY, chatCompletions: { model: '' },