Create app.py
parent 4a7d5455f0
commit 2744f97284
@ -0,0 +1,30 @@
name: CI/CD

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install grpcio grpcio-tools torchserve

      - name: Build Docker image
        run: docker build -t myapp .

      - name: Run Docker container
        run: docker run myapp
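The workflow installs grpcio-tools, which is also what produces the inference_pb2 and management_pb2 modules imported by the client later in this change. A minimal generation sketch, assuming the .proto files sit in a local protos/ directory (the path is illustrative):

# Generate Python gRPC stubs from the TorchServe protos.
# Assumption: inference.proto and management.proto live in ./protos.
from grpc_tools import protoc

protoc.main([
    "grpc_tools.protoc",
    "-I", "protos",
    "--python_out=.",
    "--grpc_python_out=.",
    "protos/inference.proto",
    "protos/management.proto",
])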
@ -0,0 +1,32 @@
stages:
  - build
  - deploy

variables:
  DOCKER_HOST: tcp://docker:2375/
  DOCKER_DRIVER: overlay2

services:
  - docker:dind

build:
  stage: build
  image: docker:latest
  script:
    - docker build -t your_registry/flask-app:latest -f Dockerfiles/Dockerfile_flask .
    - docker build -t your_registry/models-app:latest -f Dockerfiles/Dockerfile_models .
    - docker build -t your_registry/grafana-app:latest -f Dockerfiles/Dockerfile_grafana .
    - docker build -t your_registry/prometheus-app:latest -f Dockerfiles/Dockerfile_prometheus .

deploy:
  stage: deploy
  image: ubuntu
  script:
    - apt-get update && apt-get install -y curl
    - curl -sL https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl
    - chmod +x /usr/local/bin/kubectl
    - kubectl apply -f kubernetes/flask-deployment.yaml
    - kubectl apply -f kubernetes/models-deployment.yaml
    - kubectl apply -f kubernetes/service.yaml
    - kubectl apply -f kubernetes/grafana-deployment.yaml
    - kubectl apply -f kubernetes/prometheus-deployment.yaml
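After the manifests are applied, a quick reachability check of the TorchServe gRPC endpoint can catch a broken rollout early. A minimal smoke-test sketch, where the host name is a placeholder for the real service address:

# Post-deploy smoke test: wait for the TorchServe gRPC channel to become ready.
# Assumption: "torchserve.example.internal:7070" stands in for the real endpoint.
import grpc

channel = grpc.insecure_channel("torchserve.example.internal:7070")
try:
    grpc.channel_ready_future(channel).result(timeout=15)
    print("gRPC endpoint is reachable")
except grpc.FutureTimeoutError:
    raise SystemExit("gRPC endpoint did not become ready in time")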
@ -0,0 +1,8 @@
### Example user template template
### Example user template

# IntelliJ project files
.idea
*.iml
out
gen
@ -0,0 +1,48 @@
|
||||
import grpc
|
||||
import inference_pb2
|
||||
import inference_pb2_grpc
|
||||
import management_pb2
|
||||
import management_pb2_grpc
|
||||
import json
|
||||
|
||||
|
||||
def get_inference_stub():
|
||||
channel = grpc.insecure_channel('localhost:7070')
|
||||
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)
|
||||
return stub
|
||||
|
||||
|
||||
def get_management_stub():
|
||||
channel = grpc.insecure_channel('localhost:7071')
|
||||
stub = management_pb2_grpc.ManagementAPIsServiceStub(channel)
|
||||
return stub
|
||||
|
||||
|
||||
def list_models(management_stub):
|
||||
list_model_request_object = management_pb2.ListModelsRequest(limit=10)
|
||||
return management_stub.ListModels(list_model_request_object)
|
||||
|
||||
|
||||
def make_prediction(inference_stub, image_path):
|
||||
with open(image_path, "rb") as f:
|
||||
image_bytes = f.read()
|
||||
input_data = {"data": image_bytes}
|
||||
prediction_request = inference_pb2.PredictionsRequest(model_name="digitmodel", input=input_data)
|
||||
inference_prediction = inference_stub.Predictions(prediction_request)
|
||||
return inference_prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
inference_stub = get_inference_stub()
|
||||
management_stub = get_management_stub()
|
||||
|
||||
output = list_models(management_stub)
|
||||
print(f"output: {output}")
|
||||
|
||||
message = json.loads(output.msg)
|
||||
print(f"message: {message}")
|
||||
|
||||
prediction = make_prediction(inference_stub, "torchserve_grpc/images/test.png")
|
||||
|
||||
print(prediction)
|
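The PredictionResponse defined in inference.proto carries the handler output as raw bytes, so the printed prediction is not yet a Python object. A minimal decoding sketch continuing the __main__ block above, assuming the custom handler's postprocess output is serialized as JSON:

# Decode the raw prediction bytes into Python data.
# Assumption: TorchServe serializes the handler's list output as JSON.
decoded = json.loads(prediction.prediction.decode("utf-8"))
print(f"predicted class: {decoded}")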
@ -0,0 +1,45 @@
from ts.torch_handler.vision_handler import VisionHandler
import torch
from PIL import Image
from torchvision import transforms
import logging
import io
import base64


class CustomHandler(VisionHandler):
    def __init__(self):
        super(CustomHandler, self).__init__()
        self.image_processing = transforms.Compose([
            transforms.Resize((96, 96)),
            transforms.ToTensor()])

    def preprocess(self, data):
        images = []
        for row in data:
            # Compat layer: normally the envelope should just return the data
            # directly, but older versions of TorchServe didn't have envelopes.
            image = row.get("data") or row.get("body")
            if isinstance(image, str):
                # The image arrived as a base64-encoded string.
                image = base64.b64decode(image)

            # The image arrived as raw bytes.
            if isinstance(image, (bytearray, bytes)):
                image = Image.open(io.BytesIO(image)).convert("RGB")
                image = self.image_processing(image)
                logging.info(f"image shape after preprocess: {image.shape}")
            else:
                # The image arrived as a plain list of numbers.
                image = torch.FloatTensor(image)

            images.append(image)

        return torch.stack(images).to(self.device)

    def postprocess(self, data):
        logging.info("Inside Post Process")
        logging.info(f"Outputs: {data}")
        predictions = torch.argmax(data, dim=1) + 1
        logging.info(predictions.tolist())
        return predictions.tolist()
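The preprocess/postprocess pair can be exercised outside TorchServe before packaging. A minimal local sanity-check sketch, assuming a test image and a handler module name that are illustrative; the device is set by hand because BaseHandler.initialize() is not run here:

# Local sanity check for CustomHandler.preprocess/postprocess.
import torch
from handler import CustomHandler  # module name assumed

handler = CustomHandler()
handler.device = torch.device("cpu")  # normally set by BaseHandler.initialize()

with open("images/test.png", "rb") as f:  # test image path assumed
    payload = [{"data": f.read()}]

batch = handler.preprocess(payload)           # -> tensor of shape (1, 3, 96, 96)
fake_logits = torch.randn(batch.size(0), 10)  # stand-in for real model output
print(handler.postprocess(fake_logits))       # -> list of predicted labels (1..10)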
@ -0,0 +1,34 @@
syntax = "proto3";
package org.pytorch.serve.grpc.inference;

import "google/protobuf/empty.proto";

option java_multiple_files = true;

message PredictionsRequest {
    // Name of model.
    string model_name = 1; //required

    // Version of model to run prediction on.
    string model_version = 2; //optional

    // Input data for model prediction.
    map<string, bytes> input = 3; //required
}

message PredictionResponse {
    // Response content for prediction.
    bytes prediction = 1;
}

message TorchServeHealthResponse {
    // TorchServe health.
    string health = 1;
}

service InferenceAPIsService {
    // Check health status of the TorchServe server.
    rpc Ping(google.protobuf.Empty) returns (TorchServeHealthResponse) {}

    // Predictions entry point to get inference using default model version.
    rpc Predictions(PredictionsRequest) returns (PredictionResponse) {}
}
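Besides Predictions, the Ping RPC above gives a lightweight liveness probe from Python. A minimal sketch against the generated stubs, assuming TorchServe's gRPC inference port is the default 7070:

# Call the Ping RPC to check TorchServe health over gRPC.
import grpc
import inference_pb2_grpc
from google.protobuf import empty_pb2

channel = grpc.insecure_channel("localhost:7070")
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)
health = stub.Ping(empty_pb2.Empty())
print(health.health)  # typically a JSON string such as {"status": "Healthy"}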
@ -0,0 +1,119 @@
syntax = "proto3";

package org.pytorch.serve.grpc.management;

option java_multiple_files = true;

message ManagementResponse {
    // Response string of different management API calls.
    string msg = 1;
}

message DescribeModelRequest {
    // Name of model to describe.
    string model_name = 1; //required
    // Version of model to describe.
    string model_version = 2; //optional
    // Customized metadata.
    bool customized = 3; //optional
}

message ListModelsRequest {
    // Use this parameter to specify the maximum number of items to return. When this value is present, TorchServe does not return more than the specified number of items, but it might return fewer. This value is optional. If you include a value, it must be between 1 and 1000, inclusive. If you do not include a value, it defaults to 100.
    int32 limit = 1; //optional

    // The token to retrieve the next set of results. TorchServe provides the token when the response from a previous call has more results than the maximum page size.
    int32 next_page_token = 2; //optional
}

message RegisterModelRequest {
    // Inference batch size, default: 1.
    int32 batch_size = 1; //optional

    // Inference handler entry point. This value will override handler in MANIFEST.json if present.
    string handler = 2; //optional

    // Number of initial workers, default: 0.
    int32 initial_workers = 3; //optional

    // Maximum delay for batch aggregation, default: 100.
    int32 max_batch_delay = 4; //optional

    // Name of model. This value will override modelName in MANIFEST.json if present.
    string model_name = 5; //optional

    // Maximum time, in seconds, that TorchServe waits for a response from the model inference code, default: 120.
    int32 response_timeout = 6; //optional

    // Runtime for the model custom service code. This value will override runtime in MANIFEST.json if present.
    string runtime = 7; //optional

    // Decides whether worker creation is synchronous or not, default: false.
    bool synchronous = 8; //optional

    // Model archive download URL; supports local files or the HTTP(S) protocol.
    string url = 9; //required

    // Decides whether S3 SSE KMS is enabled or not, default: false.
    bool s3_sse_kms = 10; //optional
}

message ScaleWorkerRequest {
    // Name of model to scale workers.
    string model_name = 1; //required

    // Model version.
    string model_version = 2; //optional

    // Maximum number of worker processes.
    int32 max_worker = 3; //optional

    // Minimum number of worker processes.
    int32 min_worker = 4; //optional

    // Number of GPU worker processes to create.
    int32 number_gpu = 5; //optional

    // Decides whether the call is synchronous or not, default: false.
    bool synchronous = 6; //optional

    // Wait up to the specified time, if necessary, for a worker to complete all pending requests. Use 0 to terminate the backend worker process immediately. Use -1 to wait indefinitely.
    int32 timeout = 7; //optional
}

message SetDefaultRequest {
    // Name of model whose default version needs to be updated.
    string model_name = 1; //required

    // Version of model to be set as the default version for the model.
    string model_version = 2; //required
}

message UnregisterModelRequest {
    // Name of model to unregister.
    string model_name = 1; //required

    // Version of model to unregister.
    string model_version = 2; //optional
}

service ManagementAPIsService {
    // Provides detailed information about the default version of a model.
    rpc DescribeModel(DescribeModelRequest) returns (ManagementResponse) {}

    // List registered models in TorchServe.
    rpc ListModels(ListModelsRequest) returns (ManagementResponse) {}

    // Register a new model in TorchServe.
    rpc RegisterModel(RegisterModelRequest) returns (ManagementResponse) {}

    // Configure the number of workers for the default version of a model. This is an asynchronous call by default; the caller needs to call DescribeModel to check whether the model workers have been changed.
    rpc ScaleWorker(ScaleWorkerRequest) returns (ManagementResponse) {}

    // Set the default version of a model.
    rpc SetDefault(SetDefaultRequest) returns (ManagementResponse) {}

    // Unregister the default version of a model from TorchServe if it is the only version available. This is an asynchronous call by default; the caller can call ListModels to confirm the model is unregistered.
    rpc UnregisterModel(UnregisterModelRequest) returns (ManagementResponse) {}
}
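The management service above can be driven from the same generated stubs as the client file. A minimal sketch that registers a model archive and scales its workers, assuming a hypothetical digitmodel.mar is already present in the configured model store and the management port is the default 7071:

# Register a model archive and scale its workers via the management API.
import grpc
import management_pb2
import management_pb2_grpc

channel = grpc.insecure_channel("localhost:7071")
stub = management_pb2_grpc.ManagementAPIsServiceStub(channel)

register = management_pb2.RegisterModelRequest(
    url="digitmodel.mar",      # archive name in the model store (assumed)
    model_name="digitmodel",
    initial_workers=1,
    synchronous=True,
)
print(stub.RegisterModel(register).msg)

scale = management_pb2.ScaleWorkerRequest(
    model_name="digitmodel",
    min_worker=2,
    synchronous=True,
)
print(stub.ScaleWorker(scale).msg)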
@ -0,0 +1,26 @@
import torch
import torchvision


class DigitCNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = self._model_prep()

    def _model_prep(self):
        model = torchvision.models.mobilenet_v3_small(pretrained=True, progress=True)
        classification_layer = torch.nn.Sequential(
            torch.nn.Linear(576, out_features=1024),
            torch.nn.Hardswish(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 10),
            torch.nn.Softmax(dim=1)
        )
        model.classifier = classification_layer

        # Freeze the early feature blocks; only the later features and the
        # new classifier head remain trainable.
        for param in model.features[:11].parameters():
            param.requires_grad = False
        return model

    def forward(self, x):
        return self.model(x)
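One way to produce a serialized model file for the model store is TorchScript tracing, which matches the 96x96 RGB input produced by the custom handler. A minimal export sketch, assuming training happened elsewhere and the output path and module name are illustrative:

# Trace DigitCNN and save it for packaging into a model archive.
import torch
from model import DigitCNN  # module name assumed

model = DigitCNN()
model.eval()
example = torch.randn(1, 3, 96, 96)  # matches the handler's Resize((96, 96)) + ToTensor()
traced = torch.jit.trace(model, example)
traced.save("model_store/digitmodel/digit_cnn.pt")  # output path assumed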
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,3 +1,7 @@
flask
flask~=3.0.2
Flask-SQLAlchemy
APScheduler
APScheduler~=3.10.4
grpcio
torch~=2.2.1
torchvision~=0.17.1
pillow~=10.2.0
@ -0,0 +1,50 @@
import os
import grpc
import asyncio
import torchserve_pb2
import torchserve_pb2_grpc
from concurrent.futures import ThreadPoolExecutor


def load_models_from_directory(stub):
    model_directories = [d for d in os.listdir('model_store') if os.path.isdir(os.path.join('model_store', d))]

    for model_dir in model_directories:
        model_path = os.path.join('model_store', model_dir, 'model.pth')
        request = torchserve_pb2.ModelRequest()
        request.model_name = model_dir
        request.model_path = model_path

        stub.LoadModel(request)


def run_inference(input_tensor):
    global stub
    request = torchserve_pb2.InferenceRequest()
    request.data.extend(input_tensor)

    response = stub.Infer(request)
    return response.data


async def main():
    global stub
    # The stub is called with blocking semantics from worker threads,
    # so a synchronous channel is used here.
    channel = grpc.insecure_channel('localhost:7070')
    stub = torchserve_pb2_grpc.GreeterStub(channel)

    # Load the models asynchronously, off the event loop.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, load_models_from_directory, stub)

    # Build a list of input data, one entry per model.
    input_tensors = [[1, 2, 3, 4], [5, 6, 7, 8]]  # example input tensors for different models

    # Run inference on the models in parallel.
    with ThreadPoolExecutor(max_workers=len(input_tensors)) as executor:
        futures = [executor.submit(run_inference, input_tensor) for input_tensor in input_tensors]
        results = [future.result() for future in futures]
        print(results)


if __name__ == '__main__':
    asyncio.run(main())
@ -0,0 +1,21 @@
# tasks.py
class ModelTester:
    def __init__(self):
        pass

    def run_tests(self):
        # Code for testing the models
        pass


class ModelUpdater:
    def __init__(self):
        pass

    def check_for_updates(self):
        # Code for checking whether model updates are available
        pass

    def update_models(self):
        # Code for updating the models
        pass
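These task classes are natural targets for the APScheduler dependency pinned in requirements.txt. A minimal scheduling sketch, assuming the jobs are wired up in the Flask app's startup code; the schedule times are illustrative:

# Schedule the stub tasks with APScheduler's background scheduler.
from apscheduler.schedulers.background import BackgroundScheduler
from tasks import ModelTester, ModelUpdater

scheduler = BackgroundScheduler()
scheduler.add_job(ModelTester().run_tests, "cron", hour=2, minute=0)      # nightly model tests (assumed time)
scheduler.add_job(ModelUpdater().check_for_updates, "interval", hours=6)  # periodic update check (assumed interval)
scheduler.start()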
@ -0,0 +1,29 @@
<!-- templates/index.html -->
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Schedules</title>
</head>
<body>
    <h1>Schedules</h1>
    <form action="/create_schedule" method="post">
        <label for="name">Name:</label><br>
        <input type="text" id="name" name="name"><br>
        <label for="time">Time:</label><br>
        <input type="time" id="time" name="time"><br>
        <label for="task">Task:</label><br>
        <select id="task" name="task">
            <option value="test_models">Test Models</option>
            <option value="update_models">Update Models</option>
        </select><br>
        <input type="submit" value="Submit">
    </form>
    <ul>
        {% for schedule in schedules %}
        <li>{{ schedule.name }} - {{ schedule.time }} - {{ schedule.task }}</li>
        {% endfor %}
    </ul>
</body>
</html>
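The form above posts to /create_schedule and the list iterates over a schedules collection passed to the template. A minimal Flask route sketch for that flow, assuming an in-memory list stands in for the real Flask-SQLAlchemy model; names are illustrative:

# Minimal Flask routes backing templates/index.html.
from flask import Flask, request, render_template, redirect, url_for

app = Flask(__name__)
schedules = []  # stand-in for a real database table


@app.route("/")
def index():
    return render_template("index.html", schedules=schedules)


@app.route("/create_schedule", methods=["POST"])
def create_schedule():
    schedules.append({
        "name": request.form["name"],
        "time": request.form["time"],
        "task": request.form["task"],
    })
    return redirect(url_for("index"))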