From 554c2211dbe84552112fc57c14bb2ba948704ad1 Mon Sep 17 00:00:00 2001
From: Tom Grant <thomas.grant.mail@gmail.com>
Date: Mon, 7 Feb 2022 19:58:59 +0000
Subject: [PATCH 01/10] Remove dlib dependency for faster facial recognition

DLib and imutils were very slow for high resolution images and required
a large build on the initial installation. Fortunately, at this resolution,
`cv2.dnn` was ~4x faster, more accurate and already installed with opencv.

Alternatively, I have provided `FaceDetectorCascade` which uses
`cv2.CascadeClassifier`. Although this is less accurate than dlib, I have
found that it is much faster at the required resolutions.
---
 requirements.txt     |  23 +++----
 src/face_detector.py | 150 +++++++++++++++++++++++--------------------
 src/main.py          |  11 ++--
 src/structures.py    |   2 -
 4 files changed, 96 insertions(+), 90 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index b40b0fc..0dba8f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,18 +2,17 @@ aiohttp==3.8.1
 aiosignal==1.2.0
 async-timeout==4.0.2
 attrs==21.4.0
-black==21.12b0
+black==22.1.0
 cairocffi==1.3.0
 CairoSVG==2.5.2
 cffi==1.15.0
 cfgv==3.3.1
-charset-normalizer==2.0.10
+charset-normalizer==2.0.11
 click==8.0.3
 cssselect2==0.4.1
-deepgram-sdk==0.2.1
+deepgram-sdk==0.2.4
 defusedxml==0.7.1
 distlib==0.3.4
-dlib==19.22.1
 drawSvg==1.8.3
 ffmpeg-python==0.2.0
 filelock==3.4.2
@@ -21,20 +20,19 @@ Flask==2.0.2
 frozenlist==1.3.0
 future==0.18.2
 gunicorn==20.1.0
-identify==2.4.4
+identify==2.4.8
 idna==3.3
-imageio==2.14.0
-imutils==0.5.4
+imageio==2.15.0
 itsdangerous==2.0.1
 Jinja2==3.0.3
 MarkupSafe==2.0.1
-multidict==5.2.0
+multidict==6.0.2
 mypy-extensions==0.4.3
 nodeenv==1.6.0
-numpy==1.22.1
+numpy==1.22.2
 opencv-python==4.5.5.62
 pathspec==0.9.0
-Pillow==9.0.0
+Pillow==9.0.1
 platformdirs==2.4.1
 pre-commit==2.17.0
 pycparser==2.21
@@ -43,9 +41,8 @@ PyYAML==6.0
 six==1.16.0
 tinycss2==1.1.1
 toml==0.10.2
-tomli==1.2.3
-typing_extensions==4.0.1
-virtualenv==20.13.0
+tomli==2.0.0
+virtualenv==20.13.1
 webencodings==0.5.1
 websockets==10.1
 Werkzeug==2.0.2
diff --git a/src/face_detector.py b/src/face_detector.py
index 5314d18..1f9871b 100644
--- a/src/face_detector.py
+++ b/src/face_detector.py
@@ -1,83 +1,85 @@
-from pathlib import Path
-
+import numpy as np
 import cv2
-import dlib
-from imutils import face_utils
 
 from structures import Rect
 
 
 class FaceDetector:
-    def __init__(self):
-        self.DETECTOR = dlib.get_frontal_face_detector()
-        # self.PREDICTOR = dlib.shape_predictor(
-        #     (
-        #         Path(".")
-        #         / "src"
-        #         / "dlib_shape_predictor"
-        #         / "shape_predictor_68_face_landmarks.dat"
-        #     ).as_posix()
-        # )
-
-    @staticmethod
-    def dist(a, b):
-        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** 0.5
+    def find_speaker_face(self, frame: np.ndarray) -> Rect:
+        raise NotImplementedError()
+
+
+class FaceDetectorDNN(FaceDetector):
+    def __init__(
+        self,
+        model="opencv_face_detector_uint8.pb",
+        config="opencv_face_detector.pbtxt",
+        detection_threshold=0.5,
+    ):
+        self.model = cv2.dnn.readNetFromTensorflow(model, config)
+        self.detection_threshold = detection_threshold
 
     def find_speaker_face(self, frame):
-        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        rects = self.DETECTOR(gray, 1)
-
-        # this is the default speaker face position
-        speaker_face = Rect(
-            ((frame.shape[0] // 2) - 10),
-            ((frame.shape[1] // 2) - 10),
-            ((frame.shape[0] // 2) + 10),
-            ((frame.shape[1] // 2) + 10),
+        blob = cv2.dnn.blobFromImage(
+            frame, 1.0, (300, 300), [104, 117, 123], False, False
         )
 
-        speaker_mouth_ratio = 0.0
+        self.model.setInput(blob)
+        possible_face_detections = self.model.forward()
 
         # find the minimum bounding box that contains all speakers
-        min_x = frame.shape[0]
-        max_x = 0
-        min_y = frame.shape[1]
-        max_y = 0
+        min_x, min_y = frame.shape[1], frame.shape[0]
+        max_x, max_y = 0, 0
+
+        for i in range(possible_face_detections.shape[2]):
+            face = possible_face_detections[0, 0, i]
+            if face[2] > self.detection_threshold:
+                x1 = int(face[3] * frame.shape[1])
+                y1 = int(face[4] * frame.shape[0])
+                x2 = int(face[5] * frame.shape[1])
+                y2 = int(face[6] * frame.shape[0])
+
+                min_x, min_y = min(min_x, x1), min(min_y, y1)
+                max_x, max_y = max(max_x, x2), max(max_y, y2)
+
+        if min_x > max_x or min_y > max_y:
+            # Can't find a face, default to the whole image
+            min_x, min_y = 0, 0
+            max_x, max_y = frame.shape[1], frame.shape[0]
+
+        speakers_bb = Rect(
+            min_x,
+            min_y,
+            (max_x - min_x),
+            (max_y - min_y),
+        )
+
+        return speakers_bb
 
-        for rect in rects:
-            # shape = self.PREDICTOR(gray, rect)
-            # shape = face_utils.shape_to_np(shape)
 
-            # mouth_open = max(
-            #     FaceDetector.dist(shape[61], shape[67]),
-            #     FaceDetector.dist(shape[62], shape[66]),
-            #     FaceDetector.dist(shape[63], shape[65]),
-            # )
-            # mouth_width = FaceDetector.dist(shape[54], shape[48])
+class FaceDetectorCascade(FaceDetector):
+    def __init__(self, model="haarcascade_frontalface_default.xml"):
+        self.model = cv2.CascadeClassifier(model)
+
+    def find_speaker_face(self, frame):
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        gray = cv2.equalizeHist(gray)
+
+        face_rects = self.model.detectMultiScale(gray, 1.1, 6)
 
-            (x, y, w, h) = face_utils.rect_to_bb(rect)
+        # find the minimum bounding box that contains all speakers
+        min_x, min_y = frame.shape[1], frame.shape[0]
+        max_x, max_y = 0, 0
 
+        for (x, y, w, h) in face_rects:
             # extend text exclusion bounding box to include speaker
-            if x < min_x:
-                min_x = x
-            if x + w > max_x:
-                max_x = x + w
-            if y < min_y:
-                min_y = y
-            if y + h > max_y:
-                max_y = y + h
-
-            # if (mouth_open / mouth_width) > speaker_mouth_ratio:
-            #     speaker_mouth_ratio = mouth_open / mouth_width
-            #     speaker_face = Rect(
-            #         x,
-            #         y,
-            #         w,
-            #         h,
-            #     )
-
-        if min_x > max_x:
-            max_x = min_x
-            max_y = min_y
+            min_x, min_y = min(x, min_x), min(y, min_y)
+            max_x, max_y = max(x, x + w), max(y, y + h)
+
+        if min_x > max_x or min_y > max_y:
+            # Can't find a face, default to the whole image
+            min_x, min_y = 0, 0
+            max_x, max_y = frame.shape[1], frame.shape[0]
 
         speakers_bb = Rect(
             min_x,
@@ -86,21 +88,33 @@ def find_speaker_face(self, frame):
             (max_y - min_y),
         )
 
-        return speaker_face, speakers_bb
+        return speakers_bb
 
 
 if __name__ == "__main__":
     cap = cv2.VideoCapture(0)
-    face_detector = FaceDetector()
+    face_detector = FaceDetectorDNN()
 
     if not cap.isOpened():
         raise IOError("Cannot open webcam")
 
     while True:
         ret, frame = cap.read()
-        frame = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
-        cv2.imshow("Input", frame)
-        speaker_face, speakers_bb = face_detector.find_speaker_face(frame)
+
+        speakers_bb = face_detector.find_speaker_face(frame)
+
+        cv2.rectangle(
+            frame,
+            (speakers_bb.x, speakers_bb.y),
+            (
+                speakers_bb.x + speakers_bb.width,
+                speakers_bb.y + speakers_bb.height,
+            ),
+            (255, 0, 0),
+            2,
+        )
+
+        cv2.imshow("Face", frame)
 
         if cv2.waitKey(1) == ord("q"):
             break
diff --git a/src/main.py b/src/main.py
index b58f342..4fba842 100644
--- a/src/main.py
+++ b/src/main.py
@@ -17,7 +17,7 @@
     url_for,
 )
 
-from face_detector import FaceDetector
+from face_detector import FaceDetector, FaceDetectorDNN
 from frame_processor import StyleTransfer
 
 from layout_generator import LayoutGenerator
@@ -62,10 +62,7 @@ def get_key_frame_index(segment: Segment) -> None:
 def detect_speaker(face_detector: FaceDetector):
     def face_detector_func(segment: Segment) -> None:
         segment.keyframe = segment.frames[segment.keyframe_index]
-        (
-            segment.speaker_location,
-            segment.speakers_bbox,
-        ) = face_detector.find_speaker_face(segment.keyframe)
+        segment.speakers_bbox = face_detector.find_speaker_face(segment.keyframe)
 
     return face_detector_func
 
@@ -145,7 +142,7 @@ def process_video(path: str) -> str:
         with open("transcript.json", "w") as file:
             json.dump(utterances, file, indent=4)
 
-    face_detector = FaceDetector()
+    face_detector = FaceDetectorDNN()
     pipeline = pipe(
         attach_frames(video),
         get_key_frame_index,
@@ -220,4 +217,4 @@ def submit_video_api():
 
 
 if __name__ == "__main__":
-    app.run(host="127.0.0.1", port=8000, debug=True, threaded=True)
+    app.run(host="127.0.0.1", port=8081, debug=True, threaded=True)
diff --git a/src/structures.py b/src/structures.py
index d7c917b..7a9f399 100644
--- a/src/structures.py
+++ b/src/structures.py
@@ -47,7 +47,6 @@ def __init__(
         frames: np.ndarray = None,
         keyframe_index: int = None,
         keyframe: np.ndarray = None,
-        speaker_location: Rect = None,
         speakers_bbox: Rect = None,
         image: ImageData = None,
     ):
@@ -59,6 +58,5 @@ def __init__(
         self.frames = frames
         self.keyframe_index = keyframe_index
         self.keyframe = keyframe
-        self.speaker_location = speaker_location
         self.speakers_bbox = speakers_bbox
         self.image = image

From 088fda5117d0114c85dad86291186e0b7cc822c8 Mon Sep 17 00:00:00 2001
From: Tom Grant <thomas.grant.mail@gmail.com>
Date: Mon, 7 Feb 2022 20:29:06 +0000
Subject: [PATCH 02/10] Makefile infrastructure for opencv face-detection

---
 .gitignore                                         |  4 ++++
 Makefile                                           | 12 ++++++++----
 .../.gitfolder                                     |  0
 src/face_detector.py                               | 14 +++++++++-----
 4 files changed, 21 insertions(+), 9 deletions(-)
 rename {src/dlib_shape_predictor => opencv_model}/.gitfolder (100%)

diff --git a/.gitignore b/.gitignore
index e4b71a8..5426bfe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,7 @@ cython_debug/
 uploads/
 *.dat
 *.pickle
+#   OpenCV model files
+haarcascade_frontalface_default.xml
+*.pb
+*.pbtxt
diff --git a/Makefile b/Makefile
index daa1c93..f75745b 100644
--- a/Makefile
+++ b/Makefile
@@ -8,10 +8,14 @@ install: download-model
 	pre-commit install
 
 download-model:
-	if [ ! -f "$(DIR)/src/dlib_shape_predictor/shape_predictor_68_face_landmarks.dat" ]; then \
-		wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2; \
-		bzip2 -d shape_predictor_68_face_landmarks.dat.bz2; \
-		mv shape_predictor_68_face_landmarks.dat $(DIR)/src/dlib_shape_predictor/; \
+	if [ ! -f "$(DIR)/opencv_model/opencv_face_detector_uint8.pb" ]; then \
+		wget https://github.com/spmallick/learnopencv/raw/master/AgeGender/opencv_face_detector_uint8.pb -P "$(DIR)/opencv_model/"; \
+	fi;
+	if [ ! -f "$(DIR)/opencv_model/opencv_face_detector.pbtxt" ]; then \
+		wget https://raw.githubusercontent.com/spmallick/learnopencv/master/AgeGender/opencv_face_detector.pbtxt -P "$(DIR)/opencv_model/"; \
+	fi;
+	if [ ! -f "$(DIR)/opencv_model/haarcascade_frontalface_default.xml" ]; then \
+		wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml -P "$(DIR)/opencv_model/"; \
 	fi;
 
 build:
diff --git a/src/dlib_shape_predictor/.gitfolder b/opencv_model/.gitfolder
similarity index 100%
rename from src/dlib_shape_predictor/.gitfolder
rename to opencv_model/.gitfolder
diff --git a/src/face_detector.py b/src/face_detector.py
index 1f9871b..cddf497 100644
--- a/src/face_detector.py
+++ b/src/face_detector.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import numpy as np
 import cv2
 
@@ -12,11 +14,11 @@ def find_speaker_face(self, frame: np.ndarray) -> Rect:
 class FaceDetectorDNN(FaceDetector):
     def __init__(
         self,
-        model="opencv_face_detector_uint8.pb",
-        config="opencv_face_detector.pbtxt",
+        model_path=Path("opencv_model", "opencv_face_detector_uint8.pb"),
+        config_path=Path("opencv_model", "opencv_face_detector.pbtxt"),
         detection_threshold=0.5,
     ):
-        self.model = cv2.dnn.readNetFromTensorflow(model, config)
+        self.model = cv2.dnn.readNetFromTensorflow(str(model_path), str(config_path))
         self.detection_threshold = detection_threshold
 
     def find_speaker_face(self, frame):
@@ -58,8 +60,10 @@ def find_speaker_face(self, frame):
 
 
 class FaceDetectorCascade(FaceDetector):
-    def __init__(self, model="haarcascade_frontalface_default.xml"):
-        self.model = cv2.CascadeClassifier(model)
+    def __init__(
+        self, model_path=Path("opencv_model", "haarcascade_frontalface_default.xml")
+    ):
+        self.model = cv2.CascadeClassifier(str(model_path))
 
     def find_speaker_face(self, frame):
         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

From c84ddea0c2d62085e978b91b061e62933436c7dd Mon Sep 17 00:00:00 2001
From: Tom Grant <thomas.grant.mail@gmail.com>
Date: Tue, 8 Feb 2022 00:22:20 +0000
Subject: [PATCH 03/10] Revert to default port number

---
 src/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main.py b/src/main.py
index 4fba842..12b323b 100644
--- a/src/main.py
+++ b/src/main.py
@@ -217,4 +217,4 @@ def submit_video_api():
 
 
 if __name__ == "__main__":
-    app.run(host="127.0.0.1", port=8081, debug=True, threaded=True)
+    app.run(host="127.0.0.1", port=8000, debug=True, threaded=True)

From b4773a7bee8ec4299dff5f4eb760a4d641b2aa85 Mon Sep 17 00:00:00 2001
From: Weixuan Zhang <weixuanz.git@icloud.com>
Date: Tue, 8 Feb 2022 01:15:51 +0000
Subject: [PATCH 04/10] Update Dockerfile

---
 Dockerfile | 8 ++++----
 Makefile   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 4181a24..7a598bb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,8 @@
-FROM ubuntu:20.04
+FROM python:3
 LABEL version="0.1"
 LABEL description="Docker image for YACK"
-ARG DEBIAN_FRONTEND=noninteractive
 
-RUN apt update
-RUN apt install -y python3-pip python3-dev cmake ffmpeg libsm6 libxext6 wget
+RUN apt update && apt install -y ffmpeg libsm6 libxext6 wget
 
 COPY ./requirements.txt /app/requirements.txt
 
@@ -14,5 +12,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 COPY . /app
 
+RUN make download-model
+
 EXPOSE 8000
 CMD ["gunicorn", "--bind", ":8000", "--workers", "2", "--threads", "8", "--timeout", "0", "--pythonpath", "./src", "main:app"]
diff --git a/Makefile b/Makefile
index f75745b..2ca018a 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ download-model:
 build:
 	docker build -t yack:latest .
 
-run: download-model build
+run: build
 	docker run -e "DEEPGRAM_API_KEY=$(DEEPGRAM_API_KEY)" -e "ENV=production" -p 8000:8000 yack
 
 push: build

From 9fdda895b867a6d61bdae003cb8a537ada3eefb5 Mon Sep 17 00:00:00 2001
From: Weixuan Zhang <weixuanz.git@icloud.com>
Date: Tue, 8 Feb 2022 01:27:48 +0000
Subject: [PATCH 05/10] Remove dlib from README

---
 README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/README.md b/README.md
index 83d1795..f93215d 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,6 @@ Deepgram api is used for speech-to-text, get your key at https://deepgram.com an
 DEEPGRAM_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 ```
 
-dlib Facial Landmark Detector is used, which is available under the Boost Software License
-from https://github.com/davisking/dlib. The pretrained weights used are available
-from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 and should be placed
-in `./src/dlib_shape_predictor/`.
-
 To start developing using Docker, simply use
 ```shell
 make run

From 1cb41c01bc5e0c4a1f66227feeb3504464602560 Mon Sep 17 00:00:00 2001
From: Andreas Economides <antroseconomides@hotmail.co.uk>
Date: Sun, 13 Feb 2022 13:40:40 +0000
Subject: [PATCH 06/10] Improve containerization

- The Makefile runs yack in dev mode, because we want http locally.
- Dockerfile only copies required files to the container image.
- docker run called with --rm; there's no reason for the container to
  persist.
---
 .dockerignore |  1 +
 Dockerfile    | 10 +++++++---
 Makefile      |  2 +-
 src/main.py   |  2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..550d67d
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+src/__pycache__
diff --git a/Dockerfile b/Dockerfile
index 7a598bb..3d8c02b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3
-LABEL version="0.1"
-LABEL description="Docker image for YACK"
+LABEL version="0.2"
+LABEL description="Docker image for yack!"
 
 RUN apt update && apt install -y ffmpeg libsm6 libxext6 wget
 
@@ -10,9 +10,13 @@ WORKDIR /app
 
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY . /app
+COPY ./.secrets         /app/.secrets
+COPY ./Makefile         /app/Makefile
+COPY ./opencv_model/    /app/opencv_model/
+COPY ./src/             /app/src/
 
 RUN make download-model
+RUN mkdir /app/uploads
 
 EXPOSE 8000
 CMD ["gunicorn", "--bind", ":8000", "--workers", "2", "--threads", "8", "--timeout", "0", "--pythonpath", "./src", "main:app"]
diff --git a/Makefile b/Makefile
index 2ca018a..a271bfe 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ build:
 	docker build -t yack:latest .
 
 run: build
-	docker run -e "DEEPGRAM_API_KEY=$(DEEPGRAM_API_KEY)" -e "ENV=production" -p 8000:8000 yack
+	docker run --rm -e "DEEPGRAM_API_KEY=$(DEEPGRAM_API_KEY)" -e "ENV=development" -p 8000:8000 yack
 
 push: build
 	docker tag yack:latest $(CONTAINER_REGISTRY)/yack:latest
diff --git a/src/main.py b/src/main.py
index 12b323b..04488bc 100644
--- a/src/main.py
+++ b/src/main.py
@@ -30,7 +30,7 @@
 app = Flask(__name__)
 app.config["UPLOAD_FOLDER"] = (Path(".") / "uploads").resolve()
 app.config["MAX_CONTENT_LENGTH"] = 16 * 1000 * 1000  # Limit uploads to 16 MB.
-app.config["PREFERRED_URL_SCHEME"] = "https"
+app.config["PREFERRED_URL_SCHEME"] = "https" if PRODUCTION else "http"
 
 
 def pipe(

From aae889cbccaed33dd81472a42bde4f58d1bb328b Mon Sep 17 00:00:00 2001
From: Andreas Economides <antroseconomides@hotmail.co.uk>
Date: Sun, 13 Feb 2022 15:24:10 +0000
Subject: [PATCH 07/10] Self-sufficient Dockerfile

---
 Dockerfile | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3d8c02b..e36b9f2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,21 +2,18 @@ FROM python:3
 LABEL version="0.2"
 LABEL description="Docker image for yack!"
 
-RUN apt update && apt install -y ffmpeg libsm6 libxext6 wget
-
-COPY ./requirements.txt /app/requirements.txt
+RUN apt update && apt install -y ffmpeg libsm6 libxext6
 
 WORKDIR /app
+RUN mkdir /app/uploads
 
+COPY ./requirements.txt /app/requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY ./.secrets         /app/.secrets
-COPY ./Makefile         /app/Makefile
-COPY ./opencv_model/    /app/opencv_model/
-COPY ./src/             /app/src/
-
-RUN make download-model
-RUN mkdir /app/uploads
+COPY ./src/ /app/src/
+ADD https://github.com/spmallick/learnopencv/raw/master/AgeGender/opencv_face_detector_uint8.pb                     /app/opencv_model/
+ADD https://raw.githubusercontent.com/spmallick/learnopencv/master/AgeGender/opencv_face_detector.pbtxt             /app/opencv_model/
+ADD https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml    /app/opencv_model/
 
 EXPOSE 8000
 CMD ["gunicorn", "--bind", ":8000", "--workers", "2", "--threads", "8", "--timeout", "0", "--pythonpath", "./src", "main:app"]

From 53923b9886d5091cadc2ac532835c8004b61feab Mon Sep 17 00:00:00 2001
From: Andreas Economides <antroseconomides@hotmail.co.uk>
Date: Sun, 13 Feb 2022 15:32:51 +0000
Subject: [PATCH 08/10] Make the Dockerfile slightly prettier

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e36b9f2..73002db 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,10 +7,10 @@ RUN apt update && apt install -y ffmpeg libsm6 libxext6
 WORKDIR /app
 RUN mkdir /app/uploads
 
-COPY ./requirements.txt /app/requirements.txt
+COPY requirements.txt /app/requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY ./src/ /app/src/
+COPY src/ /app/src/
 ADD https://github.com/spmallick/learnopencv/raw/master/AgeGender/opencv_face_detector_uint8.pb                     /app/opencv_model/
 ADD https://raw.githubusercontent.com/spmallick/learnopencv/master/AgeGender/opencv_face_detector.pbtxt             /app/opencv_model/
 ADD https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml    /app/opencv_model/

From a50d52aebec1f237a8531f9fd53ca213fcad504f Mon Sep 17 00:00:00 2001
From: Andreas Economides <antroseconomides@hotmail.co.uk>
Date: Sun, 13 Feb 2022 16:42:12 +0000
Subject: [PATCH 09/10] Follow Dockerfile best practices

---
 Dockerfile | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 73002db..90bf9db 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,11 @@ FROM python:3
 LABEL version="0.2"
 LABEL description="Docker image for yack!"
 
-RUN apt update && apt install -y ffmpeg libsm6 libxext6
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
 RUN mkdir /app/uploads
@@ -10,10 +14,10 @@ RUN mkdir /app/uploads
 COPY requirements.txt /app/requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY src/ /app/src/
 ADD https://github.com/spmallick/learnopencv/raw/master/AgeGender/opencv_face_detector_uint8.pb                     /app/opencv_model/
 ADD https://raw.githubusercontent.com/spmallick/learnopencv/master/AgeGender/opencv_face_detector.pbtxt             /app/opencv_model/
 ADD https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml    /app/opencv_model/
+COPY src/ /app/src/
 
 EXPOSE 8000
 CMD ["gunicorn", "--bind", ":8000", "--workers", "2", "--threads", "8", "--timeout", "0", "--pythonpath", "./src", "main:app"]

From 1ee0426c7ac336191eaada412fc64afe58e9bacd Mon Sep 17 00:00:00 2001
From: Andreas Economides <antroseconomides@hotmail.co.uk>
Date: Sun, 13 Feb 2022 16:52:25 +0000
Subject: [PATCH 10/10] Conditionally load dotenv

---
 src/transcription.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/transcription.py b/src/transcription.py
index d03171a..94e7154 100644
--- a/src/transcription.py
+++ b/src/transcription.py
@@ -1,11 +1,13 @@
-import os
 from contextlib import suppress
+from os import getenv
 from textwrap import wrap
 
 from deepgram import Deepgram
-from dotenv import load_dotenv
 
-load_dotenv(".secrets")
+if getenv("DEEPGRAM_API_KEY") is None:
+    from dotenv import load_dotenv
+
+    load_dotenv(".secrets")
 
 
 def delete_keys(transcript: dict, keys: list):
@@ -40,7 +42,7 @@ def validate_transcript(transcript: dict):
 
 
 async def transcribe(audio: bytes) -> list:
-    dg_client = Deepgram(os.getenv("DEEPGRAM_API_KEY"))
+    dg_client = Deepgram(getenv("DEEPGRAM_API_KEY"))
 
     source = {"buffer": audio, "mimetype": "audio/wav"}