Skip to content

Commit 891349c

Browse files
authored
added a simple pub/sub topic for testing (#27610)
* added a simple pub/sub topic for testing * update the code based on comments --------- Co-authored-by: xqhu <xqhu@google.com>
1 parent 1273d22 commit 891349c

3 files changed

Lines changed: 178 additions & 0 deletions

File tree

.test-infra/pubsub/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# Overview
21+
22+
This folder contains Python scripts to create a Pub/Sub topic under
23+
the GCP project `apache-beam-testing` and test the topic.
24+
The created topic is `projects/apache-beam-testing/topics/Imagenet_openimage_50k_benchmark`.
25+
26+
# Create the topic `Imagenet_openimage_50k_benchmark`
27+
28+
- Create one VM to run `gcs_image_looper.py`.
29+
The VM `pubsub-test-do-not-delete` was already created under `apache-beam-testing`.
30+
Keep the script running to continuously publish data.
31+
- You might need to run `gcloud auth application-default login` to authenticate.
32+
- You might need to run `pip install google-cloud-core google-cloud-pubsub google-cloud-storage` to install the dependencies.
33+
- The topic `Imagenet_openimage_50k_benchmark` must be made public by granting `allAuthenticatedUsers` the Pub/Sub Subscriber role.
34+
35+
# Test the topic by subscribing to it
36+
37+
- Run `test_image_looper.py` to check whether you can receive any data.
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
"""This executable loops image filepaths from a gcs bucket file."""
18+
import random
19+
import time
20+
21+
from google.api_core.exceptions import AlreadyExists
22+
from google.cloud import pubsub_v1
23+
from google.cloud import storage
24+
25+
# Settings for the image looper: where the list of image paths is read from
# and which Pub/Sub topic the paths are published to.
26+
# Project used to build the fully qualified topic path below.
project_id = "apache-beam-testing"
27+
# GCS bucket that ImageLooper opens to read the image path file.
gcs_bucket = "apache-beam-ml"
28+
# Publish rate; the publish loop sleeps 1 / num_images_per_second per message.
num_images_per_second = 5
29+
30+
publisher = pubsub_v1.PublisherClient()
31+
# Object name (inside gcs_bucket) of the text file handed to ImageLooper.
image_file_path = "testing/inputs/openimage_50k_benchmark.txt"
32+
topic_name = "Imagenet_openimage_50k_benchmark"
33+
# Fully qualified topic path built from project_id and topic_name.
topic_path = publisher.topic_path(project_id, topic_name)
34+
35+
36+
class ImageLooper(object):
    """Serve random image paths read from a text file in a GCS bucket.

    The file is split on newlines; empty lines (for example the one produced
    by a trailing newline) are never returned by `get_next_image`.
    """
    # Class-level default; replaced by a list of lines once `_read_gcs_file`
    # has run.
    content = ""
    # Not read or written anywhere in this class; kept for compatibility.
    cursor = 0

    def __init__(self, filename):
        """Load the image paths stored in `filename`.

        Args:
            filename: Object name of the text file inside `gcs_bucket`.

        Raises:
            FileNotFoundError: If the object does not exist in the bucket.
        """
        self._read_gcs_file(filename)

    def get_next_image(self):
        """Return one non-empty image path picked uniformly at random.

        Raises:
            ValueError: If `content` holds no non-empty entry. Without this
                check the retry loop below would never terminate for an
                empty input file.
        """
        # any() stops at the first non-empty entry, so this is cheap.
        if not any(self.content):
            raise ValueError("No image paths available to publish.")
        next_image = ""
        # Re-draw until a non-empty line is hit; blank lines are skipped.
        while not next_image:
            next_image = random.choice(self.content)
        return next_image

    def _read_gcs_file(self, filename):
        """Download `filename` from `gcs_bucket` and store its lines."""
        client = storage.Client()
        bucket = client.get_bucket(gcs_bucket)
        blob = bucket.get_blob(filename)
        # get_blob() returns None for a missing object; fail with a clear
        # message instead of an AttributeError on None.
        if blob is None:
            raise FileNotFoundError(
                f"gs://{gcs_bucket}/{filename} does not exist.")
        # download_as_string() is a deprecated alias of download_as_bytes().
        self.content = blob.download_as_bytes().decode("utf-8").split('\n')
58+
59+
60+
def _report_publish_failure(future):
    """Print the error carried by a finished publish future, if any.

    Args:
        future: The future returned by `publisher.publish`; it is already
            done when this callback runs, so `exception()` does not block.
    """
    error = future.exception()
    if error is not None:
        print(f"Failed to publish to {topic_path}: {error}")


# Create the topic on first run; an already existing topic is reused as-is.
try:
    publisher.create_topic(request={"name": topic_path})
except AlreadyExists:
    pass

looper = ImageLooper(image_file_path)
# Publish one random image path at a time, forever, at the configured rate.
while True:
    image = looper.get_next_image()
    # publish() is asynchronous: attach a callback so that a failed publish
    # is reported instead of being silently dropped with the future.
    publish_future = publisher.publish(topic_path, data=image.encode("utf-8"))
    publish_future.add_done_callback(_report_publish_failure)
    time.sleep(1 / num_images_per_second)
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
"""This executable tests the Pub/Sub topic created by gcs_image_looper.py."""
18+
19+
from concurrent.futures import TimeoutError
20+
21+
from google.cloud import pubsub_v1
22+
from google.api_core.exceptions import AlreadyExists
23+
24+
# Project used to build both the topic path and the subscription path.
project_id = "apache-beam-testing"
25+
# Name of the subscription this script creates and deletes.
subscription_id = "test-image-looper"
26+
# Name of the topic the subscription is attached to.
topic_id = "Imagenet_openimage_50k_benchmark"
27+
28+
# The publisher client is only used here to build the topic path.
publisher = pubsub_v1.PublisherClient()
29+
subscriber = pubsub_v1.SubscriberClient()
30+
topic_path = publisher.topic_path(project_id, topic_id)
31+
subscription_path = subscriber.subscription_path(project_id, subscription_id)
32+
33+
def _create_subscription():
    """Create the subscription at `subscription_path` on `topic_path`."""
    return subscriber.create_subscription(
        request={"name": subscription_path, "topic": topic_path})


# Start from a fresh subscription: when one with the same name already
# exists it is deleted and created again.
try:
    subscription = _create_subscription()
except AlreadyExists:
    subscriber.delete_subscription(request={"subscription": subscription_path})
    subscription = _create_subscription()
    print(f"Subscription recreated: {subscription}")
else:
    print(f"Subscription created: {subscription}")
46+
47+
# Seconds to wait on the streaming pull before it is cancelled.
timeout = 3.0

# Decoded payload of every message handled by `callback`.
total_images = []


def callback(message: pubsub_v1.subscriber.message.Message) -> None:
    """Record the decoded payload of `message`, then acknowledge it."""
    payload = message.data.decode()
    total_images.append(payload)
    message.ack()
55+
56+
57+
# Start the background streaming pull; `callback` runs for each message.
streaming_pull_future = subscriber.subscribe(subscription_path,
                                             callback=callback)
print(f"Listening for messages on {subscription_path}..\n")

try:
    # When `timeout` is not set, result() will block indefinitely,
    # unless an exception is encountered first.
    streaming_pull_future.result(timeout=timeout)
except TimeoutError:
    streaming_pull_future.cancel()  # Trigger the shutdown.
    streaming_pull_future.result()  # Block until the shutdown is complete.
    print("Results: \n", total_images)
finally:
    # Always remove the test subscription, including when the pull fails
    # with something other than a timeout or the script is interrupted;
    # otherwise the subscription is left behind on the topic.
    subscriber.delete_subscription(request={"subscription": subscription_path})
    print(f"Subscription deleted: {subscription_path}.")

0 commit comments

Comments
 (0)