From 1ffcd071e8a636e1cc55ee77f07aab80ae12a545 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Tue, 21 Apr 2026 16:17:40 -0400 Subject: [PATCH] Remove Samza runner support --- .github/ISSUE_TEMPLATE/bug.yml | 2 +- .github/ISSUE_TEMPLATE/failing_test.yml | 2 +- .github/ISSUE_TEMPLATE/feature.yml | 2 +- .github/ISSUE_TEMPLATE/task.yml | 2 +- .github/autolabeler.yml | 1 - .github/issue-rules.yml | 4 +- ...PostCommit_Java_ValidatesRunner_Samza.json | 8 - .github/workflows/README.md | 5 - .../workflows/beam_PostCommit_Go_VR_Samza.yml | 85 -- .../beam_PostCommit_Java_PVR_Samza.yml | 104 -- ..._PostCommit_Java_ValidatesRunner_Samza.yml | 100 -- ...ostCommit_Python_ValidatesRunner_Samza.yml | 110 -- .../workflows/beam_PostCommit_XVR_Samza.yml | 107 -- .test-infra/BUILD_STATUS.md | 390 ------ .../src/main/resources/configuration.yaml | 2 +- CHANGES.md | 1 + build.gradle.kts | 3 - runners/samza/build.gradle | 194 --- runners/samza/job-server/build.gradle | 253 ---- .../runners/samza/SamzaExecutionContext.java | 72 -- .../samza/SamzaExecutionEnvironment.java | 44 - .../beam/runners/samza/SamzaJobInvoker.java | 82 -- .../runners/samza/SamzaJobServerDriver.java | 101 -- .../samza/SamzaPipelineExceptionContext.java | 37 - .../samza/SamzaPipelineLifeCycleListener.java | 44 - .../runners/samza/SamzaPipelineOptions.java | 182 --- .../samza/SamzaPipelineOptionsValidator.java | 54 - .../runners/samza/SamzaPipelineResult.java | 161 --- .../runners/samza/SamzaPipelineRunner.java | 89 -- .../samza/SamzaPortablePipelineOptions.java | 32 - .../samza/SamzaPortablePipelineResult.java | 48 - .../beam/runners/samza/SamzaRunner.java | 213 ---- .../samza/SamzaRunnerOverrideConfigs.java | 85 -- .../runners/samza/SamzaRunnerRegistrar.java | 53 - .../beam/runners/samza/TestSamzaRunner.java | 109 -- .../samza/adapter/BoundedSourceSystem.java | 449 ------- .../samza/adapter/UnboundedSourceSystem.java | 533 -------- .../runners/samza/adapter/package-info.java | 20 - 
.../samza/container/BeamContainerRunner.java | 89 -- .../container/BeamJobCoordinatorRunner.java | 78 -- .../samza/container/ContainerCfgLoader.java | 62 - .../container/ContainerCfgLoaderFactory.java | 30 - .../runners/samza/container/package-info.java | 20 - .../samza/metrics/DoFnRunnerWithMetrics.java | 108 -- .../samza/metrics/FnWithMetricsWrapper.java | 50 - .../samza/metrics/SamzaGBKMetricOp.java | 194 --- .../runners/samza/metrics/SamzaMetricOp.java | 173 --- .../samza/metrics/SamzaMetricOpFactory.java | 73 -- .../samza/metrics/SamzaMetricsContainer.java | 120 -- .../metrics/SamzaTransformMetricRegistry.java | 208 --- .../samza/metrics/SamzaTransformMetrics.java | 142 --- .../runners/samza/metrics/package-info.java | 20 - .../beam/runners/samza/package-info.java | 20 - .../samza/runtime/AsyncDoFnRunner.java | 188 --- .../runners/samza/runtime/BundleManager.java | 90 -- .../samza/runtime/ClassicBundleManager.java | 327 ----- .../beam/runners/samza/runtime/DoFnOp.java | 579 --------- .../runtime/DoFnRunnerWithKeyedInternals.java | 126 -- .../samza/runtime/FutureCollector.java | 67 - .../samza/runtime/FutureCollectorImpl.java | 99 -- .../runners/samza/runtime/GroupByKeyOp.java | 243 ---- .../runners/samza/runtime/KeyedInternals.java | 180 --- .../runners/samza/runtime/KeyedTimerData.java | 216 ---- .../samza/runtime/KvToKeyedWorkItemOp.java | 37 - .../apache/beam/runners/samza/runtime/Op.java | 64 - .../beam/runners/samza/runtime/OpAdapter.java | 237 ---- .../beam/runners/samza/runtime/OpEmitter.java | 41 - .../beam/runners/samza/runtime/OpMessage.java | 161 --- .../samza/runtime/OutputManagerFactory.java | 31 - .../samza/runtime/PortableBundleManager.java | 218 ---- .../runners/samza/runtime/PortableDoFnOp.java | 467 ------- .../samza/runtime/SamzaAssignContext.java | 56 - .../runtime/SamzaDoFnInvokerRegistrar.java | 35 - .../samza/runtime/SamzaDoFnRunners.java | 506 -------- .../SamzaExecutableStageContextFactory.java | 61 - 
.../SamzaMetricsBundleProgressHandler.java | 158 --- .../runtime/SamzaStateRequestHandlers.java | 179 --- .../runtime/SamzaStoreStateInternals.java | 1131 ----------------- .../runtime/SamzaTimerInternalsFactory.java | 733 ----------- .../samza/runtime/SingletonKeyedWorkItem.java | 49 - ...SplittableParDoProcessKeyedElementsOp.java | 248 ---- .../runners/samza/runtime/WindowAssignOp.java | 53 - .../runners/samza/runtime/package-info.java | 20 - .../runners/samza/state/SamzaMapState.java | 40 - .../runners/samza/state/SamzaSetState.java | 39 - .../runners/samza/state/package-info.java | 20 - .../transforms/GroupWithoutRepartition.java | 60 - .../samza/transforms/UpdatingCombineFn.java | 35 - .../samza/transforms/package-info.java | 20 - .../samza/translation/ConfigBuilder.java | 360 ------ .../samza/translation/ConfigContext.java | 79 -- .../FlattenPCollectionsTranslator.java | 111 -- .../translation/GroupByKeyTranslator.java | 272 ---- .../samza/translation/ImpulseTranslator.java | 80 -- .../samza/translation/PViewToIdMapper.java | 83 -- .../ParDoBoundMultiTranslator.java | 563 -------- .../PortableTranslationContext.java | 117 -- .../samza/translation/ReadTranslator.java | 91 -- .../RedistributeByKeyTranslator.java | 64 - .../translation/ReshuffleTranslator.java | 134 -- .../SamzaImpulseSystemFactory.java | 146 --- .../translation/SamzaPipelineTranslator.java | 211 --- .../SamzaPortablePipelineTranslator.java | 112 -- .../SamzaPortableTranslatorRegistrar.java | 25 - .../samza/translation/SamzaPublishView.java | 66 - .../SamzaPublishViewTransformOverride.java | 66 - .../SamzaPublishViewTranslator.java | 76 -- .../SamzaTestStreamSystemFactory.java | 179 --- .../SamzaTestStreamTranslator.java | 148 --- .../translation/SamzaTransformOverrides.java | 62 - .../translation/SamzaTranslatorRegistrar.java | 25 - .../SplittableParDoTranslators.java | 157 --- .../samza/translation/StateIdParser.java | 69 - .../translation/TransformConfigGenerator.java | 40 - 
.../translation/TransformTranslator.java | 45 - .../samza/translation/TranslationContext.java | 380 ------ .../translation/WindowAssignTranslator.java | 80 -- .../samza/translation/package-info.java | 20 - .../beam/runners/samza/util/ConfigUtils.java | 43 - .../beam/runners/samza/util/DoFnUtils.java | 75 -- .../beam/runners/samza/util/FutureUtils.java | 67 - .../runners/samza/util/HashIdGenerator.java | 66 - .../samza/util/PipelineJsonRenderer.java | 335 ----- .../samza/util/PortableConfigUtils.java | 43 - .../beam/runners/samza/util/SamzaCoders.java | 77 -- .../util/SamzaPipelineExceptionListener.java | 34 - .../util/SamzaPipelineTranslatorUtils.java | 61 - .../beam/runners/samza/util/StateUtils.java | 39 - .../runners/samza/util/StoreIdGenerator.java | 48 - .../beam/runners/samza/util/WindowUtils.java | 74 -- .../beam/runners/samza/util/package-info.java | 20 - .../samza/src/main/resources/log4j.properties | 23 - .../src/main/resources/samza-conf.properties | 37 - .../SamzaPipelineOptionsValidatorTest.java | 60 - .../adapter/BoundedSourceSystemTest.java | 309 ----- .../samza/adapter/TestBoundedSource.java | 188 --- .../samza/adapter/TestCheckpointMark.java | 40 - .../samza/adapter/TestSourceHelpers.java | 162 --- .../samza/adapter/TestUnboundedSource.java | 208 --- .../adapter/UnboundedSourceSystemTest.java | 405 ------ .../TestSamzaRunnerWithTransformMetrics.java | 323 ----- .../TestSamzaTransformMetricsRegistry.java | 191 --- .../samza/runtime/AsyncDoFnRunnerTest.java | 240 ---- .../runtime/ClassicBundleManagerTest.java | 457 ------- .../runtime/FutureCollectorImplTest.java | 92 -- .../samza/runtime/GroupByKeyOpTest.java | 133 -- .../samza/runtime/KeyedTimerDataTest.java | 65 - .../runtime/PortableBundleManagerTest.java | 178 --- ...SamzaMetricsBundleProgressHandlerTest.java | 187 --- .../runtime/SamzaStoreStateInternalsTest.java | 432 ------- .../SamzaTimerInternalsFactoryTest.java | 752 ----------- .../runtime/SdkHarnessDoFnRunnerTest.java | 48 - 
.../translation/ConfigGeneratorTest.java | 461 ------- .../translation/SamzaImpulseSystemTest.java | 65 - .../translation/TranslationContextTest.java | 98 -- .../runners/samza/util/DoFnUtilsTest.java | 84 -- .../runners/samza/util/FutureUtilsTest.java | 112 -- .../samza/util/InMemoryMetricsReporter.java | 49 - .../samza/util/PipelineJsonRendererTest.java | 146 --- .../samza/util/PortableConfigUtilsTest.java | 58 - .../samza/util/TestHashIdGenerator.java | 85 -- .../runners/samza/util/WindowUtilsTest.java | 87 -- .../samza/src/test/resources/ExpectedDag.json | 373 ------ .../src/test/resources/log4j-test.properties | 26 - .../large_wordcount/large_wordcount.go | 1 - sdks/go/pkg/beam/runners/samza/samza.go | 35 - sdks/go/pkg/beam/x/beamx/run.go | 1 - sdks/go/test/build.gradle | 28 - sdks/go/test/integration/expansions_test.go | 1 - sdks/go/test/integration/integration.go | 49 - .../io/mongodbio/mongodbio_test.go | 1 - .../io/xlang/debezium/debezium_test.go | 1 - .../integration/io/xlang/jdbc/jdbc_test.go | 1 - .../integration/io/xlang/kafka/kafka_test.go | 1 - .../integration/primitives/primitives_test.go | 1 - .../integration/synthetic/synthetic_test.go | 2 +- .../integration/wordcount/wordcount_test.go | 1 - sdks/go/test/integration/xlang/xlang_test.go | 1 - sdks/go/test/regression/lperror_test.go | 1 - sdks/go/test/regression/pardo_test.go | 1 - sdks/go/test/run_validatesrunner_tests.sh | 17 +- .../resources/archetype-resources/pom.xml | 11 - .../resources/archetype-resources/pom.xml | 10 - .../fn_api_runner/fn_runner_test.py | 3 - .../portability/portable_runner_test.py | 2 - .../runners/portability/samza_runner_test.py | 200 --- sdks/python/test-suites/gradle.properties | 3 - sdks/python/test-suites/portable/build.gradle | 6 - .../python/test-suites/portable/common.gradle | 27 - sdks/python/tox.ini | 6 - settings.gradle.kts | 2 - .../site/content/en/blog/capability-matrix.md | 2 +- .../postcommits-policies-details.md | 2 +- 
.../content/en/documentation/runners/samza.md | 4 +- .../content/en/get-started/beam-overview.md | 5 +- .../content/en/get-started/quickstart-java.md | 21 - .../en/get-started/wordcount-example.md | 43 - website/www/site/content/en/roadmap/_index.md | 2 +- website/www/site/content/en/roadmap/go-sdk.md | 2 +- .../site/content/en/roadmap/samza-runner.md | 2 +- website/www/site/data/capability_matrix.yaml | 34 - .../partials/section-menu/en/runners.html | 1 - 202 files changed, 23 insertions(+), 23509 deletions(-) delete mode 100644 .github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Samza.json delete mode 100644 .github/workflows/beam_PostCommit_Go_VR_Samza.yml delete mode 100644 .github/workflows/beam_PostCommit_Java_PVR_Samza.yml delete mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml delete mode 100644 .github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml delete mode 100644 .github/workflows/beam_PostCommit_XVR_Samza.yml delete mode 100644 .test-infra/BUILD_STATUS.md delete mode 100644 runners/samza/build.gradle delete mode 100644 runners/samza/job-server/build.gradle delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionContext.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionEnvironment.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobInvoker.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobServerDriver.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineExceptionContext.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineLifeCycleListener.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java delete mode 
100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineOptions.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineResult.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerOverrideConfigs.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerRegistrar.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystem.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamContainerRunner.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamJobCoordinatorRunner.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoader.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoaderFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/container/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/FnWithMetricsWrapper.java delete mode 100644 
runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaGBKMetricOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOpFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricsContainer.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetricRegistry.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetrics.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunner.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/BundleManager.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/ClassicBundleManager.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollector.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollectorImpl.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java delete mode 100644 
runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KvToKeyedWorkItemOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/Op.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpAdapter.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpEmitter.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpMessage.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OutputManagerFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableBundleManager.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableDoFnOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaAssignContext.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnInvokerRegistrar.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaExecutableStageContextFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandler.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStateRequestHandlers.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SingletonKeyedWorkItem.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SplittableParDoProcessKeyedElementsOp.java delete mode 
100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/WindowAssignOp.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaMapState.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaSetState.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/state/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/GroupWithoutRepartition.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/UpdatingCombineFn.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigContext.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/FlattenPCollectionsTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/GroupByKeyTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ImpulseTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PViewToIdMapper.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PortableTranslationContext.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReadTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/RedistributeByKeyTranslator.java delete mode 
100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortableTranslatorRegistrar.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishView.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTransformOverride.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamSystemFactory.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTransformOverrides.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTranslatorRegistrar.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SplittableParDoTranslators.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/StateIdParser.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformConfigGenerator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformTranslator.java delete mode 100644 
runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TranslationContext.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/WindowAssignTranslator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/translation/package-info.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/ConfigUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/DoFnUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/FutureUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/HashIdGenerator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/PipelineJsonRenderer.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/PortableConfigUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaCoders.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineExceptionListener.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineTranslatorUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/StateUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/StoreIdGenerator.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/WindowUtils.java delete mode 100644 runners/samza/src/main/java/org/apache/beam/runners/samza/util/package-info.java delete mode 100644 runners/samza/src/main/resources/log4j.properties delete mode 100644 runners/samza/src/main/resources/samza-conf.properties delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidatorTest.java delete mode 100644 
runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystemTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestBoundedSource.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestCheckpointMark.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestSourceHelpers.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestUnboundedSource.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystemTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaRunnerWithTransformMetrics.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaTransformMetricsRegistry.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunnerTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/ClassicBundleManagerTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/FutureCollectorImplTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/KeyedTimerDataTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/PortableBundleManagerTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandlerTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternalsTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java delete mode 100644 
runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SdkHarnessDoFnRunnerTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/translation/TranslationContextTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/DoFnUtilsTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/FutureUtilsTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/InMemoryMetricsReporter.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/PipelineJsonRendererTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/PortableConfigUtilsTest.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/TestHashIdGenerator.java delete mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/util/WindowUtilsTest.java delete mode 100644 runners/samza/src/test/resources/ExpectedDag.json delete mode 100644 runners/samza/src/test/resources/log4j-test.properties delete mode 100644 sdks/go/pkg/beam/runners/samza/samza.go delete mode 100644 sdks/python/apache_beam/runners/portability/samza_runner_test.py diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index 081c266e43c7..594dbbc59b22 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -72,7 +72,7 @@ body: - label: "Component: Infrastructure" - label: "Component: Spark Runner" - label: "Component: Flink Runner" - - label: "Component: Samza Runner" + - label: "Component: Prism Runner" - label: "Component: Twister2 Runner" - label: "Component: Hazelcast Jet Runner" - label: "Component: Google Cloud Dataflow 
Runner" diff --git a/.github/ISSUE_TEMPLATE/failing_test.yml b/.github/ISSUE_TEMPLATE/failing_test.yml index 0699621e2b16..bc461771d4dc 100644 --- a/.github/ISSUE_TEMPLATE/failing_test.yml +++ b/.github/ISSUE_TEMPLATE/failing_test.yml @@ -78,7 +78,7 @@ body: - label: "Component: Infrastructure" - label: "Component: Spark Runner" - label: "Component: Flink Runner" - - label: "Component: Samza Runner" + - label: "Component: Prism Runner" - label: "Component: Twister2 Runner" - label: "Component: Hazelcast Jet Runner" - label: "Component: Google Cloud Dataflow Runner" diff --git a/.github/ISSUE_TEMPLATE/feature.yml b/.github/ISSUE_TEMPLATE/feature.yml index 049c0049d29a..abde43ea7302 100644 --- a/.github/ISSUE_TEMPLATE/feature.yml +++ b/.github/ISSUE_TEMPLATE/feature.yml @@ -66,7 +66,7 @@ body: - label: "Component: Infrastructure" - label: "Component: Spark Runner" - label: "Component: Flink Runner" - - label: "Component: Samza Runner" + - label: "Component: Prism Runner" - label: "Component: Twister2 Runner" - label: "Component: Hazelcast Jet Runner" - label: "Component: Google Cloud Dataflow Runner" diff --git a/.github/ISSUE_TEMPLATE/task.yml b/.github/ISSUE_TEMPLATE/task.yml index 25d7e55e4e90..c02ff781ad44 100644 --- a/.github/ISSUE_TEMPLATE/task.yml +++ b/.github/ISSUE_TEMPLATE/task.yml @@ -67,7 +67,7 @@ body: - label: "Component: Infrastructure" - label: "Component: Spark Runner" - label: "Component: Flink Runner" - - label: "Component: Samza Runner" + - label: "Component: Prism Runner" - label: "Component: Twister2 Runner" - label: "Component: Hazelcast Jet Runner" - label: "Component: Google Cloud Dataflow Runner" diff --git a/.github/autolabeler.yml b/.github/autolabeler.yml index 6cd7516e7440..0eb9a04b6d2c 100644 --- a/.github/autolabeler.yml +++ b/.github/autolabeler.yml @@ -89,6 +89,5 @@ io: ["sdks/go/pkg/beam/io/**/*", "sdks/java/io/**/*", "sdks/python/apache_beam/ "local": ["runners/local-java/**/*"] "portability": ["runners/portability/**/*"] "prism": 
["runners/prism/**/*", "sdks/go/pkg/beam/runners/prism/**/*", "sdks/go/cmd/prism/**/*", "sdks/python/apache_beam/runners/portability/prism_runner.py","sdks/python/apache_beam/runners/portability/prism_runner_test.py"] -"samza": ["runners/samza/**/*"] "spark": ["runners/spark/**/*", "sdks/go/pkg/beam/runners/spark/**/*"] "twister2": ["runners/twister2/**/*"] diff --git a/.github/issue-rules.yml b/.github/issue-rules.yml index c4acb2945575..4fafb1c87f78 100644 --- a/.github/issue-rules.yml +++ b/.github/issue-rules.yml @@ -52,8 +52,8 @@ rules: addLabels: ['spark'] - contains: '[x] Component: Flink' addLabels: ['flink'] -- contains: '[x] Component: Samza' - addLabels: ['samza'] +- contains: '[x] Component: Prism' + addLabels: ['prism'] - contains: '[x] Component: Twister2' addLabels: ['twister2'] - contains: '[x] Component: Hazelcast' diff --git a/.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Samza.json b/.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Samza.json deleted file mode 100644 index db03186ab405..000000000000 --- a/.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Samza.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "https://github.com/apache/beam/pull/34902": "Introducing OutputBuilder", - "https://github.com/apache/beam/pull/35177": "Introducing WindowedValueReceiver to runners", - "comment": "Modify this file in a trivial way to cause this test suite to run", - "https://github.com/apache/beam/pull/31156": "noting that PR #31156 should run this test", - "https://github.com/apache/beam/pull/31270": "re-add specialized Samza translation of Redistribute", - "https://github.com/apache/beam/pull/35159": "moving WindowedValue and making an interface" -} diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 9e685a983278..6708c909fd80 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -324,7 +324,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ 
PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`beam_PostCommit_Go.json`| [![.github/workflows/beam_PostCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml?query=event%3Aschedule) | | [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`beam_PostCommit_Go_Dataflow_ARM.json`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml?query=event%3Aschedule) | | [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`beam_PostCommit_Go_VR_Flink.json`| [![.github/workflows/beam_PostCommit_Go_VR_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml?query=event%3Aschedule) | -| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`beam_PostCommit_Go_VR_Samza.json`| [![.github/workflows/beam_PostCommit_Go_VR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml?query=event%3Aschedule) | | [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`beam_PostCommit_Go_VR_Spark.json`| 
[![.github/workflows/beam_PostCommit_Go_VR_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml?query=event%3Aschedule) | | [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`beam_PostCommit_Java_Avro_Versions.json`| [![.github/workflows/beam_PostCommit_Java_Avro_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml?query=event%3Aschedule) | | [ PostCommit Java BigQueryEarlyRollout ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_BigQueryEarlyRollout.yml) | N/A |`beam_PostCommit_Java_BigQueryEarlyRollout.json`| [![.github/workflows/beam_PostCommit_Java_BigQueryEarlyRollout.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_BigQueryEarlyRollout.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_BigQueryEarlyRollout.yml?query=event%3Aschedule) | @@ -355,7 +354,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit Java Nexmark Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml) | N/A |`beam_PostCommit_Java_Nexmark_Spark.json`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml?query=event%3Aschedule) | | [ PostCommit Java PVR Flink Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Batch.yml) | N/A |`beam_PostCommit_Java_PVR_Flink_Batch.json`| 
[![.github/workflows/beam_PostCommit_Java_PVR_Flink_Batch.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Batch.yml?query=event%3Aschedule) | | [ PostCommit Java PVR Flink Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml) | N/A |`beam_PostCommit_Java_PVR_Flink_Streaming.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml?query=event%3Aschedule) | -| [ PostCommit Java PVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml) | N/A |`beam_PostCommit_Java_PVR_Samza.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml?query=event%3Aschedule) | | [ PostCommit Java SingleStoreIO IT ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml) | N/A |`beam_PostCommit_Java_SingleStoreIO_IT.json`| [![.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml?query=event%3Aschedule) | | [ PostCommit Java PVR Spark3 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | N/A |`beam_PostCommit_Java_PVR_Spark3_Streaming.json`| 
[![.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml?query=event%3Aschedule) | | [ PostCommit Java PVR Spark Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | N/A |`beam_PostCommit_Java_PVR_Spark_Batch.json`| [![.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml?query=event%3Aschedule) | @@ -371,7 +369,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Direct.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner Flink Java8 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java8.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Flink_Java8.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java8.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java8.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java8.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner Flink 
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Flink.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml?query=event%3Aschedule) | -| [ PostCommit Java ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Samza.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner Spark Java8 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java8.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Spark_Java8.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java8.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java8.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java8.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_Spark.json`| 
[![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml?query=event%3Aschedule) | | [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.json`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml?query=event%3Aschedule) | @@ -394,7 +391,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit Python ValidatesContainer Dataflow With RC ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | ['3.9','3.10','3.11','3.12'] |`beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.json`| [![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml?query=event%3Aschedule) | | [ PostCommit Python ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | ['3.9','3.11','3.12'] |`beam_PostCommit_Python_ValidatesRunner_Dataflow.json`| 
[![.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit Python ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | ['3.9','3.11','3.12'] |`beam_PostCommit_Python_ValidatesRunner_Flink.json`| [![.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml?query=event%3Aschedule) | -| [ PostCommit Python ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | ['3.9','3.11','3.12'] |`beam_PostCommit_Python_ValidatesRunner_Samza.json`| [![.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml?query=event%3Aschedule) | | [ PostCommit Python ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | ['3.9','3.10','3.11','3.12'] |`beam_PostCommit_Python_ValidatesRunner_Spark.json`| [![.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml?query=event%3Aschedule) | | [ PostCommit Python Xlang Gcp 
Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | N/A |`beam_PostCommit_Python_Xlang_Gcp_Dataflow.json`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit Python Xlang Gcp Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | N/A |`beam_PostCommit_Python_Xlang_Gcp_Direct.json`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml?query=event%3Aschedule) | @@ -408,7 +404,6 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit XVR JavaUsingPython Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | N/A |`beam_PostCommit_XVR_JavaUsingPython_Dataflow.json`| [![.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit XVR PythonUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | N/A |`beam_PostCommit_XVR_PythonUsingJava_Dataflow.json`| 
[![.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit XVR PythonUsingJavaSQL Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | N/A |`beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml?query=event%3Aschedule) | -| [ PostCommit XVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | N/A |`beam_PostCommit_XVR_Samza.json`| [![.github/workflows/beam_PostCommit_XVR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml?query=event%3Aschedule) | | [ PostCommit XVR Spark3 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | N/A |`beam_PostCommit_XVR_Spark3.json`| [![.github/workflows/beam_PostCommit_XVR_Spark3.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml?query=event%3Aschedule) | | [ PostCommit YAML Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Xlang_Direct.json`| 
[![.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml?query=event%3Aschedule) | | [ Python Validates Container Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml) | ['3.9','3.10','3.11','3.12'] |`beam_Python_ValidatesContainer_Dataflow_ARM.json`|[![.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml?query=event%3Aschedule) | diff --git a/.github/workflows/beam_PostCommit_Go_VR_Samza.yml b/.github/workflows/beam_PostCommit_Go_VR_Samza.yml deleted file mode 100644 index ac6f2dd5ed85..000000000000 --- a/.github/workflows/beam_PostCommit_Go_VR_Samza.yml +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: PostCommit Go VR Samza - -on: - schedule: - - cron: '30 3/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Go_VR_Samza.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: read - checks: read - contents: read - deployments: read - id-token: none - issues: read - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Go_VR_Samza: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') || - github.event.comment.body == 'Run Go Samza ValidatesRunner' - runs-on: [self-hosted, ubuntu-24.04, main] - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - strategy: - matrix: - job_name: ["beam_PostCommit_Go_VR_Samza"] - job_phrase: ["Run Go Samza ValidatesRunner"] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: 
./.github/actions/setup-environment-action - # TODO(https://github.com/apache/beam/issues/32208) move to Java11 after bump to Samza 1.8 - with: - java-version: | - 8 - 11 - - name: run Go Samza ValidatesRunner script - env: - CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :sdks:go:test:samzaValidatesRunner -Pjava8Home=$JAVA_HOME_8_X64 -PtestJavaVersion=8 diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml b/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml deleted file mode 100644 index dd28b4555e5f..000000000000 --- a/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml +++ /dev/null @@ -1,104 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: PostCommit Java PVR Samza - -on: - schedule: - - cron: '15 4/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_PVR_Samza.json'] - workflow_dispatch: - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: write - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Java_PVR_Samza: - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - runs-on: [self-hosted, ubuntu-24.04, main] - timeout-minutes: 120 - strategy: - matrix: - job_name: [beam_PostCommit_Java_PVR_Samza] - job_phrase: [Run Java Samza PortableValidatesRunner] - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') || - github.event.comment.body == 'Run Java Samza PortableValidatesRunner' - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - 
uses: ./.github/actions/setup-environment-action - # TODO(https://github.com/apache/beam/issues/32208) move to Java11 after bump to Samza 1.8 - with: - java-version: | - 8 - 11 - - name: run PostCommit Java Samza script - env: - CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :runners:samza:job-server:validatesPortableRunner - arguments: | - -PtestJavaVersion=8 \ - -Pjava8Home=$JAVA_HOME_8_X64 \ - - name: Archive JUnit Test Results - uses: actions/upload-artifact@v4 - if: ${{ !success() }} - with: - name: JUnit Test Results - path: "**/build/reports/tests/" - - name: Publish JUnit Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/build/test-results/**/*.xml' - large_files: true \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml deleted file mode 100644 index 7f65b2ccee33..000000000000 --- a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -name: PostCommit Java ValidatesRunner Samza - -on: - schedule: - - cron: '45 4/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Samza.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: write - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Java_ValidatesRunner_Samza: - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - runs-on: [self-hosted, ubuntu-24.04, main] - timeout-minutes: 100 - strategy: - matrix: - job_name: [beam_PostCommit_Java_ValidatesRunner_Samza] - job_phrase: [Run Samza ValidatesRunner] - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') || - github.event.comment.body == 'Run Samza ValidatesRunner' - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - 
github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - # TODO(https://github.com/apache/beam/issues/32208) move to Java11 after bump to Samza 1.8 - with: - java-version: | - 8 - 11 - - name: run validatesRunner script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :runners:samza:validatesRunner - arguments: | - -PtestJavaVersion=8 \ - -Pjava8Home=$JAVA_HOME_8_X64 \ - - name: Archive JUnit Test Results - uses: actions/upload-artifact@v4 - if: ${{ !success() }} - with: - name: JUnit Test Results - path: "**/build/reports/tests/" - - name: Publish JUnit Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/build/test-results/**/*.xml' - large_files: true \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml deleted file mode 100644 index 144de32d2be6..000000000000 --- a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml +++ /dev/null @@ -1,110 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: PostCommit Python ValidatesRunner Samza - -on: - schedule: - - cron: '15 5/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Python_ValidatesRunner_Samza.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: write - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Python_ValidatesRunner_Samza: - if: | - (github.event_name == 'schedule' && github.repository == 'apache/beam') || - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - startsWith(github.event.comment.body, 'Run Python Samza ValidatesRunner') - runs-on: [self-hosted, ubuntu-24.04, main] - timeout-minutes: 100 - name: ${{ 
matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - strategy: - fail-fast: false - matrix: - job_name: ["beam_PostCommit_Python_ValidatesRunner_Samza"] - job_phrase: ["Run Python Samza ValidatesRunner"] - python_version: ['3.10', '3.14'] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - # TODO(https://github.com/apache/beam/issues/32208) move to Java11 after bump to Samza 1.8 - with: - java-version: | - 8 - 11 - python-version: ${{ matrix.python_version }} - - name: Set PY_VER_CLEAN - id: set_py_ver_clean - run: | - PY_VER=${{ matrix.python_version }} - PY_VER_CLEAN=${PY_VER//.} - echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT - - name: Run samzaValidatesRunner script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:samzaValidatesRunner - arguments: | - -PpythonVersion=${{ matrix.python_version }} \ - -PtestJavaVersion=8 \ - -Pjava8Home=$JAVA_HOME_8_X64 \ - - name: Archive Python Test Results - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Python Test Results - path: '**/pytest*.xml' - - name: Publish Python Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/pytest*.xml' - large_files: true \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Samza.yml b/.github/workflows/beam_PostCommit_XVR_Samza.yml deleted file mode 100644 index 
6b6905f6ce72..000000000000 --- a/.github/workflows/beam_PostCommit_XVR_Samza.yml +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: PostCommit XVR Samza - -on: - schedule: - - cron: '45 5/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_XVR_Samza.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: read - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - 
GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_XVR_Samza: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') || - github.event.comment.body == 'Run XVR_Samza PostCommit' - runs-on: [self-hosted, ubuntu-24.04, main] - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - strategy: - matrix: - job_name: ["beam_PostCommit_XVR_Samza"] - job_phrase: ["Run XVR_Samza PostCommit"] - python_version: ['3.10','3.14'] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - # TODO(https://github.com/apache/beam/issues/32208) move to Java11 after bump to Samza 1.8 - with: - java-version: | - 8 - 11 - python-version: | - ${{ matrix.python_version }} - - name: run PostCommit XVR Samza script - env: - CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner - arguments: | - -PpythonVersion=${{ matrix.python_version }} \ - -PtestJavaVersion=8 \ - -Pjava8Home=$JAVA_HOME_8_X64 \ - -PskipNonPythonTask=${{ (matrix.python_version == '3.10' && true) || false }} \ - - name: Archive JUnit Test Results - uses: actions/upload-artifact@v4 - if: ${{ !success() }} - with: - name: JUnit Test Results - path: "**/build/reports/tests/" - - name: Publish JUnit Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - 
comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/build/test-results/**/*.xml' - large_files: true \ No newline at end of file diff --git a/.test-infra/BUILD_STATUS.md b/.test-infra/BUILD_STATUS.md deleted file mode 100644 index 1972076df1e2..000000000000 --- a/.test-infra/BUILD_STATUS.md +++ /dev/null @@ -1,390 +0,0 @@ - - - -`ValidatesRunner` compliance status (on master branch) --------------------------------------------------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LangULRDataflowFlinkSamzaSparkTwister2
Go--- - - Build Status - - - - Build Status - - - - Build Status - - - - Build Status - - ---
Java - - Build Status - - - - Build Status -
- - Build Status -
- - Build Status -
- - Build Status -
- - Build Status -
-
- - Build Status -
- - Build Status -
- - Build Status -
- - Build Status - -
- - Build Status -
- - Build Status - -
- - Build Status -
- - Build Status -
- - Build Status - -
- - Build Status - -
Python--- - - Build Status -
- - Build Status -
- - Build Status - -
- - Build Status -
- - Build Status - -
- - Build Status - - - - Build Status - - ---
XLang - - Build Status - - - - Build Status -
- - Build Status -
- - Build Status -
-
- - Build Status - - - - Build Status - - - - Build Status - - ---
- -Examples testing status on various runners --------------------------------------------------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LangULRDataflowFlinkSamzaSparkTwister2
Go---------------------
Java--- - - Build Status -
- - Build Status -
- - Build Status -
-
---------------
Python---------------------
XLang---------------------
- -Post-Commit SDK/Transform Integration Tests Status (on master branch) ------------------------------------------------------------------------------------------------- - - - - - - - - - - - - - - - - -
GoJavaPython
- - Build Status - - - - Build Status - - - - Build Status -
- - Build Status -
- - Build Status - -
- -Pre-Commit Tests Status (on master branch) ------------------------------------------------------------------------------------------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
---JavaPythonGoWebsiteWhitespaceTypescript
Non-portable - - Build Status -
-
- - Build Status -
- - Build Status -
- - Build Status -
- - Build Status - -
- - Build Status - - - - Build Status - - - - Build Status - - - - Build Status - -
Portable--- - - Build Status - - - - Build Status - - ---------
- -See [.test-infra/jenkins/README](https://github.com/apache/beam/blob/master/.test-infra/jenkins/README.md) for trigger phrase, status and link of all Jenkins jobs. - - -GitHub Actions Tests Status (on master branch) ------------------------------------------------------------------------------------------------- -[![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg?branch=master&event=schedule)](https://github.com/apache/beam/actions?query=workflow%3A%22Build+python+source+distribution+and+wheels%22+branch%3Amaster+event%3Aschedule) -[![Python tests](https://github.com/apache/beam/workflows/Python%20tests/badge.svg?branch=master&event=schedule)](https://github.com/apache/beam/actions?query=workflow%3A%22Python+Tests%22+branch%3Amaster+event%3Aschedule) -[![Java tests](https://github.com/apache/beam/workflows/Java%20Tests/badge.svg?branch=master&event=schedule)](https://github.com/apache/beam/actions?query=workflow%3A%22Java+Tests%22+branch%3Amaster+event%3Aschedule) - -See [CI.md](https://github.com/apache/beam/blob/master/CI.md) for more information about GitHub Actions CI. 
diff --git a/.test-infra/validate-runner/src/main/resources/configuration.yaml b/.test-infra/validate-runner/src/main/resources/configuration.yaml index 31934d2146d7..4eb8cd018f99 100644 --- a/.test-infra/validate-runner/src/main/resources/configuration.yaml +++ b/.test-infra/validate-runner/src/main/resources/configuration.yaml @@ -19,6 +19,6 @@ batch: - dataflow: beam_PostCommit_Java_VR_Dataflow_V2 stream: - flink: beam_PostCommit_Java_PVR_Flink_Streaming - - samza: beam_PostCommit_Java_PVR_Samza + - spark: beam_PostCommit_Java_PVR_Spark3_Streaming server: https://ci-beam.apache.org/ jsonapi: testReport/api/json diff --git a/CHANGES.md b/CHANGES.md index bdcbd3451c7b..dc2a94f6dd02 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -82,6 +82,7 @@ ## Deprecations * X behavior is deprecated and will be removed in X versions ([#X](https://github.com/apache/beam/issues/X)). +* Removed Samza Runner support ([#35448](https://github.com/apache/beam/issues/35448)). ## Bugfixes diff --git a/build.gradle.kts b/build.gradle.kts index 2ccab281b49c..f6e97d19487e 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -283,8 +283,6 @@ tasks.register("javaPreCommit") { dependsOn(":runners:local-java:build") dependsOn(":runners:portability:java:build") dependsOn(":runners:prism:java:build") - dependsOn(":runners:samza:build") - dependsOn(":runners:samza:job-server:build") dependsOn(":runners:spark:3:build") dependsOn(":runners:spark:3:job-server:build") dependsOn(":runners:twister2:build") @@ -412,7 +410,6 @@ tasks.register("javaPostCommit") { } tasks.register("javaPostCommitSickbay") { - dependsOn(":runners:samza:validatesRunnerSickbay") for (version in project.ext.get("allFlinkVersions") as Array<*>) { dependsOn(":runners:flink:${version}:validatesRunnerSickbay") } diff --git a/runners/samza/build.gradle b/runners/samza/build.gradle deleted file mode 100644 index 626588b79a5d..000000000000 --- a/runners/samza/build.gradle +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import groovy.json.JsonOutput - -plugins { id 'org.apache.beam.module' } -applyJavaNature( - exportJavadoc: false, - classesTriggerCheckerBugs: [ - 'GroupWithoutRepartition': 'https://github.com/typetools/checker-framework/issues/3791', - ], - automaticModuleName: 'org.apache.beam.runners.samza', -) - -description = "Apache Beam :: Runners :: Samza" - -/* - * We need to rely on manually specifying these evaluationDependsOn to ensure that - * the following projects are evaluated before we evaluate this project. This is because - * we are attempting to reference the "sourceSets.test.output" directly. 
- */ -evaluationDependsOn(":sdks:java:core") - -configurations { - validatesRunner -} - -def samza_version = "1.6.0" - -dependencies { - implementation library.java.vendored_guava_32_1_2_jre - implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation project(":runners:core-java") - implementation project(":runners:java-fn-execution") - implementation project(":runners:java-job-service") - implementation library.java.jackson_annotations - implementation library.java.slf4j_api - implementation library.java.joda_time - implementation library.java.args4j - implementation library.java.commons_io - implementation library.java.commons_collections - runtimeOnly "org.rocksdb:rocksdbjni:6.15.2" - runtimeOnly "org.scala-lang:scala-library:2.11.8" - implementation "org.apache.samza:samza-api:$samza_version" - implementation "org.apache.samza:samza-core_2.11:$samza_version" - runtimeOnly "org.apache.samza:samza-kafka_2.11:$samza_version" - runtimeOnly "org.apache.samza:samza-kv_2.11:$samza_version" - implementation "org.apache.samza:samza-kv-rocksdb_2.11:$samza_version" - implementation "org.apache.samza:samza-kv-inmemory_2.11:$samza_version" - implementation "org.apache.samza:samza-yarn_2.11:$samza_version" - compileOnly library.java.error_prone_annotations - runtimeOnly "org.apache.kafka:kafka-clients:2.0.1" - implementation library.java.vendored_grpc_1_69_0 - implementation project(path: ":model:fn-execution", configuration: "shadow") - implementation project(path: ":model:job-management", configuration: "shadow") - implementation project(path: ":model:pipeline", configuration: "shadow") - testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") - testImplementation project(path: ":runners:core-java", configuration: "testRuntimeMigration") - testImplementation library.java.hamcrest - testImplementation library.java.junit - testImplementation library.java.mockito_core - testImplementation 
library.java.jackson_dataformat_yaml - testImplementation library.java.google_code_gson - validatesRunner project(path: ":sdks:java:core", configuration: "shadowTest") - validatesRunner project(path: ":runners:core-java", configuration: "testRuntimeMigration") - validatesRunner project(project.path) -} - -configurations.all { - exclude group: "org.slf4j", module: "slf4j-jdk14" -} - -def sickbayTests = [ - // TODO(https://github.com/apache/beam/issues/21033) - 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInGlobalWindowBatchSizeByteSizeFn', - 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInStreamingMode', - 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testWithShardedKeyInGlobalWindow', - // TODO(https://github.com/apache/beam/issues/21036) - 'org.apache.beam.sdk.transforms.MapElementsTest.testMapSimpleFunction', - // TODO(https://github.com/apache/beam/issues/21035) - 'org.apache.beam.sdk.transforms.ViewTest.testEmptySingletonSideInput', - 'org.apache.beam.sdk.transforms.ViewTest.testNonSingletonSideInput', - // TODO(https://github.com/apache/beam/issues/21037) - 'org.apache.beam.sdk.transforms.WithTimestampsTest.withTimestampsBackwardsInTimeShouldThrow', - 'org.apache.beam.sdk.transforms.WithTimestampsTest.withTimestampsWithNullTimestampShouldThrow', - // TODO(https://github.com/apache/beam/issues/21039) - 'org.apache.beam.sdk.io.FileIOTest*', - // TODO(https://github.com/apache/beam/issues/21038) - 'org.apache.beam.sdk.io.AvroIOTest*', - // TODO(https://github.com/apache/beam/issues/21040) - 'org.apache.beam.sdk.PipelineTest.testEmptyPipeline', - // TODO(https://github.com/apache/beam/issues/21041) - 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testEncodingNPException', - 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testEncodingIOException', - 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testDecodingNPException', - 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testDecodingIOException', - // 
https://github.com/apache/beam/issues/19344 - 'org.apache.beam.sdk.io.BoundedReadFromUnboundedSourceTest.testTimeBound', - // https://github.com/apache/beam/issues/31725 - 'org.apache.beam.sdk.io.TextIOWriteTest.testWriteUnboundedWithCustomBatchParameters', -] -tasks.register("validatesRunner", Test) { - group = "Verification" - description "Validates Samza runner" - systemProperty "beamTestPipelineOptions", JsonOutput.toJson([ - "--runner=TestSamzaRunner", - ]) - - classpath = configurations.validatesRunner - testClassesDirs = files(project(":sdks:java:core").sourceSets.test.output.classesDirs) - useJUnit { - includeCategories 'org.apache.beam.sdk.testing.NeedsRunner' - includeCategories 'org.apache.beam.sdk.testing.ValidatesRunner' - excludeCategories 'org.apache.beam.sdk.testing.UsesExternalService' - // Should be run only in a properly configured SDK harness environment - excludeCategories 'org.apache.beam.sdk.testing.UsesSdkHarnessEnvironment' - excludeCategories 'org.apache.beam.sdk.testing.UsesUnboundedSplittableParDo' - excludeCategories 'org.apache.beam.sdk.testing.UsesSchema' - excludeCategories 'org.apache.beam.sdk.testing.LargeKeys$Above100MB' - excludeCategories 'org.apache.beam.sdk.testing.UsesAttemptedMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesCommittedMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesTestStreamWithProcessingTime' - excludeCategories 'org.apache.beam.sdk.testing.UsesMetricsPusher' - excludeCategories 'org.apache.beam.sdk.testing.UsesParDoLifecycle' - excludeCategories 'org.apache.beam.sdk.testing.UsesProcessingTimeTimers' - excludeCategories 'org.apache.beam.sdk.testing.UsesStrictTimerOrdering' - excludeCategories 'org.apache.beam.sdk.testing.UsesOnWindowExpiration' - excludeCategories 'org.apache.beam.sdk.testing.UsesOrderedListState' - excludeCategories 'org.apache.beam.sdk.testing.UsesMultimapState' - excludeCategories 'org.apache.beam.sdk.testing.UsesBundleFinalizer' - excludeCategories 
'org.apache.beam.sdk.testing.UsesLoopingTimer' - excludeCategories 'org.apache.beam.sdk.testing.UsesTriggeredSideInputs' - } - filter { - for (String test : sickbayTests) { - excludeTestsMatching test - } - // TODO(BEAM-10025) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestampDefaultUnbounded' - // TODO(https://github.com/apache/beam/issues/20703) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestamp' - // TODO(https://github.com/apache/beam/issues/20703) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testRelativeTimerWithOutputTimestamp' - // TODO(https://github.com/apache/beam/issues/20847) - excludeTestsMatching 'org.apache.beam.sdk.testing.TestStreamTest.testFirstElementLate' - // TODO(https://github.com/apache/beam/issues/20846) - excludeTestsMatching 'org.apache.beam.sdk.testing.TestStreamTest.testLateDataAccumulating' - - // These tests fail since there is no support for side inputs in Samza's unbounded splittable DoFn integration - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testWindowedSideInputWithCheckpointsUnbounded' - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testSideInputUnbounded' - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testWindowedSideInputUnbounded' - // These tests produce the output but either the pipeline doesn't shutdown or PAssert fails - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testAdditionalOutputUnbounded' - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testPairWithIndexBasicUnbounded' - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testPairWithIndexWindowedTimestampedUnbounded' - excludeTestsMatching 'org.apache.beam.sdk.transforms.SplittableDoFnTest.testOutputAfterCheckpointUnbounded' - } -} - -tasks.register("validatesRunnerSickbay", Test) { - group = 
"Verification" - description "Validates Samza runner (Sickbay Tests)" - systemProperty "beamTestPipelineOptions", JsonOutput.toJson([ - "--runner=TestSamzaRunner", - ]) - - classpath = configurations.validatesRunner - testClassesDirs = files(project(":sdks:java:core").sourceSets.test.output.classesDirs) - - filter { - for (String test : sickbayTests) { - includeTestsMatching test - } - } -} - -// Generates :runners:samza:runQuickstartJavaSamza -createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner:'Samza') diff --git a/runners/samza/job-server/build.gradle b/runners/samza/job-server/build.gradle deleted file mode 100644 index 7ffb2becd6d0..000000000000 --- a/runners/samza/job-server/build.gradle +++ /dev/null @@ -1,253 +0,0 @@ -import org.apache.beam.gradle.BeamModulePlugin - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -apply plugin: 'org.apache.beam.module' -apply plugin: 'application' -// we need to set mainClassName before applying shadow plugin -mainClassName = "org.apache.beam.runners.samza.SamzaJobServerDriver" - -applyJavaNature( - automaticModuleName: 'org.apache.beam.runners.samza.jobserver', - archivesBaseName: project.hasProperty('archives_base_name') ? 
archives_base_name : archivesBaseName, - validateShadowJar: false, - exportJavadoc: false, - shadowClosure: { - append "reference.conf" - }, -) - -def samzaRunnerProject = project.parent.path - -description = "Apache Beam :: Runners :: Samza :: Job Server" - -configurations { - validatesPortableRunner -} - -dependencies { - implementation project(samzaRunnerProject) - permitUnusedDeclared project(samzaRunnerProject) - runtimeOnly group: "org.slf4j", name: "jcl-over-slf4j", version: dependencies.create(project.library.java.slf4j_api).getVersion() - validatesPortableRunner project(path: samzaRunnerProject, configuration: "testRuntimeMigration") - validatesPortableRunner project(path: ":sdks:java:core", configuration: "shadowTest") - validatesPortableRunner project(path: ":runners:core-java", configuration: "testRuntimeMigration") - validatesPortableRunner project(path: ":runners:portability:java", configuration: "testRuntimeMigration") - runtimeOnly library.java.slf4j_simple -} - -runShadow { - args = [] -} - -def portableValidatesRunnerTask(String name, boolean docker) { - def tempDir = File.createTempDir() - def pipelineOptions = [ - "--configOverride={\"job.non-logged.store.base.dir\":\"" + tempDir + "\"}" - ] - createPortableValidatesRunnerTask( - name: "validatesPortableRunner${name}", - jobServerDriver: "org.apache.beam.runners.samza.SamzaJobServerDriver", - jobServerConfig: "--job-host=localhost,--job-port=0,--artifact-port=0,--expansion-port=0", - testClasspathConfiguration: configurations.validatesPortableRunner, - numParallelTests: 1, - pipelineOpts: pipelineOptions, - environment: docker ? 
BeamModulePlugin.PortableValidatesRunnerConfiguration.Environment.DOCKER : BeamModulePlugin.PortableValidatesRunnerConfiguration.Environment.EMBEDDED, - testCategories: { - if (docker) { - includeCategories 'org.apache.beam.sdk.testing.UsesSdkHarnessEnvironment' - return - } - // TODO(https://github.com/apache/beam/issues/22657) - // includeCategories 'org.apache.beam.sdk.testing.NeedsRunner' - includeCategories 'org.apache.beam.sdk.testing.ValidatesRunner' - // Should be run only in a properly configured SDK harness environment - excludeCategories 'org.apache.beam.sdk.testing.UsesSdkHarnessEnvironment' - // TODO: BEAM-12350 - excludeCategories 'org.apache.beam.sdk.testing.UsesAttemptedMetrics' - // TODO: BEAM-12681 - excludeCategories 'org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders' - // Larger keys are possible, but they require more memory. - excludeCategories 'org.apache.beam.sdk.testing.LargeKeys$Above10MB' - excludeCategories 'org.apache.beam.sdk.testing.UsesCommittedMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesExternalService' - excludeCategories 'org.apache.beam.sdk.testing.UsesCustomWindowMerging' - excludeCategories 'org.apache.beam.sdk.testing.UsesFailureMessage' - excludeCategories 'org.apache.beam.sdk.testing.UsesGaugeMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesMapState' - excludeCategories 'org.apache.beam.sdk.testing.UsesMultimapState' - excludeCategories 'org.apache.beam.sdk.testing.UsesSetState' - excludeCategories 'org.apache.beam.sdk.testing.UsesOrderedListState' - excludeCategories 'org.apache.beam.sdk.testing.UsesStrictTimerOrdering' - excludeCategories 'org.apache.beam.sdk.testing.UsesOnWindowExpiration' - excludeCategories 'org.apache.beam.sdk.testing.UsesBundleFinalizer' - excludeCategories 'org.apache.beam.sdk.testing.UsesOrderedListState' - excludeCategories 'org.apache.beam.sdk.testing.UsesBoundedSplittableParDo' - excludeCategories 
'org.apache.beam.sdk.testing.UsesTestStreamWithProcessingTime' - // TODO(https://github.com/apache/beam/issues/21023) - excludeCategories 'org.apache.beam.sdk.testing.UsesTestStreamWithMultipleStages' - excludeCategories 'org.apache.beam.sdk.testing.UsesUnboundedSplittableParDo' - excludeCategories 'org.apache.beam.sdk.testing.UsesLoopingTimer' - excludeCategories 'org.apache.beam.sdk.testing.UsesTriggeredSideInputs' - }, - testFilter: { - // TODO(https://github.com/apache/beam/issues/21042) - excludeTestsMatching "org.apache.beam.sdk.transforms.FlattenTest.testFlattenWithDifferentInputAndOutputCoders2" - excludeTestsMatching "org.apache.beam.sdk.transforms.FlattenTest.testEmptyFlattenAsSideInput" - excludeTestsMatching "org.apache.beam.sdk.transforms.FlattenTest.testFlattenPCollectionsEmptyThenParDo" - excludeTestsMatching "org.apache.beam.sdk.transforms.FlattenTest.testFlattenPCollectionsEmpty" - // TODO(BEAM-10025) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestampDefaultUnbounded' - // TODO(https://github.com/apache/beam/issues/20703) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestamp' - // TODO(https://github.com/apache/beam/issues/20847) - excludeTestsMatching 'org.apache.beam.sdk.testing.TestStreamTest.testFirstElementLate' - // TODO(https://github.com/apache/beam/issues/20846) - excludeTestsMatching 'org.apache.beam.sdk.testing.TestStreamTest.testLateDataAccumulating' - // TODO(https://github.com/apache/beam/issues/21142) - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupByKeyTest$WindowTests.testWindowFnPostMerging' - // TODO(https://github.com/apache/beam/issues/21143) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testParDoShiftTimestampInvalid' - // TODO(https://github.com/apache/beam/issues/21144) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testParDoShiftTimestampInvalidZeroAllowed' - 
// TODO(https://github.com/apache/beam/issues/32520) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoLifecycleTest.testTeardownCalledAfterExceptionIn*Stateful' - // TODO(https://github.com/apache/beam/issues/21145) - excludeTestsMatching 'org.apache.beam.sdk.transforms.DeduplicateTest.testEventTime' - // TODO(https://github.com/apache/beam/issues/21146) - excludeTestsMatching 'org.apache.beam.sdk.io.TFRecordIOTest.testReadInvalidRecord' - // TODO(https://github.com/apache/beam/issues/21147) - excludeTestsMatching 'org.apache.beam.sdk.io.TFRecordIOTest.testReadInvalidDataMask' - // TODO(https://github.com/apache/beam/issues/21148) - excludeTestsMatching 'org.apache.beam.sdk.io.TFRecordIOTest.testReadInvalidLengthMask' - // TODO(https://github.com/apache/beam/issues/21149) - excludeTestsMatching 'org.apache.beam.sdk.io.TextIOReadTest$CompressedReadTest.testCompressedReadWithoutExtension' - // TODO(https://github.com/apache/beam/issues/21150) - excludeTestsMatching 'org.apache.beam.sdk.io.WriteFilesTest.testWithRunnerDeterminedShardingUnbounded' - // TODO(https://github.com/apache/beam/issues/211505) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$MultipleInputsAndOutputTests.testParDoWritingToUndeclaredTag' - // TODO(https://github.com/apache/beam/issues/21152) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$MultipleInputsAndOutputTests.testParDoReadingFromUnknownSideInput' - // TODO(https://github.com/apache/beam/issues/21153) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ViewTest.testMapSideInputWithNullValuesCatchesDuplicates' - - // TODO(https://github.com/apache/beam/issues/21041) - excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testEncodingNPException' - excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testEncodingIOException' - excludeTestsMatching 'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testDecodingNPException' - excludeTestsMatching 
'org.apache.beam.sdk.coders.PCollectionCustomCoderTest.testDecodingIOException' - // TODO(https://github.com/apache/beam/issues/21040) - excludeTestsMatching 'org.apache.beam.sdk.PipelineTest.testEmptyPipeline' - // TODO(https://github.com/apache/beam/issues/21038) - excludeTestsMatching 'org.apache.beam.sdk.io.AvroIOTest*' - // TODO(https://github.com/apache/beam/issues/21039) - excludeTestsMatching 'org.apache.beam.sdk.io.FileIOTest*' - // TODO(https://github.com/apache/beam/issues/21037) - excludeTestsMatching 'org.apache.beam.sdk.transforms.WithTimestampsTest.withTimestampsBackwardsInTimeShouldThrow' - excludeTestsMatching 'org.apache.beam.sdk.transforms.WithTimestampsTest.withTimestampsWithNullTimestampShouldThrow' - // TODO(https://github.com/apache/beam/issues/21035) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ViewTest.testEmptySingletonSideInput' - excludeTestsMatching 'org.apache.beam.sdk.transforms.ViewTest.testNonSingletonSideInput' - // TODO(https://github.com/apache/beam/issues/21036) - excludeTestsMatching 'org.apache.beam.sdk.transforms.MapElementsTest.testMapSimpleFunction' - // TODO(https://github.com/apache/beam/issues/21033) - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInGlobalWindowBatchSizeByteSizeFn' - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInStreamingMode' - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testWithShardedKeyInGlobalWindow' - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testWithUnevenBatches' - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupIntoBatchesTest.testInGlobalWindowBatchSizeByteSize' - // TODO(BEAM-10025) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestampDefaultUnbounded' - // TODO(https://github.com/apache/beam/issues/20703) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testOutputTimestamp' - // 
TODO(https://github.com/apache/beam/issues/20703) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimerTests.testRelativeTimerWithOutputTimestamp' - // TODO(BEAM-13498) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testProcessElementSkew' - // TODO(https://github.com/apache/beam/issues/22650) - excludeTestsMatching 'org.apache.beam.sdk.transforms.GroupByKeyTest$BasicTests.testAfterProcessingTimeContinuationTriggerUsingState' - // TODO(https://github.com/apache/beam/issues/29973) - excludeTestsMatching 'org.apache.beam.sdk.transforms.ReshuffleTest.testReshufflePreservesMetadata' - // TODO(https://github.com/apache/beam/issues/31231) - excludeTestsMatching 'org.apache.beam.sdk.transforms.RedistributeTest.testRedistributePreservesMetadata' - } - ) -} - -project.ext.validatesPortableRunnerDocker = portableValidatesRunnerTask("Docker", true) -project.ext.validatesPortableRunnerEmbedded = portableValidatesRunnerTask("Embedded", false) - -tasks.register("validatesPortableRunner") { - dependsOn validatesPortableRunnerDocker - dependsOn validatesPortableRunnerEmbedded -} - -def testJavaVersion = project.findProperty('testJavaVersion') -String testJavaHome = null -if (testJavaVersion) { - testJavaHome = project.findProperty("java${testJavaVersion}Home") -} - -def jobPort = BeamModulePlugin.getRandomPort() -def artifactPort = BeamModulePlugin.getRandomPort() - -def setupTask = project.tasks.register("samzaJobServerSetup", Exec) { - dependsOn shadowJar - def pythonDir = project.project(":sdks:python").projectDir - def samzaJobServerJar = shadowJar.archivePath - if (testJavaHome) { - environment "JAVA_HOME", testJavaHome - } - executable 'sh' - args '-c', "$pythonDir/scripts/run_job_server.sh stop --group_id ${project.name} && $pythonDir/scripts/run_job_server.sh start --group_id ${project.name} --job_port ${jobPort} --artifact_port ${artifactPort} --job_server_jar ${samzaJobServerJar}" -} - -def cleanupTask = 
project.tasks.register("samzaJobServerCleanup", Exec) { - def pythonDir = project.project(":sdks:python").projectDir - if (testJavaHome) { - environment "JAVA_HOME", testJavaHome - } - executable 'sh' - args '-c', "$pythonDir/scripts/run_job_server.sh stop --group_id ${project.name}" -} - -createCrossLanguageValidatesRunnerTask( - startJobServer: setupTask, - cleanupJobServer: cleanupTask, - classpath: configurations.validatesPortableRunner, - numParallelTests: 1, - pythonPipelineOptions: [ - "--runner=PortableRunner", - "--job_endpoint=localhost:${jobPort}", - "--environment_cache_millis=10000", - "--experiments=beam_fn_api", - ], - javaPipelineOptions: [ - "--runner=PortableRunner", - "--jobEndpoint=localhost:${jobPort}", - "--environmentCacheMillis=10000", - "--experiments=beam_fn_api", - "--customBeamRequirement=${project.project(":sdks:python").projectDir}/build/apache-beam.tar.gz", - ], - goScriptOptions: [ - "--runner samza", - "--tests \"./test/integration/xlang ./test/integration/io/xlang/...\"", - "--endpoint localhost:${jobPort}", - ], -) diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionContext.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionContext.java deleted file mode 100644 index 18bb098bf2cc..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionContext.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.samza.context.ApplicationContainerContext; -import org.apache.samza.context.ApplicationContainerContextFactory; -import org.apache.samza.context.ContainerContext; -import org.apache.samza.context.ExternalContext; -import org.apache.samza.context.JobContext; -import org.apache.samza.metrics.MetricsRegistryMap; - -/** Runtime context for the Samza runner. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaExecutionContext implements ApplicationContainerContext { - - private final SamzaPipelineOptions options; - private SamzaMetricsContainer metricsContainer; - - public SamzaExecutionContext(SamzaPipelineOptions options) { - this.options = options; - } - - public SamzaPipelineOptions getPipelineOptions() { - return options; - } - - public SamzaMetricsContainer getMetricsContainer() { - return this.metricsContainer; - } - - void setMetricsContainer(SamzaMetricsContainer metricsContainer) { - this.metricsContainer = metricsContainer; - } - - @Override - public void start() {} - - @Override - public void stop() {} - - /** The factory to return this {@link SamzaExecutionContext}. 
*/ - public class Factory implements ApplicationContainerContextFactory { - - @Override - public SamzaExecutionContext create( - ExternalContext externalContext, JobContext jobContext, ContainerContext containerContext) { - - final MetricsRegistryMap metricsRegistry = - (MetricsRegistryMap) containerContext.getContainerMetricsRegistry(); - SamzaExecutionContext.this.setMetricsContainer(new SamzaMetricsContainer(metricsRegistry)); - return SamzaExecutionContext.this; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionEnvironment.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionEnvironment.java deleted file mode 100644 index 02c31edfd1dd..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaExecutionEnvironment.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -/** Different Samza execution environments that defines how the Samza job will be deployed. */ -public enum SamzaExecutionEnvironment { - /** - * Runs the Samza job on the local machine with only one container. 
There is no coordination - * required since there is only one container deployed in a single JVM. This setting is generally - * used for development and testing. - */ - LOCAL, - - /** - * Submits and runs the Samza job on YARN, a remote clustered resource manager. Samza works with - * the YARN to provision and coordinate resources for your application and run it across a cluster - * of machines. It also handles failures of individual instances and automatically restarts them. - */ - YARN, - - /** - * Runs Samza job as a stand alone embedded library mode which can be imported into your Java - * application. You can increase your application's capacity by spinning up multiple instances. - * These instances will then dynamically coordinate with each other and distribute work among - * themselves. If an instance fails, the tasks running on it will be re-assigned to the remaining - * ones. By default, Samza uses Zookeeper for coordination across individual instances. - */ - STANDALONE -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobInvoker.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobInvoker.java deleted file mode 100644 index 6638b35f377d..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobInvoker.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import java.util.UUID; -import javax.annotation.Nullable; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; -import org.apache.beam.runners.jobsubmission.JobInvocation; -import org.apache.beam.runners.jobsubmission.JobInvoker; -import org.apache.beam.runners.jobsubmission.PortablePipelineJarCreator; -import org.apache.beam.runners.jobsubmission.PortablePipelineRunner; -import org.apache.beam.sdk.util.construction.PipelineOptionsTranslation; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.Struct; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ListeningExecutorService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaJobInvoker extends JobInvoker { - - private static final Logger LOG = LoggerFactory.getLogger(SamzaJobInvoker.class); - - public static SamzaJobInvoker create( - SamzaJobServerDriver.SamzaServerConfiguration configuration) { - return new SamzaJobInvoker(); - } - - private SamzaJobInvoker() { - this("samza-runner-job-invoker-%d"); - } - - protected SamzaJobInvoker(String name) { - super(name); - } - - @Override - protected JobInvocation invokeWithExecutor( - RunnerApi.Pipeline pipeline, - Struct options, - @Nullable String retrievalToken, - ListeningExecutorService 
executorService) { - LOG.trace("Parsing pipeline options"); - final SamzaPortablePipelineOptions samzaOptions = - PipelineOptionsTranslation.fromProto(options).as(SamzaPortablePipelineOptions.class); - - final PortablePipelineRunner pipelineRunner; - if (Strings.isNullOrEmpty(samzaOptions.getOutputExecutablePath())) { - pipelineRunner = new SamzaPipelineRunner(samzaOptions); - } else { - /* - * To support --output_executable_path where bundles the input pipeline along with all - * artifacts, etc. required to run the pipeline into a jar that can be executed later. - */ - pipelineRunner = new PortablePipelineJarCreator(SamzaPipelineRunner.class); - } - - final String invocationId = - String.format("%s_%s", samzaOptions.getJobName(), UUID.randomUUID().toString()); - final JobInfo jobInfo = - JobInfo.create(invocationId, samzaOptions.getJobName(), retrievalToken, options); - return new JobInvocation(jobInfo, executorService, pipeline, pipelineRunner); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobServerDriver.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobServerDriver.java deleted file mode 100644 index f8139c0d26f2..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaJobServerDriver.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import org.apache.beam.runners.jobsubmission.JobServerDriver; -import org.apache.beam.sdk.fn.server.ServerFactory; -import org.apache.beam.sdk.io.FileSystems; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.kohsuke.args4j.CmdLineException; -import org.kohsuke.args4j.CmdLineParser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Driver program that starts a job server for the Samza runner. */ -public class SamzaJobServerDriver extends JobServerDriver { - - private static final Logger LOG = LoggerFactory.getLogger(SamzaJobServerDriver.class); - - /** Samza runner-specific Configuration for the jobServer. */ - public static class SamzaServerConfiguration extends ServerConfiguration {} - - public static void main(String[] args) { - // TODO: Expose the fileSystem related options. - PipelineOptions options = PipelineOptionsFactory.create(); - // Register standard file systems. 
- FileSystems.setDefaultPipelineOptions(options); - fromParams(args).run(); - } - - private static SamzaJobServerDriver fromParams(String[] args) { - return fromConfig(parseArgs(args)); - } - - private static void printUsage(CmdLineParser parser) { - System.err.printf("Usage: java %s arguments...%n", SamzaJobServerDriver.class.getSimpleName()); - parser.printUsage(System.err); - System.err.println(); - } - - private static SamzaJobServerDriver fromConfig(SamzaServerConfiguration configuration) { - return create( - configuration, - createJobServerFactory(configuration), - createArtifactServerFactory(configuration)); - } - - public static SamzaServerConfiguration parseArgs(String[] args) { - SamzaServerConfiguration configuration = new SamzaServerConfiguration(); - CmdLineParser parser = new CmdLineParser(configuration); - try { - parser.parseArgument(args); - } catch (CmdLineException e) { - LOG.error("Unable to parse command line arguments.", e); - printUsage(parser); - throw new IllegalArgumentException("Unable to parse command line arguments.", e); - } - return configuration; - } - - private static SamzaJobServerDriver create( - SamzaServerConfiguration configuration, - ServerFactory jobServerFactory, - ServerFactory artifactServerFactory) { - return new SamzaJobServerDriver(configuration, jobServerFactory, artifactServerFactory); - } - - private SamzaJobServerDriver( - SamzaServerConfiguration configuration, - ServerFactory jobServerFactory, - ServerFactory artifactServerFactory) { - this( - configuration, - jobServerFactory, - artifactServerFactory, - () -> SamzaJobInvoker.create(configuration)); - } - - protected SamzaJobServerDriver( - ServerConfiguration configuration, - ServerFactory jobServerFactory, - ServerFactory artifactServerFactory, - JobInvokerFactory jobInvokerFactory) { - super(configuration, jobServerFactory, artifactServerFactory, jobInvokerFactory); - } -} diff --git 
a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineExceptionContext.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineExceptionContext.java deleted file mode 100644 index 5bd02b3fc737..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineExceptionContext.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -/** Helper that is used to metadata associated with an exception thrown by Samza Runner. 
*/ -public class SamzaPipelineExceptionContext { - private final String transformFullName; - private final Exception exception; - - public SamzaPipelineExceptionContext(String transformFullName, Exception exception) { - this.transformFullName = transformFullName; - this.exception = exception; - } - - public String getTransformFullName() { - return transformFullName; - } - - public Exception getException() { - return exception; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineLifeCycleListener.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineLifeCycleListener.java deleted file mode 100644 index 47f36a229ac1..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineLifeCycleListener.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import org.apache.samza.config.Config; -import org.apache.samza.context.ExternalContext; - -/** Life cycle listener for a Samza pipeline during runtime. */ -public interface SamzaPipelineLifeCycleListener { - /** Callback when the pipeline options is created. 
*/ - void onInit(Config config, SamzaPipelineOptions options); - - /** Callback when the pipeline is started. */ - ExternalContext onStart(); - - /** - * Callback after the pipeline is submmitted. This will be invoked only for Samza jobs submitted - * to a cluster. - */ - void onSubmit(); - - /** Callback after the pipeline is finished. */ - void onFinish(); - - /** A registrar for {@link SamzaPipelineLifeCycleListener}. */ - interface Registrar { - SamzaPipelineLifeCycleListener getLifeCycleListener(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java deleted file mode 100644 index a34303d92552..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.Hidden; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.samza.config.ConfigLoaderFactory; -import org.apache.samza.config.loaders.PropertiesConfigLoaderFactory; -import org.apache.samza.metrics.MetricsReporter; - -/** Options which can be used to configure a Samza PortablePipelineRunner. */ -public interface SamzaPipelineOptions extends PipelineOptions { - - @Description( - "The config file for Samza. It is *optional*. By default Samza supports properties config." - + "Without a config file, Samza uses a default config for local execution.") - String getConfigFilePath(); - - void setConfigFilePath(String filePath); - - @Description("The factory to read config file from config file path.") - @Default.Class(PropertiesConfigLoaderFactory.class) - Class getConfigLoaderFactory(); - - void setConfigLoaderFactory(Class configLoaderFactory); - - @Description( - "The config override to set programmatically. It will be applied on " - + "top of config file if it exits, otherwise used directly as the config.") - Map getConfigOverride(); - - void setConfigOverride(Map configs); - - @Description("The instance name of the job") - @Default.String("1") - String getJobInstance(); - - void setJobInstance(String instance); - - @Description( - "Samza application execution environment." 
- + "See {@link org.apache.beam.runners.samza.SamzaExecutionEnvironment} for detailed environment descriptions.") - @Default.Enum("LOCAL") - SamzaExecutionEnvironment getSamzaExecutionEnvironment(); - - void setSamzaExecutionEnvironment(SamzaExecutionEnvironment environment); - - @Description("The interval to check for watermarks in milliseconds.") - @Default.Long(1000) - long getWatermarkInterval(); - - void setWatermarkInterval(long interval); - - @Description("The maximum number of messages to buffer for a given system.") - @Default.Integer(5000) - int getSystemBufferSize(); - - void setSystemBufferSize(int consumerBufferSize); - - @Description("The maximum number of event-time timers to buffer in memory for a PTransform") - @Default.Integer(50000) - int getEventTimerBufferSize(); - - void setEventTimerBufferSize(int eventTimerBufferSize); - - @Description("The maximum number of ready timers to process at once per watermark.") - @Default.Integer(Integer.MAX_VALUE) - int getMaxReadyTimersToProcessOnce(); - - void setMaxReadyTimersToProcessOnce(int maxReadyTimersToProcessOnce); - - @Description("The maximum parallelism allowed for any data source.") - @Default.Integer(1) - int getMaxSourceParallelism(); - - void setMaxSourceParallelism(int maxSourceParallelism); - - @Description("The batch get size limit for the state store.") - @Default.Integer(10000) - int getStoreBatchGetSize(); - - void setStoreBatchGetSize(int storeBatchGetSize); - - @Description("Enable/disable Beam metrics in Samza Runner") - @Default.Boolean(true) - Boolean getEnableMetrics(); - - void setEnableMetrics(Boolean enableMetrics); - - @Description("Enable/disable Beam Transform throughput, latency metrics in Samza Runner") - @Default.Boolean(false) - Boolean getEnableTransformMetrics(); - - void setEnableTransformMetrics(Boolean enableMetrics); - - @Description("The config for state to be durable") - @Default.Boolean(false) - Boolean getStateDurable(); - - void setStateDurable(Boolean 
stateDurable); - - @JsonIgnore - @Description("The metrics reporters that will be used to emit metrics.") - List getMetricsReporters(); - - void setMetricsReporters(List reporters); - - @Description("The maximum number of elements in a bundle.") - @Default.Long(1) - long getMaxBundleSize(); - - void setMaxBundleSize(long maxBundleSize); - - @Description("The maximum time to wait before finalising a bundle (in milliseconds).") - @Default.Long(1000) - long getMaxBundleTimeMs(); - - void setMaxBundleTimeMs(long maxBundleTimeMs); - - @Description( - "Wait if necessary for completing a remote bundle processing for at most the given time (in milliseconds). if the value of timeout is negative, wait forever until the bundle processing is completed. Used only in portable mode for now.") - @Default.Long(-1) - long getBundleProcessingTimeout(); - - void setBundleProcessingTimeout(long timeoutMs); - - @Description( - "The number of threads to run DoFn.processElements in parallel within a bundle. Used only in non-portable mode.") - @Default.Integer(1) - int getNumThreadsForProcessElement(); - - void setNumThreadsForProcessElement(int numThreads); - - @JsonIgnore - @Description( - "The ExecutorService instance to run DoFN.processElements in parallel within a bundle. 
Used only in non-portable mode.") - @Default.InstanceFactory(ProcessElementExecutorServiceFactory.class) - @Hidden - ExecutorService getExecutorServiceForProcessElement(); - - void setExecutorServiceForProcessElement(ExecutorService executorService); - - class ProcessElementExecutorServiceFactory implements DefaultValueFactory { - - @Override - public ExecutorService create(PipelineOptions options) { - return Executors.newFixedThreadPool( - options.as(SamzaPipelineOptions.class).getNumThreadsForProcessElement(), - new ThreadFactoryBuilder().setNameFormat("Process Element Thread-%d").build()); - } - } - - @Description("Enable/disable late data dropping in GroupByKey/Combine transforms") - @Default.Boolean(false) - boolean getDropLateData(); - - void setDropLateData(boolean dropLateData); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java deleted file mode 100644 index 1db0974b5d30..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidator.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.samza.config.JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE; - -import java.util.HashMap; -import java.util.Map; -import org.apache.samza.config.JobConfig; -import org.apache.samza.config.MapConfig; - -/** Validates that the {@link SamzaPipelineOptions} conforms to all the criteria. */ -public class SamzaPipelineOptionsValidator { - public static void validate(SamzaPipelineOptions opts) { - checkArgument(opts.getMaxSourceParallelism() >= 1); - validateBundlingRelatedOptions(opts); - } - - /* - * Perform some bundling related validation for pipeline option. - * Visible for testing. - */ - static void validateBundlingRelatedOptions(SamzaPipelineOptions pipelineOptions) { - if (pipelineOptions.getMaxBundleSize() > 1) { - final Map configs = - pipelineOptions.getConfigOverride() == null - ? new HashMap<>() - : pipelineOptions.getConfigOverride(); - final JobConfig jobConfig = new JobConfig(new MapConfig(configs)); - - // Validate that the threadPoolSize is not override in the code - checkArgument( - jobConfig.getThreadPoolSize() <= 1, - JOB_CONTAINER_THREAD_POOL_SIZE - + " config should be replaced with SamzaPipelineOptions.numThreadsForProcessElement"); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java deleted file mode 100644 index e84cf086edc9..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineResult.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import static org.apache.beam.runners.core.metrics.MetricsContainerStepMap.asAttemptedOnlyMetricResults; -import static org.apache.samza.config.TaskConfig.TASK_SHUTDOWN_MS; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.metrics.MetricResults; -import org.apache.beam.sdk.util.UserCodeException; -import org.apache.samza.config.Config; -import org.apache.samza.job.ApplicationStatus; -import org.apache.samza.runtime.ApplicationRunner; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Duration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** The result from executing a Samza Pipeline. 
*/ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaPipelineResult implements PipelineResult { - private static final Logger LOG = LoggerFactory.getLogger(SamzaPipelineResult.class); - // allow some buffer on top of samza's own shutdown timeout - private static final long SHUTDOWN_TIMEOUT_BUFFER = 5000L; - private static final long DEFAULT_TASK_SHUTDOWN_MS = 30000L; - - private final SamzaExecutionContext executionContext; - private final ApplicationRunner runner; - private final SamzaPipelineLifeCycleListener listener; - private final long shutdownTiemoutMs; - - public SamzaPipelineResult( - ApplicationRunner runner, - SamzaExecutionContext executionContext, - SamzaPipelineLifeCycleListener listener, - Config config) { - this.executionContext = executionContext; - this.runner = runner; - this.listener = listener; - this.shutdownTiemoutMs = - config.getLong(TASK_SHUTDOWN_MS, DEFAULT_TASK_SHUTDOWN_MS) + SHUTDOWN_TIMEOUT_BUFFER; - } - - @Override - public State getState() { - return getStateInfo().state; - } - - @Override - public State cancel() { - LOG.info("Start to cancel samza pipeline..."); - runner.kill(); - LOG.info("Start awaiting finish for {} ms.", shutdownTiemoutMs); - return waitUntilFinish(Duration.millis(shutdownTiemoutMs)); - } - - @Override - public State waitUntilFinish(@Nullable Duration duration) { - try { - if (duration == null) { - runner.waitForFinish(); - } else { - runner.waitForFinish(java.time.Duration.ofMillis(duration.getMillis())); - } - } catch (Exception e) { - throw new Pipeline.PipelineExecutionException(e); - } - - final StateInfo stateInfo = getStateInfo(); - - if (listener != null && (stateInfo.state == State.DONE || stateInfo.state == State.FAILED)) { - listener.onFinish(); - } - - if (stateInfo.state == State.FAILED) { - throw stateInfo.error; - } - - LOG.info("Pipeline finished. 
Final state: {}", stateInfo.state); - return stateInfo.state; - } - - @Override - public State waitUntilFinish() { - return waitUntilFinish(null); - } - - @Override - public MetricResults metrics() { - return asAttemptedOnlyMetricResults(executionContext.getMetricsContainer().getContainers()); - } - - @SuppressWarnings("Slf4jDoNotLogMessageOfExceptionExplicitly") - private StateInfo getStateInfo() { - final ApplicationStatus status = runner.status(); - switch (status.getStatusCode()) { - case New: - return new StateInfo(State.STOPPED); - case Running: - return new StateInfo(State.RUNNING); - case SuccessfulFinish: - return new StateInfo(State.DONE); - case UnsuccessfulFinish: - LOG.error("Pipeline execution failed", status.getThrowable()); - return new StateInfo( - State.FAILED, - new Pipeline.PipelineExecutionException(getUserCodeException(status.getThrowable()))); - default: - return new StateInfo(State.UNKNOWN); - } - } - - private static class StateInfo { - private final State state; - private final Pipeline.PipelineExecutionException error; - - private StateInfo(State state) { - this(state, null); - } - - private StateInfo(State state, Pipeline.PipelineExecutionException error) { - this.state = state; - this.error = error; - } - } - - /** - * Some of the Beam unit tests relying on the exception message to do assertion. This function - * will find the original UserCodeException so the message will be exposed directly. 
- */ - private static Throwable getUserCodeException(Throwable throwable) { - Throwable t = throwable; - while (t != null) { - if (t instanceof UserCodeException) { - return t; - } - - t = t.getCause(); - } - - return throwable; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java deleted file mode 100644 index 897b78cf9e47..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza; - -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; -import org.apache.beam.runners.jobsubmission.PortablePipelineResult; -import org.apache.beam.runners.jobsubmission.PortablePipelineRunner; -import org.apache.beam.runners.samza.translation.SamzaPortablePipelineTranslator; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.util.construction.graph.GreedyPipelineFuser; -import org.apache.beam.sdk.util.construction.graph.ProtoOverrides; -import org.apache.beam.sdk.util.construction.graph.SplittableParDoExpander; -import org.apache.beam.sdk.util.construction.graph.TrivialNativeTransformExpander; -import org.apache.beam.sdk.util.construction.renderer.PipelineDotRenderer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Runs a Samza job via {@link SamzaRunner}. */ -public class SamzaPipelineRunner implements PortablePipelineRunner { - - private static final Logger LOG = LoggerFactory.getLogger(SamzaPipelineRunner.class); - - private final SamzaPipelineOptions options; - - @Override - public PortablePipelineResult run(final RunnerApi.Pipeline pipeline, JobInfo jobInfo) { - // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles. - RunnerApi.Pipeline pipelineWithSdfExpanded = - ProtoOverrides.updateTransform( - PTransformTranslation.PAR_DO_TRANSFORM_URN, - pipeline, - SplittableParDoExpander.createSizedReplacement()); - - // Don't let the fuser fuse any subcomponents of native transforms. - RunnerApi.Pipeline trimmedPipeline = - TrivialNativeTransformExpander.forKnownUrns( - pipelineWithSdfExpanded, SamzaPortablePipelineTranslator.knownUrns()); - - // Fused pipeline proto. - // TODO: Consider supporting partially-fused graphs. 
- RunnerApi.Pipeline fusedPipeline = - trimmedPipeline.getComponents().getTransformsMap().values().stream() - .anyMatch(proto -> ExecutableStage.URN.equals(proto.getSpec().getUrn())) - ? trimmedPipeline - : GreedyPipelineFuser.fuse(trimmedPipeline).toPipeline(); - - LOG.info("Portable pipeline to run:"); - LOG.info("{}", PipelineDotRenderer.toDotString(fusedPipeline)); - // the pipeline option coming from sdk will set the sdk specific runner which will break - // serialization - // so we need to reset the runner here to a valid Java runner - options.setRunner(SamzaRunner.class); - try { - final SamzaRunner runner = SamzaRunner.fromOptions(options); - final PortablePipelineResult result = runner.runPortablePipeline(fusedPipeline, jobInfo); - - final SamzaExecutionEnvironment exeEnv = options.getSamzaExecutionEnvironment(); - if (exeEnv == SamzaExecutionEnvironment.LOCAL - || exeEnv == SamzaExecutionEnvironment.STANDALONE) { - // Make run() sync for local mode - result.waitUntilFinish(); - } - return result; - } catch (Exception e) { - throw new RuntimeException("Failed to invoke samza job", e); - } - } - - public SamzaPipelineRunner(SamzaPipelineOptions options) { - this.options = options; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineOptions.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineOptions.java deleted file mode 100644 index aa8e7ceb71d7..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineOptions.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PortablePipelineOptions; - -/** Samza pipeline option that contains portability specific logic. For internal usage only. */ -public interface SamzaPortablePipelineOptions - extends SamzaPipelineOptions, PortablePipelineOptions { - @Description( - "The file path for the local file system token. If not set (by default), then the runner would" - + " not use secure server factory.") - String getFsTokenPath(); - - void setFsTokenPath(String path); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineResult.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineResult.java deleted file mode 100644 index a3452097f511..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPortablePipelineResult.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import org.apache.beam.model.jobmanagement.v1.JobApi; -import org.apache.beam.runners.jobsubmission.PortablePipelineResult; -import org.apache.samza.application.StreamApplication; -import org.apache.samza.config.Config; -import org.apache.samza.runtime.ApplicationRunner; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** The result from executing a Samza Portable Pipeline. */ -public class SamzaPortablePipelineResult extends SamzaPipelineResult - implements PortablePipelineResult { - - private static final Logger LOG = LoggerFactory.getLogger(SamzaPortablePipelineResult.class); - - SamzaPortablePipelineResult( - StreamApplication app, - ApplicationRunner runner, - SamzaExecutionContext executionContext, - SamzaPipelineLifeCycleListener listener, - Config config) { - super(runner, executionContext, listener, config); - } - - @Override - public JobApi.MetricResults portableMetrics() throws UnsupportedOperationException { - LOG.warn("Collecting monitoring infos is not implemented yet in Samza portable runner."); - return JobApi.MetricResults.newBuilder().build(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java deleted file mode 100644 index eb16faa41ac0..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunner.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor 
license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.ServiceLoader; -import java.util.Set; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; -import org.apache.beam.runners.jobsubmission.PortablePipelineResult; -import org.apache.beam.runners.samza.translation.ConfigBuilder; -import org.apache.beam.runners.samza.translation.ConfigContext; -import org.apache.beam.runners.samza.translation.PViewToIdMapper; -import org.apache.beam.runners.samza.translation.PortableTranslationContext; -import org.apache.beam.runners.samza.translation.SamzaPipelineTranslator; -import org.apache.beam.runners.samza.translation.SamzaPortablePipelineTranslator; -import org.apache.beam.runners.samza.translation.SamzaTransformOverrides; -import org.apache.beam.runners.samza.translation.StateIdParser; -import org.apache.beam.runners.samza.translation.TranslationContext; -import org.apache.beam.runners.samza.util.PipelineJsonRenderer; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineRunner; -import org.apache.beam.sdk.metrics.MetricsEnvironment; -import 
org.apache.beam.sdk.options.ExperimentalOptions; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsValidator; -import org.apache.beam.sdk.util.construction.SplittableParDo; -import org.apache.beam.sdk.util.construction.renderer.PipelineDotRenderer; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterators; -import org.apache.samza.application.StreamApplication; -import org.apache.samza.config.Config; -import org.apache.samza.context.ExternalContext; -import org.apache.samza.metrics.MetricsReporter; -import org.apache.samza.metrics.MetricsReporterFactory; -import org.apache.samza.runtime.ApplicationRunner; -import org.apache.samza.runtime.ApplicationRunners; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A {@link PipelineRunner} that executes the operations in the {@link Pipeline} into an equivalent - * Samza plan. - * - * @deprecated The support for Samza is scheduled for removal in Beam 3.0. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -@Deprecated -public class SamzaRunner extends PipelineRunner { - private static final Logger LOG = LoggerFactory.getLogger(SamzaRunner.class); - private static final String BEAM_DOT_GRAPH = "beamDotGraph"; - public static final String BEAM_JSON_GRAPH = "beamJsonGraph"; - - public static SamzaRunner fromOptions(PipelineOptions opts) { - final SamzaPipelineOptions samzaOptions = - PipelineOptionsValidator.validate(SamzaPipelineOptions.class, opts); - return new SamzaRunner(samzaOptions); - } - - private final SamzaPipelineOptions options; - private final SamzaPipelineLifeCycleListener listener; - - private SamzaRunner(SamzaPipelineOptions options) { - this.options = options; - final Iterator listenerReg = - ServiceLoader.load(SamzaPipelineLifeCycleListener.Registrar.class).iterator(); - this.listener = - listenerReg.hasNext() ? 
Iterators.getOnlyElement(listenerReg).getLifeCycleListener() : null; - } - - public PortablePipelineResult runPortablePipeline(RunnerApi.Pipeline pipeline, JobInfo jobInfo) { - final String dotGraph = PipelineDotRenderer.toDotString(pipeline); - LOG.info("Portable pipeline to run DOT graph:\n{}", dotGraph); - - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPortablePipelineTranslator.createConfig(pipeline, configBuilder, options); - configBuilder.put(BEAM_DOT_GRAPH, dotGraph); - - final Config config = configBuilder.build(); - options.setConfigOverride(config); - - if (listener != null) { - listener.onInit(config, options); - } - - final SamzaExecutionContext executionContext = new SamzaExecutionContext(options); - final Map reporterFactories = getMetricsReporters(); - final StreamApplication app = - appDescriptor -> { - appDescriptor - .withApplicationContainerContextFactory(executionContext.new Factory()) - .withMetricsReporterFactories(reporterFactories); - SamzaPortablePipelineTranslator.translate( - pipeline, new PortableTranslationContext(appDescriptor, options, jobInfo)); - }; - - ApplicationRunner runner = runSamzaApp(app, config); - return new SamzaPortablePipelineResult(app, runner, executionContext, listener, config); - } - - @Override - public SamzaPipelineResult run(Pipeline pipeline) { - // TODO(https://github.com/apache/beam/issues/20530): Use SDF read as default for non-portable - // execution when we address performance issue. 
- if (!ExperimentalOptions.hasExperiment(pipeline.getOptions(), "beam_fn_api")) { - SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline); - } - - MetricsEnvironment.setMetricsSupported(true); - - if (LOG.isDebugEnabled()) { - LOG.debug( - "Pre-processed Beam pipeline in dot format:\n{}", - PipelineDotRenderer.toDotString(pipeline)); - } - - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - - final String dotGraph = PipelineDotRenderer.toDotString(pipeline); - LOG.info("Beam pipeline DOT graph:\n{}", dotGraph); - - final String jsonGraph = PipelineJsonRenderer.toJsonString(pipeline, configCtx); - LOG.info("Beam pipeline JSON graph:\n{}", jsonGraph); - - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - configBuilder.put(BEAM_DOT_GRAPH, dotGraph); - configBuilder.put(BEAM_JSON_GRAPH, jsonGraph); - - final Config config = configBuilder.build(); - options.setConfigOverride(config); - - if (listener != null) { - listener.onInit(config, options); - } - - final SamzaExecutionContext executionContext = new SamzaExecutionContext(options); - final Map reporterFactories = getMetricsReporters(); - - final StreamApplication app = - appDescriptor -> { - appDescriptor.withApplicationContainerContextFactory(executionContext.new Factory()); - appDescriptor.withMetricsReporterFactories(reporterFactories); - - SamzaPipelineTranslator.translate( - pipeline, new TranslationContext(appDescriptor, idMap, nonUniqueStateIds, options)); - }; - - // perform a final round of validation for the pipeline options now that all configs are - // generated - SamzaPipelineOptionsValidator.validate(options); - ApplicationRunner runner = runSamzaApp(app, 
config); - return new SamzaPipelineResult(runner, executionContext, listener, config); - } - - private Map getMetricsReporters() { - if (options.getMetricsReporters() != null) { - final Map reporters = new HashMap<>(); - for (int i = 0; i < options.getMetricsReporters().size(); i++) { - final String name = "beam-metrics-reporter-" + i; - final MetricsReporter reporter = options.getMetricsReporters().get(i); - - reporters.put(name, (MetricsReporterFactory) (nm, processorId, config) -> reporter); - LOG.info("{}: {}", name, reporter.getClass().getName()); - } - return reporters; - } else { - return Collections.emptyMap(); - } - } - - private ApplicationRunner runSamzaApp(StreamApplication app, Config config) { - - final ApplicationRunner runner = ApplicationRunners.getApplicationRunner(app, config); - - ExternalContext externalContext = null; - if (listener != null) { - externalContext = listener.onStart(); - } - - runner.run(externalContext); - - if (listener != null - && options.getSamzaExecutionEnvironment() == SamzaExecutionEnvironment.YARN) { - listener.onSubmit(); - } - - return runner; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerOverrideConfigs.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerOverrideConfigs.java deleted file mode 100644 index 4c5fa432ca82..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerOverrideConfigs.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import java.time.Duration; - -// TODO: can we get rid of this class? Right now the SamzaPipelineOptionsValidator would force -// the pipeline option to be the type SamzaPipelineOption. Ideally, we should be able to keep -// passing SamzaPortablePipelineOption. Alternative, we could merge portable and non-portable -// pipeline option. -/** A helper class for holding all the beam runner specific samza configs. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaRunnerOverrideConfigs { - public static final String BEAM_RUNNER_CONFIG_PREFIX = "beam.override."; - // whether the job is in portable mode - public static final String IS_PORTABLE_MODE = BEAM_RUNNER_CONFIG_PREFIX + "portable"; - // for portable mode only: port number for fn control api - public static final String FN_CONTROL_PORT = BEAM_RUNNER_CONFIG_PREFIX + "control.port"; - // timeout for waiting for control client to connect - public static final String CONTROL_CLIENT_MAX_WAIT_TIME_MS = "controL.wait.time.ms"; - public static final long DEFAULT_CONTROL_CLIENT_MAX_WAIT_TIME_MS = - Duration.ofMinutes(2).toMillis(); - public static final String FS_TOKEN_PATH = BEAM_RUNNER_CONFIG_PREFIX + "fs.token.path"; - public static final String DEFAULT_FS_TOKEN_PATH = null; - - private static boolean containsKey(SamzaPipelineOptions options, String configKey) { - if (options == null || options.getConfigOverride() == null) { - return false; - } - return 
options.getConfigOverride().containsKey(configKey); - } - - /** Whether the job is in portable mode based on the config override in the pipeline options. */ - public static boolean isPortableMode(SamzaPipelineOptions options) { - if (containsKey(options, IS_PORTABLE_MODE)) { - return options.getConfigOverride().get(IS_PORTABLE_MODE).equals(String.valueOf(true)); - } else { - return false; - } - } - - /** Get fn control port number based on the config override in the pipeline options. */ - public static int getFnControlPort(SamzaPipelineOptions options) { - if (containsKey(options, FN_CONTROL_PORT)) { - return Integer.parseInt(options.getConfigOverride().get(FN_CONTROL_PORT)); - } else { - return -1; - } - } - - /** Get max wait time for control client connection. */ - public static long getControlClientWaitTimeoutMs(SamzaPipelineOptions options) { - if (containsKey(options, CONTROL_CLIENT_MAX_WAIT_TIME_MS)) { - return Long.parseLong(options.getConfigOverride().get(CONTROL_CLIENT_MAX_WAIT_TIME_MS)); - } else { - return DEFAULT_CONTROL_CLIENT_MAX_WAIT_TIME_MS; - } - } - - /** Get fs token path for portable mode. */ - public static String getFsTokenPath(SamzaPipelineOptions options) { - if (containsKey(options, FS_TOKEN_PATH)) { - return options.getConfigOverride().get(FS_TOKEN_PATH); - } else { - return DEFAULT_FS_TOKEN_PATH; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerRegistrar.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerRegistrar.java deleted file mode 100644 index 102838975d13..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaRunnerRegistrar.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import com.google.auto.service.AutoService; -import org.apache.beam.sdk.PipelineRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsRegistrar; -import org.apache.beam.sdk.runners.PipelineRunnerRegistrar; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** - * AutoService registrar - will register SamzaRunner and SamzaOptions as possible pipeline runner - * services. - * - *

It ends up in META-INF/services and gets picked up by Beam. - */ -public class SamzaRunnerRegistrar { - private SamzaRunnerRegistrar() {} - - /** Pipeline runner registrar. */ - @AutoService(PipelineRunnerRegistrar.class) - public static class Runner implements PipelineRunnerRegistrar { - @Override - public Iterable>> getPipelineRunners() { - return ImmutableList.of(SamzaRunner.class, TestSamzaRunner.class); - } - } - - /** Pipeline options registrar. */ - @AutoService(PipelineOptionsRegistrar.class) - public static class Options implements PipelineOptionsRegistrar { - @Override - public Iterable> getPipelineOptions() { - return ImmutableList.of(SamzaPipelineOptions.class); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java deleted file mode 100644 index 810fc0c983f7..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/TestSamzaRunner.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza; - -import static org.apache.samza.config.JobConfig.JOB_JMX_ENABLED; -import static org.apache.samza.config.JobConfig.JOB_LOGGED_STORE_BASE_DIR; -import static org.apache.samza.config.JobConfig.JOB_NON_LOGGED_STORE_BASE_DIR; - -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import java.io.File; -import java.nio.file.Files; -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.runners.samza.translation.ConfigBuilder; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.PipelineRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsValidator; -import org.apache.commons.io.FileUtils; - -/** Test {@link SamzaRunner}. */ -public class TestSamzaRunner extends PipelineRunner { - - private final SamzaRunner delegate; - private final File storeDir; - - public static TestSamzaRunner fromOptions(PipelineOptions options) { - return new TestSamzaRunner(options); - } - - public static SamzaPipelineOptions createSamzaPipelineOptions( - PipelineOptions options, File storeDir) { - try { - final SamzaPipelineOptions samzaOptions = - PipelineOptionsValidator.validate(SamzaPipelineOptions.class, options); - final Map config = new HashMap<>(ConfigBuilder.localRunConfig()); - config.put(JOB_LOGGED_STORE_BASE_DIR, storeDir.getAbsolutePath()); - config.put(JOB_NON_LOGGED_STORE_BASE_DIR, storeDir.getAbsolutePath()); - config.put(JOB_JMX_ENABLED, "false"); - - if (samzaOptions.getConfigOverride() != null) { - config.putAll(samzaOptions.getConfigOverride()); - } - samzaOptions.setConfigOverride(config); - return samzaOptions; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private static File createStoreDir() { - try { - return Files.createTempDirectory("beam-samza-test").toFile(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public 
TestSamzaRunner(PipelineOptions options) { - this.storeDir = createStoreDir(); - this.delegate = SamzaRunner.fromOptions(createSamzaPipelineOptions(options, storeDir)); - } - - @Override - @SuppressFBWarnings(value = "DE_MIGHT_IGNORE") - public PipelineResult run(Pipeline pipeline) { - try { - final PipelineResult result = delegate.run(pipeline); - result.waitUntilFinish(); - - return result; - } catch (Throwable t) { - // Search for AssertionError. If present use it as the cause of the pipeline failure. - Throwable current = t; - - while (current != null) { - if (current instanceof AssertionError) { - throw (AssertionError) current; - } - current = current.getCause(); - } - - throw t; - } finally { - try { - // delete the store folder - FileUtils.deleteDirectory(storeDir); - } catch (Exception ignore) { - // Ignore - } - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystem.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystem.java deleted file mode 100644 index 92c9eea4293a..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystem.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.adapter; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Semaphore; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.metrics.FnWithMetricsWrapper; -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.io.BoundedSource; -import org.apache.beam.sdk.io.BoundedSource.BoundedReader; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.samza.Partition; -import org.apache.samza.SamzaException; -import org.apache.samza.config.Config; -import org.apache.samza.metrics.MetricsRegistry; -import org.apache.samza.metrics.MetricsRegistryMap; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.SystemAdmin; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemFactory; -import org.apache.samza.system.SystemProducer; -import org.apache.samza.system.SystemStreamMetadata; -import 
org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata; -import org.apache.samza.system.SystemStreamPartition; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A Samza system that supports reading from a Beam {@link BoundedSource}. The source is treated as - * though it has a single partition and does not support checkpointing via a changelog stream. If - * the job is restarted the bounded source will be consumed from the beginning. - */ -// TODO: instrumentation for the consumer -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class BoundedSourceSystem { - private static final Logger LOG = LoggerFactory.getLogger(BoundedSourceSystem.class); - - private static List> split( - BoundedSource source, SamzaPipelineOptions pipelineOptions) throws Exception { - final int numSplits = pipelineOptions.getMaxSourceParallelism(); - if (numSplits > 1) { - final long estimatedSize = source.getEstimatedSizeBytes(pipelineOptions); - // calculate the size of each split, rounded up to the ceiling. - final long bundleSize = (estimatedSize + numSplits - 1) / numSplits; - @SuppressWarnings("unchecked") - final List> splits = - (List>) source.split(bundleSize, pipelineOptions); - // Need the empty check here because Samza doesn't handle empty partition well - if (!splits.isEmpty()) { - return splits; - } - } - return Collections.singletonList(source); - } - - /** A {@link SystemAdmin} for {@link BoundedSourceSystem}. 
*/ - public static class Admin implements SystemAdmin { - private final BoundedSource source; - private final SamzaPipelineOptions pipelineOptions; - - public Admin(BoundedSource source, SamzaPipelineOptions pipelineOptions) { - this.source = source; - this.pipelineOptions = pipelineOptions; - } - - @Override - public Map getOffsetsAfter( - Map offsets) { - // BEAM checkpoints the next offset so here we just need to return the map itself - return offsets; - } - - @Override - public Map getSystemStreamMetadata(Set streamNames) { - return streamNames.stream() - .collect( - Collectors.toMap( - Function.identity(), - streamName -> { - try { - List> splits = split(source, pipelineOptions); - final Map partitionMetaData = - new HashMap<>(); - // we assume that the generated splits are stable, - // this is necessary so that the mapping of partition to source is correct - // in each container. - for (int i = 0; i < splits.size(); i++) { - partitionMetaData.put( - new Partition(i), new SystemStreamPartitionMetadata(null, null, null)); - } - return new SystemStreamMetadata(streamName, partitionMetaData); - } catch (Exception e) { - throw new SamzaException("Fail to read stream metadata", e); - } - })); - } - - @Override - public Integer offsetComparator(String offset1, String offset2) { - if (offset1 == null) { - return offset2 == null ? 0 : -1; - } - - if (offset2 == null) { - return 1; - } - - return Long.valueOf(offset1).compareTo(Long.valueOf(offset2)); - } - } - - /** - * A {@link SystemConsumer} for a {@link BoundedSource}. See {@link BoundedSourceSystem} for more - * details. 
- */ - public static class Consumer implements SystemConsumer { - private static final Logger LOG = LoggerFactory.getLogger(Consumer.class); - private static final AtomicInteger NEXT_ID = new AtomicInteger(); - - private final List> splits; - private final SamzaPipelineOptions pipelineOptions; - private final Map, SystemStreamPartition> readerToSsp = new HashMap<>(); - private final SamzaMetricsContainer metricsContainer; - private final String stepName; - - private ReaderTask readerTask; - - Consumer( - BoundedSource source, - SamzaPipelineOptions pipelineOptions, - SamzaMetricsContainer metricsContainer, - String stepName) { - try { - splits = split(source, pipelineOptions); - } catch (Exception e) { - throw new SamzaException("Fail to split source", e); - } - this.pipelineOptions = pipelineOptions; - this.metricsContainer = metricsContainer; - this.stepName = stepName; - } - - @Override - public void start() { - if (this.readerToSsp.isEmpty()) { - throw new IllegalArgumentException( - "Attempted to call start without assigned system stream partitions"); - } - - final int capacity = pipelineOptions.getSystemBufferSize(); - final FnWithMetricsWrapper metricsWrapper = - pipelineOptions.getEnableMetrics() - ? 
new FnWithMetricsWrapper(metricsContainer, stepName) - : null; - readerTask = new ReaderTask<>(readerToSsp, capacity, metricsWrapper); - final Thread thread = - new Thread(readerTask, "bounded-source-system-consumer-" + NEXT_ID.getAndIncrement()); - thread.start(); - } - - @Override - public void stop() { - // NOTE: this is not a blocking shutdown - if (readerTask != null) { - readerTask.stop(); - } - } - - @Override - public void register(SystemStreamPartition ssp, String offset) { - final int partitionId = ssp.getPartition().getPartitionId(); - try { - final BoundedReader reader = splits.get(partitionId).createReader(pipelineOptions); - readerToSsp.put(reader, ssp); - } catch (Exception e) { - throw new SamzaException("Error while creating source reader for ssp: " + ssp, e); - } - } - - @Override - public Map> poll( - Set systemStreamPartitions, long timeout) - throws InterruptedException { - assert !readerToSsp.isEmpty(); // start should be called before poll - - final Map> envelopes = new HashMap<>(); - for (SystemStreamPartition ssp : systemStreamPartitions) { - envelopes.put(ssp, readerTask.getNextMessages(ssp, timeout)); - } - return envelopes; - } - - private static class ReaderTask implements Runnable { - private final Map, SystemStreamPartition> readerToSsp; - private final Map> queues; - private final Semaphore available; - private final FnWithMetricsWrapper metricsWrapper; - - // NOTE: we do not support recovery with a bounded source (we restart from the beginning), - // so we do not need to have a way to tie an offset to a position in the bounded source. 
- private long offset; - private volatile Thread readerThread; - private volatile boolean stopInvoked = false; - private volatile Exception lastException; - - private ReaderTask( - Map, SystemStreamPartition> readerToSsp, - int capacity, - FnWithMetricsWrapper metricsWrapper) { - this.readerToSsp = readerToSsp; - this.available = new Semaphore(capacity); - this.metricsWrapper = metricsWrapper; - - final Map> qs = - new HashMap<>(); - readerToSsp.values().forEach(ssp -> qs.put(ssp, new LinkedBlockingQueue<>())); - this.queues = ImmutableMap.copyOf(qs); - } - - @Override - public void run() { - readerThread = Thread.currentThread(); - - final Set> availableReaders = new HashSet<>(readerToSsp.keySet()); - try { - for (BoundedReader reader : readerToSsp.keySet()) { - boolean hasData = invoke(reader::start); - if (hasData) { - enqueueMessage(reader); - } else { - enqueueMaxWatermarkAndEndOfStream(reader); - reader.close(); - availableReaders.remove(reader); - } - } - - while (!stopInvoked && !availableReaders.isEmpty()) { - final Iterator> iter = availableReaders.iterator(); - while (iter.hasNext()) { - final BoundedReader reader = iter.next(); - final boolean hasData = invoke(reader::advance); - if (hasData) { - enqueueMessage(reader); - } else { - enqueueMaxWatermarkAndEndOfStream(reader); - reader.close(); - iter.remove(); - } - } - } - } catch (InterruptedException e) { - // We use an interrupt to wake the reader from a blocking read under normal termination, - // so ignore it here. 
- } catch (Exception e) { - setError(e); - } finally { - availableReaders.forEach( - reader -> { - try { - reader.close(); - } catch (IOException e) { - LOG.error( - "Reader task failed to close reader for ssp {}", readerToSsp.get(reader), e); - } - }); - } - } - - private X invoke(FnWithMetricsWrapper.SupplierWithException fn) throws Exception { - if (metricsWrapper != null) { - return metricsWrapper.wrap(fn, true); - } else { - return fn.get(); - } - } - - private void enqueueMessage(BoundedReader reader) throws InterruptedException { - final T value = reader.getCurrent(); - final WindowedValue windowedValue = - WindowedValues.timestampedValueInGlobalWindow(value, reader.getCurrentTimestamp()); - final SystemStreamPartition ssp = readerToSsp.get(reader); - final IncomingMessageEnvelope envelope = - new IncomingMessageEnvelope( - ssp, Long.toString(offset++), null, OpMessage.ofElement(windowedValue)); - - available.acquire(); - queues.get(ssp).put(envelope); - } - - private void enqueueMaxWatermarkAndEndOfStream(BoundedReader reader) { - final SystemStreamPartition ssp = readerToSsp.get(reader); - // Send the max watermark to force completion of any open windows. 
- final IncomingMessageEnvelope watermarkEnvelope = - IncomingMessageEnvelope.buildWatermarkEnvelope( - ssp, BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - enqueueUninterruptibly(watermarkEnvelope); - - final IncomingMessageEnvelope endOfStreamEnvelope = - IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp); - enqueueUninterruptibly(endOfStreamEnvelope); - } - - private void stop() { - stopInvoked = true; - - final Thread readerThread = this.readerThread; - if (readerThread != null) { - readerThread.interrupt(); - } - } - - private List getNextMessages( - SystemStreamPartition ssp, long timeoutMillis) throws InterruptedException { - if (lastException != null) { - throw new RuntimeException(lastException); - } - - final List envelopes = new ArrayList<>(); - final BlockingQueue queue = queues.get(ssp); - final IncomingMessageEnvelope envelope = queue.poll(timeoutMillis, TimeUnit.MILLISECONDS); - - if (envelope != null) { - envelopes.add(envelope); - queue.drainTo(envelopes); - } - - available.release(envelopes.size()); - - if (lastException != null) { - throw new RuntimeException(lastException); - } - - return envelopes; - } - - private void setError(Exception exception) { - this.lastException = exception; - // A dummy message used to force the consumer to wake up immediately and check the - // lastException field, which will be populated. - readerToSsp - .values() - .forEach( - ssp -> { - final IncomingMessageEnvelope checkLastExceptionEvelope = - new IncomingMessageEnvelope(ssp, null, null, null); - enqueueUninterruptibly(checkLastExceptionEvelope); - }); - } - - private void enqueueUninterruptibly(IncomingMessageEnvelope envelope) { - final BlockingQueue queue = - queues.get(envelope.getSystemStreamPartition()); - while (true) { - try { - queue.put(envelope); - return; - } catch (InterruptedException e) { - // Some events require that we post an envelope to the queue even if the interrupt - // flag was set (i.e. 
during a call to stop) to ensure that the consumer properly - // shuts down. Consequently, if we receive an interrupt here we ignore it and retry - // the put operation. - } - } - } - } - } - - /** - * A {@link SystemFactory} that produces a {@link BoundedSourceSystem} for a particular {@link - * BoundedSource} registered in {@link Config}. - */ - public static class Factory implements SystemFactory { - @Override - public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) { - final String streamPrefix = "systems." + systemName; - final Config scopedConfig = config.subset(streamPrefix + ".", true); - - return new Consumer( - getBoundedSource(scopedConfig), - getPipelineOptions(config), - new SamzaMetricsContainer((MetricsRegistryMap) registry), - scopedConfig.get("stepName")); - } - - @Override - public SystemProducer getProducer(String systemName, Config config, MetricsRegistry registry) { - LOG.info("System {} does not have producer.", systemName); - return null; - } - - @Override - public SystemAdmin getAdmin(String systemName, Config config) { - final Config scopedConfig = config.subset("systems." 
+ systemName + ".", true); - return new Admin(getBoundedSource(scopedConfig), getPipelineOptions(config)); - } - - private static BoundedSource getBoundedSource(Config config) { - @SuppressWarnings("unchecked") - final BoundedSource source = - Base64Serializer.deserializeUnchecked(config.get("source"), BoundedSource.class); - return source; - } - - private static SamzaPipelineOptions getPipelineOptions(Config config) { - return Base64Serializer.deserializeUnchecked( - config.get("beamPipelineOptions"), SerializablePipelineOptions.class) - .get() - .as(SamzaPipelineOptions.class); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java deleted file mode 100644 index ffab2ff59ce5..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystem.java +++ /dev/null @@ -1,533 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.adapter; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Base64; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Semaphore; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.metrics.FnWithMetricsWrapper; -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.io.UnboundedSource.CheckpointMark; -import org.apache.beam.sdk.io.UnboundedSource.UnboundedReader; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.samza.Partition; -import org.apache.samza.SamzaException; -import org.apache.samza.config.Config; -import org.apache.samza.metrics.MetricsRegistry; -import org.apache.samza.metrics.MetricsRegistryMap; -import org.apache.samza.system.IncomingMessageEnvelope; -import 
org.apache.samza.system.SystemAdmin; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemFactory; -import org.apache.samza.system.SystemProducer; -import org.apache.samza.system.SystemStreamMetadata; -import org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata; -import org.apache.samza.system.SystemStreamPartition; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A Samza system that supports reading from a Beam {@link UnboundedSource}. The source is split - * into partitions. Samza creates the job model by assigning partitions to Samza tasks. - */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class UnboundedSourceSystem { - private static final Logger LOG = LoggerFactory.getLogger(UnboundedSourceSystem.class); - - // A dummy message used to force the consumer to wake up immediately and check the - // lastException field, which will be populated. - private static final IncomingMessageEnvelope CHECK_LAST_EXCEPTION_ENVELOPE = - new IncomingMessageEnvelope(null, null, null, null); - - /** - * For better parallelism in Samza, we need to configure a large split number for {@link - * UnboundedSource} like Kafka. This will most likely make each split contain a single partition, - * and be assigned to a Samza task. A large split number is safe since the actual split is bounded - * by the number of source partitions. 
- */ - private static - List> split( - UnboundedSource source, SamzaPipelineOptions pipelineOptions) - throws Exception { - final int numSplits = pipelineOptions.getMaxSourceParallelism(); - if (numSplits > 1) { - @SuppressWarnings("unchecked") - final List> splits = - (List>) source.split(numSplits, pipelineOptions); - // Need the empty check here because Samza doesn't handle empty partition well - if (!splits.isEmpty()) { - return splits; - } - } - return Collections.singletonList(source); - } - - /** A {@link SystemAdmin} for {@link UnboundedSourceSystem}. */ - public static class Admin implements SystemAdmin { - private final UnboundedSource source; - private final SamzaPipelineOptions pipelineOptions; - - public Admin(UnboundedSource source, SamzaPipelineOptions pipelineOptions) { - this.source = source; - this.pipelineOptions = pipelineOptions; - } - - @Override - public Map getOffsetsAfter( - Map offsets) { - // BEAM checkpoints the next offset so here we just need to return the map itself - return offsets; - } - - @Override - public Map getSystemStreamMetadata(Set streamNames) { - return streamNames.stream() - .collect( - Collectors.toMap( - Function.identity(), - streamName -> { - try { - final List> splits = - split(source, pipelineOptions); - final Map partitionMetaData = - new HashMap<>(); - // we assume that the generated splits are stable, - // this is necessary so that the mapping of partition to source is correct - // in each container. - for (int i = 0; i < splits.size(); i++) { - partitionMetaData.put( - new Partition(i), new SystemStreamPartitionMetadata(null, null, null)); - } - return new SystemStreamMetadata(streamName, partitionMetaData); - } catch (Exception e) { - throw new SamzaException("Fail to read stream metadata", e); - } - })); - } - - @Override - public Integer offsetComparator(String offset1, String offset2) { - // BEAM will fetch the exact offset. So we don't need to compare them. - // Return null indicating it's caught up. 
- return null; - } - } - - /** - * A {@link SystemConsumer} for a {@link UnboundedSource}. See {@link UnboundedSourceSystem} for - * more details. - */ - public static class Consumer - implements SystemConsumer { - private static final Logger LOG = LoggerFactory.getLogger(Consumer.class); - - private static final AtomicInteger NEXT_ID = new AtomicInteger(); - - private final Coder checkpointMarkCoder; - private final List> splits; - private final SamzaPipelineOptions pipelineOptions; - private final Map readerToSsp = new HashMap<>(); - private final SamzaMetricsContainer metricsContainer; - private final String stepName; - - private ReaderTask readerTask; - - Consumer( - UnboundedSource source, - SamzaPipelineOptions pipelineOptions, - SamzaMetricsContainer metricsContainer, - String stepName) { - try { - this.splits = split(source, pipelineOptions); - } catch (Exception e) { - throw new SamzaException("Fail to split source", e); - } - this.checkpointMarkCoder = source.getCheckpointMarkCoder(); - this.pipelineOptions = pipelineOptions; - this.metricsContainer = metricsContainer; - this.stepName = stepName; - } - - @Override - public void start() { - if (this.readerToSsp.isEmpty()) { - throw new IllegalArgumentException( - "Attempted to call start without assigned system stream partitions"); - } - - final FnWithMetricsWrapper metricsWrapper = - pipelineOptions.getEnableMetrics() - ? 
new FnWithMetricsWrapper(metricsContainer, stepName) - : null; - readerTask = - new ReaderTask<>( - readerToSsp, - checkpointMarkCoder, - pipelineOptions.getSystemBufferSize(), - pipelineOptions.getWatermarkInterval(), - metricsWrapper); - final Thread thread = - new Thread(readerTask, "unbounded-source-system-consumer-" + NEXT_ID.getAndIncrement()); - thread.start(); - } - - @Override - public void stop() { - // NOTE: this is not a blocking shutdown - readerTask.stop(); - } - - @Override - public void register(SystemStreamPartition ssp, String offset) { - CheckpointMarkT checkpoint = null; - if (StringUtils.isNoneEmpty(offset)) { - final byte[] offsetBytes = Base64.getDecoder().decode(offset); - final ByteArrayInputStream bais = new ByteArrayInputStream(offsetBytes); - try { - checkpoint = checkpointMarkCoder.decode(bais); - } catch (Exception e) { - throw new SamzaException("Error in decode offset", e); - } - } - - // Create unbounded reader with checkpoint - final int partitionId = ssp.getPartition().getPartitionId(); - try { - final UnboundedReader reader = - splits.get(partitionId).createReader(pipelineOptions, checkpoint); - readerToSsp.put(reader, ssp); - } catch (Exception e) { - throw new SamzaException("Error while creating source reader for ssp: " + ssp, e); - } - } - - @Override - public Map> poll( - Set systemStreamPartitions, long timeout) - throws InterruptedException { - assert !readerToSsp.isEmpty(); // start should be called before poll - - final Map> envelopes = new HashMap<>(); - for (SystemStreamPartition ssp : systemStreamPartitions) { - envelopes.put(ssp, readerTask.getNextMessages(ssp, timeout)); - } - return envelopes; - } - - private static class ReaderTask implements Runnable { - private final Map readerToSsp; - private final List readers; - private final Coder checkpointMarkCoder; - private final Map currentWatermarks = new HashMap<>(); - private final Map> queues; - private final long watermarkInterval; - private final Semaphore 
available; - private final FnWithMetricsWrapper metricsWrapper; - - private volatile boolean running; - private volatile Exception lastException; - private long lastWatermarkTime = 0L; - - private ReaderTask( - Map readerToSsp, - Coder checkpointMarkCoder, - int capacity, - long watermarkInterval, - FnWithMetricsWrapper metricsWrapper) { - this.readerToSsp = readerToSsp; - this.checkpointMarkCoder = checkpointMarkCoder; - this.readers = ImmutableList.copyOf(readerToSsp.keySet()); - this.watermarkInterval = watermarkInterval; - this.available = new Semaphore(capacity); - this.metricsWrapper = metricsWrapper; - - final Map> qs = - new HashMap<>(); - readerToSsp.values().forEach(ssp -> qs.put(ssp, new LinkedBlockingQueue<>())); - this.queues = ImmutableMap.copyOf(qs); - } - - @Override - public void run() { - this.running = true; - - try { - for (UnboundedReader reader : readers) { - final boolean hasData = invoke(reader::start); - if (hasData) { - available.acquire(); - enqueueMessage(reader); - } - } - - while (running) { - boolean elementAvailable = false; - for (UnboundedReader reader : readers) { - final boolean hasData = invoke(reader::advance); - if (hasData) { - while (!available.tryAcquire( - 1, - Math.max(lastWatermarkTime + watermarkInterval - System.currentTimeMillis(), 1), - TimeUnit.MILLISECONDS)) { - updateWatermark(); - } - enqueueMessage(reader); - elementAvailable = true; - } - } - - updateWatermark(); - - if (!elementAvailable) { - // TODO: make poll interval configurable - Thread.sleep(50); - } - } - } catch (Exception e) { - lastException = e; - running = false; - } finally { - readers.forEach( - reader -> { - try { - reader.close(); - } catch (IOException e) { - LOG.error("Reader task failed to close reader", e); - } - }); - } - - if (lastException != null) { - // Force any pollers to wake up - queues - .values() - .forEach( - queue -> { - queue.clear(); - queue.add(CHECK_LAST_EXCEPTION_ENVELOPE); - }); - } - } - - private X 
invoke(FnWithMetricsWrapper.SupplierWithException fn) throws Exception { - if (metricsWrapper != null) { - return metricsWrapper.wrap(fn, true); - } else { - return fn.get(); - } - } - - private void updateWatermark() throws InterruptedException { - final long time = System.currentTimeMillis(); - if (time - lastWatermarkTime > watermarkInterval) { - for (UnboundedReader reader : readers) { - final SystemStreamPartition ssp = readerToSsp.get(reader); - final Instant currentWatermark = - currentWatermarks.containsKey(ssp) - ? currentWatermarks.get(ssp) - : BoundedWindow.TIMESTAMP_MIN_VALUE; - final Instant nextWatermark = reader.getWatermark(); - if (currentWatermark.isBefore(nextWatermark)) { - currentWatermarks.put(ssp, nextWatermark); - if (BoundedWindow.TIMESTAMP_MAX_VALUE.isAfter(nextWatermark)) { - enqueueWatermark(reader); - } else { - // Max watermark has been reached for this reader. - enqueueMaxWatermarkAndEndOfStream(reader); - running = false; - } - } - } - - lastWatermarkTime = time; - } - } - - private void enqueueWatermark(UnboundedReader reader) throws InterruptedException { - final SystemStreamPartition ssp = readerToSsp.get(reader); - final IncomingMessageEnvelope envelope = - IncomingMessageEnvelope.buildWatermarkEnvelope(ssp, reader.getWatermark().getMillis()); - - queues.get(ssp).put(envelope); - } - - private void enqueueMessage(UnboundedReader reader) throws InterruptedException { - @SuppressWarnings("unchecked") - final T value = (T) reader.getCurrent(); - final Instant time = reader.getCurrentTimestamp(); - final SystemStreamPartition ssp = readerToSsp.get(reader); - final WindowedValue windowedValue = - WindowedValues.timestampedValueInGlobalWindow(value, time); - - final OpMessage opMessage = OpMessage.ofElement(windowedValue); - final IncomingMessageEnvelope envelope = - new IncomingMessageEnvelope(ssp, getOffset(reader), null, opMessage); - - queues.get(ssp).put(envelope); - } - - // Send an max watermark message and an end of stream 
message to the corresponding ssp to - // close windows and finish the task. - private void enqueueMaxWatermarkAndEndOfStream(UnboundedReader reader) { - final SystemStreamPartition ssp = readerToSsp.get(reader); - // Send the max watermark to force completion of any open windows. - final IncomingMessageEnvelope watermarkEnvelope = - IncomingMessageEnvelope.buildWatermarkEnvelope( - ssp, BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - enqueueUninterruptibly(watermarkEnvelope); - - final IncomingMessageEnvelope endOfStreamEnvelope = - IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp); - enqueueUninterruptibly(endOfStreamEnvelope); - } - - private void enqueueUninterruptibly(IncomingMessageEnvelope envelope) { - final BlockingQueue queue = - queues.get(envelope.getSystemStreamPartition()); - while (true) { - try { - queue.put(envelope); - return; - } catch (InterruptedException e) { - // Some events require that we post an envelope to the queue even if the interrupt - // flag was set (i.e. during a call to stop) to ensure that the consumer properly - // shuts down. Consequently, if we receive an interrupt here we ignore it and retry - // the put operation. 
- } - } - } - - void stop() { - running = false; - } - - List getNextMessages(SystemStreamPartition ssp, long timeoutMillis) - throws InterruptedException { - if (lastException != null) { - throw new RuntimeException(lastException); - } - - final List envelopes = new ArrayList<>(); - final BlockingQueue queue = queues.get(ssp); - final IncomingMessageEnvelope envelope = queue.poll(timeoutMillis, TimeUnit.MILLISECONDS); - - if (envelope != null) { - envelopes.add(envelope); - queue.drainTo(envelopes); - } - - final int numElements = - (int) envelopes.stream().filter(ev -> (ev.getMessage() instanceof OpMessage)).count(); - available.release(numElements); - - if (lastException != null) { - throw new RuntimeException(lastException); - } - - return envelopes; - } - - private String getOffset(UnboundedReader reader) { - try { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - @SuppressWarnings("unchecked") - final CheckpointMarkT checkpointMark = - (CheckpointMarkT) invoke(reader::getCheckpointMark); - checkpointMarkCoder.encode(checkpointMark, baos); - return Base64.getEncoder().encodeToString(baos.toByteArray()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } - } - - /** - * A {@link SystemFactory} that produces a {@link UnboundedSourceSystem} for a particular {@link - * UnboundedSource} registered in {@link Config}. - */ - public static class Factory implements SystemFactory { - @Override - public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) { - final String streamPrefix = "systems." 
+ systemName; - final Config scopedConfig = config.subset(streamPrefix + ".", true); - return new Consumer( - getUnboundedSource(scopedConfig), - getPipelineOptions(config), - new SamzaMetricsContainer((MetricsRegistryMap) registry), - scopedConfig.get("stepName")); - } - - @Override - public SystemProducer getProducer(String systemName, Config config, MetricsRegistry registry) { - LOG.info("System {} does not have producer.", systemName); - return null; - } - - @Override - public SystemAdmin getAdmin(String systemName, Config config) { - final Config scopedConfig = config.subset("systems." + systemName + ".", true); - return new Admin( - getUnboundedSource(scopedConfig), getPipelineOptions(config)); - } - - private static - UnboundedSource getUnboundedSource(Config config) { - @SuppressWarnings("unchecked") - final UnboundedSource source = - Base64Serializer.deserializeUnchecked(config.get("source"), UnboundedSource.class); - return source; - } - - private static SamzaPipelineOptions getPipelineOptions(Config config) { - return Base64Serializer.deserializeUnchecked( - config.get("beamPipelineOptions"), SerializablePipelineOptions.class) - .get() - .as(SamzaPipelineOptions.class); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/package-info.java deleted file mode 100644 index 582194440c9b..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/adapter/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza.adapter; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamContainerRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamContainerRunner.java deleted file mode 100644 index 0f4a1c7a6905..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamContainerRunner.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.container; - -import java.time.Duration; -import org.apache.samza.application.SamzaApplication; -import org.apache.samza.application.descriptors.ApplicationDescriptor; -import org.apache.samza.application.descriptors.ApplicationDescriptorImpl; -import org.apache.samza.application.descriptors.ApplicationDescriptorUtil; -import org.apache.samza.config.Config; -import org.apache.samza.config.ShellCommandConfig; -import org.apache.samza.context.ExternalContext; -import org.apache.samza.job.ApplicationStatus; -import org.apache.samza.runtime.ApplicationRunner; -import org.apache.samza.runtime.ContainerLaunchUtil; -import org.apache.samza.util.SamzaUncaughtExceptionHandler; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Runs the beam Yarn container, using the static global job model. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class BeamContainerRunner implements ApplicationRunner { - private static final Logger LOG = LoggerFactory.getLogger(BeamContainerRunner.class); - - @SuppressWarnings("rawtypes") - private final ApplicationDescriptorImpl appDesc; - - @SuppressWarnings("rawtypes") - public BeamContainerRunner(SamzaApplication app, Config config) { - this.appDesc = ApplicationDescriptorUtil.getAppDescriptor(app, config); - } - - @Override - public void run(ExternalContext externalContext) { - Thread.setDefaultUncaughtExceptionHandler( - new SamzaUncaughtExceptionHandler( - () -> { - LOG.info("Exiting process now."); - System.exit(1); - })); - - ContainerLaunchUtil.run( - appDesc, System.getenv(ShellCommandConfig.ENV_CONTAINER_ID), ContainerCfgLoader.jobModel); - } - - @Override - public void kill() { - // Do nothing. Yarn will kill the container. - } - - @Override - public ApplicationStatus status() { - // The container is running during the life span of this object. 
- return ApplicationStatus.Running; - } - - @Override - public void waitForFinish() { - // Container run is synchronous - // so calling waitForFinish() after run() should return immediately - LOG.info("Container has stopped"); - } - - @Override - public boolean waitForFinish(Duration timeout) { - // Container run is synchronous - // so calling waitForFinish() after run() should return immediately - LOG.info("Container has stopped"); - return true; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamJobCoordinatorRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamJobCoordinatorRunner.java deleted file mode 100644 index fb00a018fb29..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/BeamJobCoordinatorRunner.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.container; - -import java.time.Duration; -import org.apache.samza.application.SamzaApplication; -import org.apache.samza.application.descriptors.ApplicationDescriptor; -import org.apache.samza.clustermanager.JobCoordinatorLaunchUtil; -import org.apache.samza.config.Config; -import org.apache.samza.context.ExternalContext; -import org.apache.samza.job.ApplicationStatus; -import org.apache.samza.runtime.ApplicationRunner; - -/** Runs on Yarn AM, execute planning and launches JobCoordinator. */ -public class BeamJobCoordinatorRunner implements ApplicationRunner { - - @SuppressWarnings("rawtypes") - private final SamzaApplication app; - - private final Config config; - - /** - * Constructors a {@link BeamJobCoordinatorRunner} to run the {@code app} with the {@code config}. - * - * @param app application to run - * @param config configuration for the application - */ - @SuppressWarnings("rawtypes") - public BeamJobCoordinatorRunner( - SamzaApplication app, Config config) { - this.app = app; - this.config = config; - } - - @Override - public void run(ExternalContext externalContext) { - JobCoordinatorLaunchUtil.run(app, config); - } - - @Override - public void kill() { - throw new UnsupportedOperationException( - "BeamJobCoordinatorRunner#kill should never be invoked."); - } - - @Override - public ApplicationStatus status() { - throw new UnsupportedOperationException( - "BeamJobCoordinatorRunner#status should never be invoked."); - } - - @Override - public void waitForFinish() { - throw new UnsupportedOperationException( - "BeamJobCoordinatorRunner#waitForFinish should never be invoked."); - } - - @Override - public boolean waitForFinish(Duration timeout) { - throw new UnsupportedOperationException( - "BeamJobCoordinatorRunner#waitForFinish should never be invoked."); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoader.java 
b/runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoader.java deleted file mode 100644 index 9437aea56561..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoader.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.container; - -import java.util.HashMap; -import java.util.Map; -import java.util.Random; -import org.apache.samza.config.Config; -import org.apache.samza.config.ConfigLoader; -import org.apache.samza.config.MapConfig; -import org.apache.samza.config.ShellCommandConfig; -import org.apache.samza.container.SamzaContainer; -import org.apache.samza.job.model.JobModel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Loader for the Beam yarn container to load job model. 
*/ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class ContainerCfgLoader implements ConfigLoader { - private static final Logger LOG = LoggerFactory.getLogger(ContainerCfgLoader.class); - - private static final Object LOCK = new Object(); - static volatile JobModel jobModel; - private static final Random RANDOM = new Random(); - - @Override - public Config getConfig() { - if (jobModel == null) { - synchronized (LOCK) { - if (jobModel == null) { - final String containerId = System.getenv(ShellCommandConfig.ENV_CONTAINER_ID); - LOG.info("Got container ID: {}", containerId); - final String coordinatorUrl = System.getenv(ShellCommandConfig.ENV_COORDINATOR_URL); - LOG.info("Got coordinator URL: {}", coordinatorUrl); - final int delay = RANDOM.nextInt(SamzaContainer.DEFAULT_READ_JOBMODEL_DELAY_MS()) + 1; - jobModel = SamzaContainer.readJobModel(coordinatorUrl, delay); - } - } - } - - final Map config = new HashMap<>(jobModel.getConfig()); - config.put("app.runner.class", BeamContainerRunner.class.getName()); - return new MapConfig(config); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoaderFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoaderFactory.java deleted file mode 100644 index d3b090d6e20a..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/ContainerCfgLoaderFactory.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.container; - -import org.apache.samza.config.Config; -import org.apache.samza.config.ConfigLoader; -import org.apache.samza.config.ConfigLoaderFactory; - -/** Factory for the Beam yarn container to get loader to load job model. */ -public class ContainerCfgLoaderFactory implements ConfigLoaderFactory { - @Override - public ConfigLoader getLoader(Config config) { - return new ContainerCfgLoader(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/container/package-info.java deleted file mode 100644 index 58a09e6023de..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/container/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza.container; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java deleted file mode 100644 index 7bec91abb34d..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/DoFnRunnerWithMetrics.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.metrics; - -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.beam.sdk.values.WindowedValue; -import org.joda.time.Instant; - -/** - * {@link DoFnRunner} wrapper with metrics. The class uses {@link SamzaMetricsContainer} to keep - * BEAM metrics results and update Samza metrics. 
- */ -public class DoFnRunnerWithMetrics implements DoFnRunner { - private final DoFnRunner underlying; - private final FnWithMetricsWrapper metricsWrapper; - - private DoFnRunnerWithMetrics( - DoFnRunner underlying, SamzaMetricsContainer metricsContainer, String stepName) { - this.underlying = underlying; - this.metricsWrapper = new FnWithMetricsWrapper(metricsContainer, stepName); - } - - public static DoFnRunner wrap( - DoFnRunner doFnRunner, SamzaMetricsContainer metricsContainer, String stepName) { - return new DoFnRunnerWithMetrics<>(doFnRunner, metricsContainer, stepName); - } - - @Override - public void startBundle() { - withMetrics(underlying::startBundle, false); - } - - @Override - public void processElement(WindowedValue elem) { - withMetrics(() -> underlying.processElement(elem), false); - } - - @Override - public void onTimer( - String timerId, - String timerFamilyId, - KeyT key, - BoundedWindow window, - Instant timestamp, - Instant outputTimestamp, - TimeDomain timeDomain, - CausedByDrain causedByDrain) { - withMetrics( - () -> - underlying.onTimer( - timerId, - timerFamilyId, - key, - window, - timestamp, - outputTimestamp, - timeDomain, - causedByDrain), - false); - } - - @Override - public void finishBundle() { - withMetrics(underlying::finishBundle, true); - } - - @Override - public void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) { - underlying.onWindowExpiration(window, timestamp, key); - } - - @Override - public DoFn getFn() { - return underlying.getFn(); - } - - private void withMetrics(Runnable runnable, boolean shouldUpdateMetrics) { - try { - metricsWrapper.wrap( - () -> { - runnable.run(); - return (Void) null; - }, - shouldUpdateMetrics); - } catch (Exception e) { - throw new RuntimeException(e); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/FnWithMetricsWrapper.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/FnWithMetricsWrapper.java deleted 
file mode 100644 index 8052e0d66146..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/FnWithMetricsWrapper.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.metrics; - -import java.io.Closeable; -import org.apache.beam.sdk.metrics.MetricsEnvironment; - -/** This class wraps a {@link java.util.function.Supplier} function call with BEAM metrics. */ -public class FnWithMetricsWrapper { - - /** Interface for functions to be wrapped with metrics. 
*/ - public interface SupplierWithException { - T get() throws Exception; - } - - private final SamzaMetricsContainer metricsContainer; - private final String stepName; - - public FnWithMetricsWrapper(SamzaMetricsContainer metricsContainer, String stepName) { - this.metricsContainer = metricsContainer; - this.stepName = stepName; - } - - public T wrap(SupplierWithException fn, boolean shouldUpdateMetrics) throws Exception { - try (Closeable closeable = - MetricsEnvironment.scopedMetricsContainer(metricsContainer.getContainer(stepName))) { - T result = fn.get(); - // Skip updating metrics if not necessary to improve performance - if (shouldUpdateMetrics) { - metricsContainer.updateMetrics(stepName); - } - return result; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaGBKMetricOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaGBKMetricOp.java deleted file mode 100644 index 9b6375171c23..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaGBKMetricOp.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.metrics; - -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.beam.runners.samza.runtime.KeyedTimerData; -import org.apache.beam.runners.samza.runtime.Op; -import org.apache.beam.runners.samza.runtime.OpEmitter; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * SamzaGBKMetricOp is a {@link Op} that emits & maintains default metrics for input or output - * PCollection for GroupByKey. - * - *

For Input PCollection: It emits the input throughput and maintains avg input time for input - * PCollection per windowId. - * - *

For Output PCollection: It emits the output throughput and maintains avg output time for - * output PCollection per windowId. It is also responsible for emitting latency metric per windowId - * once the watermark passes the end of window timestamp. - * - *

Assumes that {@code SamzaGBKMetricOp#processWatermark(Instant, OpEmitter)} is exclusive of - * {@code SamzaGBKMetricOp#processElement(Instant, OpEmitter)}. Specifically, the processWatermark - * method assumes that no calls to processElement will be made during its execution, and vice versa. - * - * @param The type of the elements in the input PCollection. - */ -class SamzaGBKMetricOp implements Op { - private static final Logger LOG = LoggerFactory.getLogger(SamzaGBKMetricOp.class); - // Unique name of the PTransform this MetricOp is associated with - private final String transformFullName; - private final SamzaTransformMetricRegistry samzaTransformMetricRegistry; - // Type of the processing operation - private final SamzaMetricOpFactory.OpType opType; - - private final String pValue; - // Counters for keeping sum of arrival time and count of elements per windowId - @SuppressFBWarnings("SE_BAD_FIELD") - private final ConcurrentHashMap sumOfTimestampsPerWindowId; - - @SuppressFBWarnings("SE_BAD_FIELD") - private final ConcurrentHashMap sumOfCountPerWindowId; - // Name of the task, for logging purpose - private transient String task; - - @Override - @SuppressWarnings({"rawtypes", "unchecked"}) - public void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter emitter) { - // for logging / debugging purposes - this.task = context.getTaskContext().getTaskModel().getTaskName().getTaskName(); - // Register the transform with SamzaTransformMetricRegistry - samzaTransformMetricRegistry.register(transformFullName, pValue, context); - } - - // Some fields are initialized in open() method, which is called after the constructor. 
- @SuppressWarnings("initialization.fields.uninitialized") - public SamzaGBKMetricOp( - String pValue, - String transformFullName, - SamzaMetricOpFactory.OpType opType, - SamzaTransformMetricRegistry samzaTransformMetricRegistry) { - this.pValue = pValue; - this.transformFullName = transformFullName; - this.opType = opType; - this.samzaTransformMetricRegistry = samzaTransformMetricRegistry; - this.sumOfTimestampsPerWindowId = new ConcurrentHashMap<>(); - this.sumOfCountPerWindowId = new ConcurrentHashMap<>(); - } - - @Override - public void processElement(WindowedValue inputElement, OpEmitter emitter) { - // one element can belong to multiple windows - for (BoundedWindow windowId : inputElement.getWindows()) { - // Atomic updates to counts - sumOfCountPerWindowId.compute( - windowId, - (key, value) -> { - value = value == null ? Long.valueOf(0) : value; - return ++value; - }); - // Atomic updates to sum of arrival timestamps - sumOfTimestampsPerWindowId.compute( - windowId, - (key, value) -> { - value = value == null ? BigInteger.ZERO : value; - return value.add(BigInteger.valueOf(System.nanoTime())); - }); - } - - switch (opType) { - case INPUT: - samzaTransformMetricRegistry - .getTransformMetrics() - .getTransformInputThroughput(transformFullName) - .inc(); - break; - case OUTPUT: - samzaTransformMetricRegistry - .getTransformMetrics() - .getTransformOutputThroughput(transformFullName) - .inc(); - break; - } - emitter.emitElement(inputElement); - } - - @Override - public void processWatermark(Instant watermark, OpEmitter emitter) { - final List closedWindows = new ArrayList<>(); - sumOfCountPerWindowId.keySet().stream() - .filter(windowId -> watermark.isAfter(windowId.maxTimestamp())) // window is closed - .forEach( - windowId -> { - // In case if BigInteger overflows for long we only retain the last 64 bits of the sum - long sumOfTimestamps = - sumOfTimestampsPerWindowId.get(windowId) != null - ? 
sumOfTimestampsPerWindowId.get(windowId).longValue() - : 0L; - long count = sumOfCountPerWindowId.get(windowId); - closedWindows.add(windowId); - - if (LOG.isDebugEnabled()) { - LOG.debug( - "Processing {} Watermark for Transform: {}, WindowId:{}, count: {}, sumOfTimestamps: {}, task: {}", - opType, - transformFullName, - windowId, - count, - sumOfTimestamps, - task); - } - - // if the window is closed and there is some data - if (sumOfTimestamps > 0 && count > 0) { - switch (opType) { - case INPUT: - // Update the arrival time for the window - samzaTransformMetricRegistry.updateArrivalTimeMap( - transformFullName, windowId, Math.floorDiv(sumOfTimestamps, count)); - break; - case OUTPUT: - // Compute the latency if there is some data for the window - samzaTransformMetricRegistry.emitLatencyMetric( - transformFullName, windowId, Math.floorDiv(sumOfTimestamps, count), task); - break; - } - } - }); - - // remove the closed windows - sumOfCountPerWindowId.keySet().removeAll(closedWindows); - sumOfTimestampsPerWindowId.keySet().removeAll(closedWindows); - - // Update the watermark progress for the transform output - if (opType == SamzaMetricOpFactory.OpType.OUTPUT) { - samzaTransformMetricRegistry - .getTransformMetrics() - .getTransformWatermarkProgress(transformFullName) - .set(watermark.getMillis()); - } - - emitter.emitWatermark(watermark); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOp.java deleted file mode 100644 index 0f5334546c7c..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOp.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.metrics; - -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; -import org.apache.beam.runners.samza.runtime.KeyedTimerData; -import org.apache.beam.runners.samza.runtime.Op; -import org.apache.beam.runners.samza.runtime.OpEmitter; -import org.apache.beam.runners.samza.util.PipelineJsonRenderer; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * SamzaMetricOp is a metric Op that emits & maintains default transform metrics for inputs & - * outputs PCollection to the non data-shuffle transform. It emits the output throughput and - * maintains avg arrival time for input & output PCollection per watermark. - * - *

Assumes that {@code SamzaMetricOp#processWatermark(Instant, OpEmitter)} is exclusive of {@code - * SamzaMetricOp#processElement(Instant, OpEmitter)}. Specifically, the processWatermark method - * assumes that no calls to processElement will be made during its execution, and vice versa. - * - * @param The type of the elements in the output PCollection. - */ -class SamzaMetricOp implements Op { - // Unique name of the PTransform this MetricOp is associated with - private final String transformFullName; - private final SamzaTransformMetricRegistry samzaTransformMetricRegistry; - // Name or identifier of the PCollection which PTransform is processing - private final String pValue; - // Counters for output throughput - private final AtomicLong count; - private final AtomicReference sumOfTimestamps; - // Type of the PTransform input or output - private final SamzaMetricOpFactory.OpType opType; - // List of input PValue(s) for all PCollections processing the PTransform - private transient List transformInputs; - // List of output PValue(s) for all PCollections processing the PTransform - private transient List transformOutputs; - // Name of the task, for logging purpose - private transient String task; - - private static final Logger LOG = LoggerFactory.getLogger(SamzaMetricOp.class); - - // Some fields are initialized in open() method, which is called after the constructor. 
- @SuppressWarnings("initialization.fields.uninitialized") - public SamzaMetricOp( - @NonNull String pValue, - @NonNull String transformFullName, - SamzaMetricOpFactory.OpType opType, - @NonNull SamzaTransformMetricRegistry samzaTransformMetricRegistry) { - this.transformFullName = transformFullName; - this.samzaTransformMetricRegistry = samzaTransformMetricRegistry; - this.pValue = pValue; - this.opType = opType; - this.count = new AtomicLong(0L); - this.sumOfTimestamps = new AtomicReference<>(BigInteger.ZERO); - } - - @Override - @SuppressWarnings({"rawtypes", "unchecked"}) - public void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter emitter) { - final Map.Entry, List> transformInputOutput = - PipelineJsonRenderer.getTransformIOMap(config).get(transformFullName); - this.transformInputs = - transformInputOutput != null ? transformInputOutput.getKey() : new ArrayList(); - this.transformOutputs = - transformInputOutput != null ? transformInputOutput.getValue() : new ArrayList(); - // for logging / debugging purposes - this.task = context.getTaskContext().getTaskModel().getTaskName().getTaskName(); - // Register the transform with SamzaTransformMetricRegistry - samzaTransformMetricRegistry.register(transformFullName, pValue, context); - } - - @Override - public void processElement(WindowedValue inputElement, OpEmitter emitter) { - // update counters for timestamps - count.incrementAndGet(); - sumOfTimestamps.updateAndGet(sum -> sum.add(BigInteger.valueOf(System.nanoTime()))); - switch (opType) { - case INPUT: - samzaTransformMetricRegistry - .getTransformMetrics() - .getTransformInputThroughput(transformFullName) - .inc(); - break; - case OUTPUT: - samzaTransformMetricRegistry - .getTransformMetrics() - .getTransformOutputThroughput(transformFullName) - .inc(); - break; - } - emitter.emitElement(inputElement); - } - - @Override - @SuppressWarnings({"CompareToZero"}) - public void processWatermark(Instant watermark, OpEmitter 
emitter) { - if (LOG.isDebugEnabled()) { - LOG.debug( - "Processing Output Watermark for Transform: {} Count: {} SumOfTimestamps: {} for Watermark: {} for Task: {}", - transformFullName, - count.get(), - sumOfTimestamps.get().longValue(), - watermark.getMillis(), - task); - } - - // if there is no input data then counters will be zero and only watermark will progress - if (count.get() > 0) { - // if BigInt.longValue is out of range for long then only the low-order 64 bits are retained - long avg = Math.floorDiv(sumOfTimestamps.get().longValue(), count.get()); - // Update MetricOp Registry with avg arrival for the pValue - samzaTransformMetricRegistry.updateArrivalTimeMap( - transformFullName, pValue, watermark.getMillis(), avg); - if (opType == SamzaMetricOpFactory.OpType.OUTPUT) { - // compute & emit the latency metric if the opType is OUTPUT - samzaTransformMetricRegistry.emitLatencyMetric( - transformFullName, transformInputs, transformOutputs, watermark.getMillis(), task); - } - } - - if (opType == SamzaMetricOpFactory.OpType.OUTPUT) { - // update output watermark progress metric - samzaTransformMetricRegistry - .getTransformMetrics() - .getTransformWatermarkProgress(transformFullName) - .set(watermark.getMillis()); - } - - // reset all counters - count.set(0L); - this.sumOfTimestamps.set(BigInteger.ZERO); - emitter.emitWatermark(watermark); - } - - @VisibleForTesting - void init(List transformInputs, List transformOutputs) { - this.transformInputs = transformInputs; - this.transformOutputs = transformOutputs; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOpFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOpFactory.java deleted file mode 100644 index a4112a510459..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricOpFactory.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one 
- * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.metrics; - -import org.apache.beam.runners.samza.runtime.Op; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** - * Factory class to create {@link Op} for default transform metric computation. - * - *

Each metric Op computes and emits default throughput, latency & watermark progress metric per - * transform for Beam Samza Runner. A metric Op can be either attached to Input PCollection or - * Output PCollection of a PTransform. - * - *

Each concrete metric OP is responsible for following metrics computation: 1. Throughput: Emit - * the number of elements processed in the PCollection 2. Watermark Progress: Emit the output - * watermark progress of the PCollection 3. Latency: Maintain the avg arrival time per watermark - * across elements it processes, compute & emit the latency - */ -@SuppressWarnings({ - "rawtypes", // TODO(https://issues.apache.org/jira/browse/BEAM-10556) - "nullness" -}) // TODO(https://issues.apache.org/jira/browse/BEAM-10402) -public class SamzaMetricOpFactory { - public enum OpType { - INPUT, - OUTPUT - } - - /** - * Create a {@link Op} for default transform metric computation. - * - * @param urn URN of the PCollection metric Op is processing - * @param pValue name of the PCollection metric Op is processing - * @param transformName name of the PTransform for which metric Op is created - * @param opType type of the metric - * @param samzaTransformMetricRegistry metric registry - * @param type of the message - * @return a {@link Op} for default transform metric computation - */ - public static @NonNull Op createMetricOp( - @NonNull String urn, - @NonNull String pValue, - @NonNull String transformName, - @NonNull OpType opType, - @NonNull SamzaTransformMetricRegistry samzaTransformMetricRegistry) { - if (isDataShuffleTransform(urn)) { - return new SamzaGBKMetricOp<>(pValue, transformName, opType, samzaTransformMetricRegistry); - } - return new SamzaMetricOp<>(pValue, transformName, opType, samzaTransformMetricRegistry); - } - - private static boolean isDataShuffleTransform(String urn) { - return urn.equals(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN) - || urn.equals(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricsContainer.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricsContainer.java deleted file mode 100644 index 
1679b748b10b..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaMetricsContainer.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.metrics; - -import static org.apache.beam.runners.core.metrics.MetricsContainerStepMap.asAttemptedOnlyMetricResults; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Consumer; -import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; -import org.apache.beam.sdk.metrics.GaugeResult; -import org.apache.beam.sdk.metrics.MetricQueryResults; -import org.apache.beam.sdk.metrics.MetricResult; -import org.apache.beam.sdk.metrics.MetricResults; -import org.apache.beam.sdk.metrics.MetricsContainer; -import org.apache.beam.sdk.metrics.MetricsFilter; -import org.apache.samza.metrics.Counter; -import org.apache.samza.metrics.Gauge; -import org.apache.samza.metrics.Metric; -import org.apache.samza.metrics.MetricsRegistryMap; - -/** - * This class holds the {@link MetricsContainer}s for BEAM metrics, and update the results to Samza - * metrics. 
- */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaMetricsContainer { - private static final String BEAM_METRICS_GROUP = "BeamMetrics"; - - private final MetricsContainerStepMap metricsContainers = new MetricsContainerStepMap(); - private final MetricsRegistryMap metricsRegistry; - - public SamzaMetricsContainer(MetricsRegistryMap metricsRegistry) { - this.metricsRegistry = metricsRegistry; - this.metricsRegistry.metrics().put(BEAM_METRICS_GROUP, new ConcurrentHashMap<>()); - } - - public MetricsContainer getContainer(String stepName) { - return this.metricsContainers.getContainer(stepName); - } - - public MetricsContainerStepMap getContainers() { - return this.metricsContainers; - } - - public void updateMetrics(String stepName) { - assert metricsRegistry != null; - - final MetricResults metricResults = asAttemptedOnlyMetricResults(metricsContainers); - final MetricQueryResults results = - metricResults.queryMetrics(MetricsFilter.builder().addStep(stepName).build()); - - final CounterUpdater updateCounter = new CounterUpdater(); - results.getCounters().forEach(updateCounter); - - final GaugeUpdater updateGauge = new GaugeUpdater(); - results.getGauges().forEach(updateGauge); - - // TODO(https://github.com/apache/beam/issues/21043): add distribution metrics to Samza - } - - public void updateExecutableStageBundleMetric(String metricName, long time) { - @SuppressWarnings("unchecked") - Gauge gauge = (Gauge) getSamzaMetricFor(metricName); - if (gauge == null) { - gauge = metricsRegistry.newGauge(BEAM_METRICS_GROUP, metricName, 0L); - } - gauge.set(time); - } - - private class CounterUpdater implements Consumer> { - @Override - public void accept(MetricResult metricResult) { - final String metricName = getMetricName(metricResult); - Counter counter = (Counter) getSamzaMetricFor(metricName); - if (counter == null) { - counter = metricsRegistry.newCounter(BEAM_METRICS_GROUP, metricName); - } - 
counter.dec(counter.getCount()); - counter.inc(metricResult.getAttempted()); - } - } - - private class GaugeUpdater implements Consumer> { - @Override - public void accept(MetricResult metricResult) { - final String metricName = getMetricName(metricResult); - @SuppressWarnings("unchecked") - Gauge gauge = (Gauge) getSamzaMetricFor(metricName); - if (gauge == null) { - gauge = metricsRegistry.newGauge(BEAM_METRICS_GROUP, metricName, 0L); - } - gauge.set(metricResult.getAttempted().getValue()); - } - } - - private Metric getSamzaMetricFor(String metricName) { - return metricsRegistry.getGroup(BEAM_METRICS_GROUP).get(metricName); - } - - private static String getMetricName(MetricResult metricResult) { - return metricResult.getKey().toString(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetricRegistry.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetricRegistry.java deleted file mode 100644 index add207752f06..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetricRegistry.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.metrics; - -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import java.io.Serializable; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.samza.context.Context; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * SamzaTransformMetricRegistry is a registry that maintains the metrics for each transform. It - * maintains the average arrival time for each PCollection for a primitive transform. - * - *

For a non-data shuffling primitive transform, the average arrival time is calculated per - * watermark, per PCollection {@link org.apache.beam.sdk.values.PValue} and updated in - * avgArrivalTimeMap - */ -public class SamzaTransformMetricRegistry implements Serializable { - private static final Logger LOG = LoggerFactory.getLogger(SamzaTransformMetricRegistry.class); - - // TransformName -> PValue for pCollection -> Map - private final ConcurrentHashMap>> - avgArrivalTimeMap; - // TransformName -> Map - @SuppressFBWarnings("SE_BAD_FIELD") - private final ConcurrentHashMap> - avgArrivalTimeMapForGbk; - - // Per Transform Metrics for each primitive transform - private final SamzaTransformMetrics transformMetrics; - - public SamzaTransformMetricRegistry() { - this.avgArrivalTimeMap = new ConcurrentHashMap<>(); - this.avgArrivalTimeMapForGbk = new ConcurrentHashMap<>(); - this.transformMetrics = new SamzaTransformMetrics(); - } - - @VisibleForTesting - SamzaTransformMetricRegistry(SamzaTransformMetrics samzaTransformMetrics) { - this.transformMetrics = samzaTransformMetrics; - this.avgArrivalTimeMap = new ConcurrentHashMap<>(); - this.avgArrivalTimeMapForGbk = new ConcurrentHashMap<>(); - } - - public void register(String transformFullName, String pValue, Context ctx) { - transformMetrics.register(transformFullName, ctx); - // initialize the map for the transform - avgArrivalTimeMap.putIfAbsent(transformFullName, new ConcurrentHashMap<>()); - avgArrivalTimeMap.get(transformFullName).putIfAbsent(pValue, new ConcurrentHashMap<>()); - avgArrivalTimeMapForGbk.putIfAbsent(transformFullName, new ConcurrentHashMap<>()); - } - - public SamzaTransformMetrics getTransformMetrics() { - return transformMetrics; - } - - public void updateArrivalTimeMap(String transformName, String pValue, long watermark, long avg) { - if (avgArrivalTimeMap.get(transformName) != null - && avgArrivalTimeMap.get(transformName).get(pValue) != null) { - ConcurrentHashMap avgArrivalTimeMapForPValue = - 
avgArrivalTimeMap.get(transformName).get(pValue); - // update the average arrival time for the latest watermark - avgArrivalTimeMapForPValue.put(watermark, avg); - // remove any stale entries which are lesser than the watermark - avgArrivalTimeMapForPValue.entrySet().removeIf(entry -> entry.getKey() < watermark); - } - } - - public void updateArrivalTimeMap(String transformName, BoundedWindow windowId, long avg) { - ConcurrentHashMap avgArrivalTimeMapForTransform = - avgArrivalTimeMapForGbk.get(transformName); - if (avgArrivalTimeMapForTransform != null) { - avgArrivalTimeMapForTransform.put(windowId, avg); - } - } - - @SuppressWarnings("nullness") - public void emitLatencyMetric( - String transformName, BoundedWindow windowId, long avgArrivalEndTime, String taskName) { - Long avgArrivalStartTime = - avgArrivalTimeMapForGbk.get(transformName) != null - ? avgArrivalTimeMapForGbk.get(transformName).remove(windowId) - : null; - - if (avgArrivalStartTime == null || avgArrivalStartTime == 0 || avgArrivalEndTime == 0) { - LOG.debug( - "Failure to Emit Metric for Transform: {}, Start-Time: {} or End-Time: {} found is 0/null for windowId: {}, task: {}", - transformName, - avgArrivalStartTime, - avgArrivalEndTime, - windowId, - taskName); - return; - } - - if (LOG.isDebugEnabled()) { - LOG.debug( - "Success Emit Metric for Transform: {}, window: {} for task: {}", - transformName, - windowId, - taskName); - } - transformMetrics - .getTransformLatencyMetric(transformName) - .update(avgArrivalEndTime - avgArrivalStartTime); - - transformMetrics - .getTransformCacheSize(transformName) - .set((long) avgArrivalTimeMapForGbk.get(transformName).size()); - } - - // Checker framework bug: https://github.com/typetools/checker-framework/issues/979 - @SuppressWarnings("return") - public void emitLatencyMetric( - String transformName, - List inputs, - List outputs, - Long watermark, - String taskName) { - final ConcurrentHashMap> avgArrivalTimeMapForTransform = - 
avgArrivalTimeMap.get(transformName); - - if (avgArrivalTimeMapForTransform == null || inputs.isEmpty() || outputs.isEmpty()) { - return; - } - - // get the avg arrival times for all the input PValues - final List inputPValuesAvgArrivalTimes = - inputs.stream() - .map(avgArrivalTimeMapForTransform::get) - .map(map -> map == null ? null : map.remove(watermark)) - .filter(avgArrivalTime -> avgArrivalTime != null) - .collect(Collectors.toList()); - - // get the avg arrival times for all the output PValues - final List outputPValuesAvgArrivalTimes = - outputs.stream() - .map(avgArrivalTimeMapForTransform::get) - .map(map -> map == null ? null : map.remove(watermark)) - .filter(avgArrivalTime -> avgArrivalTime != null) - .collect(Collectors.toList()); - - if (inputPValuesAvgArrivalTimes.isEmpty() || outputPValuesAvgArrivalTimes.isEmpty()) { - LOG.debug( - "Failure to Emit Metric for Transform: {} inputArrivalTime: {} or outputArrivalTime: {} not found for Watermark: {} Task: {}", - transformName, - inputPValuesAvgArrivalTimes, - inputPValuesAvgArrivalTimes, - watermark, - taskName); - return; - } - - final long startTime = Collections.min(inputPValuesAvgArrivalTimes); - final long endTime = Collections.max(outputPValuesAvgArrivalTimes); - final long latency = endTime - startTime; - transformMetrics.getTransformLatencyMetric(transformName).update(latency); - - transformMetrics - .getTransformCacheSize(transformName) - .set( - avgArrivalTimeMapForTransform.values().stream() - .mapToLong(ConcurrentHashMap::size) - .sum()); - - LOG.debug( - "Success Emit Metric Transform: {} for watermark: {} for task: {}", - transformName, - watermark, - taskName); - } - - @VisibleForTesting - @Nullable - ConcurrentHashMap> getAverageArrivalTimeMap( - String transformName) { - return avgArrivalTimeMap.get(transformName); - } - - @VisibleForTesting - @Nullable - ConcurrentHashMap getAverageArrivalTimeMapForGBK(String transformName) { - return avgArrivalTimeMapForGbk.get(transformName); - } 
-} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetrics.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetrics.java deleted file mode 100644 index 229b6da4e7c0..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/SamzaTransformMetrics.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.metrics; - -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import java.io.Serializable; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.samza.context.Context; -import org.apache.samza.metrics.Counter; -import org.apache.samza.metrics.Gauge; -import org.apache.samza.metrics.MetricsRegistry; -import org.apache.samza.metrics.SlidingTimeWindowReservoir; -import org.apache.samza.metrics.Timer; - -/** - * Metrics like throughput, latency and watermark progress for each Beam transform for Samza Runner. 
- */ -@SuppressWarnings("return") -public class SamzaTransformMetrics implements Serializable { - private static final String ENABLE_TASK_METRICS = "runner.samza.transform.enable.task.metrics"; - - private static final int DEFAULT_LOOKBACK_TIMER_WINDOW_SIZE_MS = 180000; - private static final String GROUP = "SamzaBeamTransformMetrics"; - private static final String TRANSFORM_LATENCY_METRIC = "handle-message-ns"; - private static final String TRANSFORM_WATERMARK_PROGRESS = "output-watermark-ms"; - private static final String TRANSFORM_IP_THROUGHPUT = "num-input-messages"; - private static final String TRANSFORM_OP_THROUGHPUT = "num-output-messages"; - - private static final String TRANSFORM_ARRIVAL_TIME_CACHE_SIZE = "in-mem-cache-size"; - - // Transform name to metric maps - @SuppressFBWarnings("SE_BAD_FIELD") - private final Map transformLatency; - - @SuppressFBWarnings("SE_BAD_FIELD") - private final Map> transformWatermarkProgress; - - @SuppressFBWarnings("SE_BAD_FIELD") - private final Map transformInputThroughput; - - @SuppressFBWarnings("SE_BAD_FIELD") - private final Map transformOutputThroughPut; - - @SuppressFBWarnings("SE_BAD_FIELD") - private final Map> transformCacheSize; - - public SamzaTransformMetrics() { - this.transformLatency = new ConcurrentHashMap<>(); - this.transformOutputThroughPut = new ConcurrentHashMap<>(); - this.transformWatermarkProgress = new ConcurrentHashMap<>(); - this.transformInputThroughput = new ConcurrentHashMap<>(); - this.transformCacheSize = new ConcurrentHashMap<>(); - } - - public void register(String transformName, Context ctx) { - // Output Watermark metric per transform will always be per transform, per task, since per - // container output watermark is not useful for debugging - transformWatermarkProgress.putIfAbsent( - transformName, - ctx.getTaskContext() - .getTaskMetricsRegistry() - .newGauge( - GROUP, getMetricNameWithPrefix(TRANSFORM_WATERMARK_PROGRESS, transformName), 0L)); - - // Latency, throughput metrics can 
be per container (default) or per task - final boolean enablePerTaskMetrics = - ctx.getJobContext().getConfig().getBoolean(ENABLE_TASK_METRICS, false); - final MetricsRegistry metricsRegistry = - enablePerTaskMetrics - ? ctx.getTaskContext().getTaskMetricsRegistry() - : ctx.getContainerContext().getContainerMetricsRegistry(); - transformLatency.putIfAbsent( - transformName, - metricsRegistry.newTimer(GROUP, getTimerWithCustomizedLookBackWindow(transformName))); - transformOutputThroughPut.putIfAbsent( - transformName, - metricsRegistry.newCounter( - GROUP, getMetricNameWithPrefix(TRANSFORM_OP_THROUGHPUT, transformName))); - transformInputThroughput.putIfAbsent( - transformName, - metricsRegistry.newCounter( - GROUP, getMetricNameWithPrefix(TRANSFORM_IP_THROUGHPUT, transformName))); - transformCacheSize.putIfAbsent( - transformName, - ctx.getTaskContext() - .getTaskMetricsRegistry() - .newGauge( - GROUP, - getMetricNameWithPrefix(TRANSFORM_ARRIVAL_TIME_CACHE_SIZE, transformName), - 0L)); - } - - public Timer getTransformLatencyMetric(String transformName) { - return transformLatency.get(transformName); - } - - public Counter getTransformInputThroughput(String transformName) { - return transformInputThroughput.get(transformName); - } - - public Counter getTransformOutputThroughput(String transformName) { - return transformOutputThroughPut.get(transformName); - } - - public Gauge getTransformCacheSize(String transformName) { - return transformCacheSize.get(transformName); - } - - public Gauge getTransformWatermarkProgress(String transformName) { - return transformWatermarkProgress.get(transformName); - } - - // Customize in-memory window size for timer, default from samza is 5 mins which causes memory - // pressure if a lot of timers are registered - private static Timer getTimerWithCustomizedLookBackWindow(String transformName) { - return new Timer( - getMetricNameWithPrefix(TRANSFORM_LATENCY_METRIC, transformName), - new 
SlidingTimeWindowReservoir(DEFAULT_LOOKBACK_TIMER_WINDOW_SIZE_MS)); - } - - private static String getMetricNameWithPrefix(String metricName, String transformName) { - // Replace all non-alphanumeric characters with underscore - final String samzaSafeMetricName = transformName.replaceAll("[^A-Za-z0-9_]", "_"); - return String.format("%s-%s", samzaSafeMetricName, metricName); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/package-info.java deleted file mode 100644 index 97415846e310..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/metrics/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. 
*/ -package org.apache.beam.runners.samza.metrics; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/package-info.java deleted file mode 100644 index 549a4d81c2de..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunner.java deleted file mode 100644 index e1bc9251a304..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunner.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.stream.Collectors; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.WindowedValue; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This {@link DoFnRunner} adds the capability of executing the {@link - * org.apache.beam.sdk.transforms.DoFn.ProcessElement} in the thread pool, and returns the future to - * the collector for the underlying async execution. 
- */ -public class AsyncDoFnRunner implements DoFnRunner { - private static final Logger LOG = LoggerFactory.getLogger(AsyncDoFnRunner.class); - - // A dummy key to represent null keys - private static final Object NULL_KEY = new Object(); - - private final DoFnRunner underlying; - private final ExecutorService executor; - private final OpEmitter emitter; - private final FutureCollector futureCollector; - private final boolean isStateful; - - /** - * This map keeps track of the last outputFutures for a certain key. When the next element of the - * key comes in, its outputFutures will be chained from the last outputFutures in the map. When - * all futures of a key have been complete, the key entry will be removed. The map is bounded by - * (bundle size * 2). - */ - private final Map>>> keyedOutputFutures; - - public static AsyncDoFnRunner create( - DoFnRunner runner, - OpEmitter emitter, - FutureCollector futureCollector, - boolean isStateful, - SamzaPipelineOptions options) { - - LOG.info("Run DoFn with {}", AsyncDoFnRunner.class.getName()); - return new AsyncDoFnRunner<>(runner, emitter, futureCollector, isStateful, options); - } - - private AsyncDoFnRunner( - DoFnRunner runner, - OpEmitter emitter, - FutureCollector futureCollector, - boolean isStateful, - SamzaPipelineOptions options) { - this.underlying = runner; - this.executor = options.getExecutorServiceForProcessElement(); - this.emitter = emitter; - this.futureCollector = futureCollector; - this.isStateful = isStateful; - this.keyedOutputFutures = new ConcurrentHashMap<>(); - } - - @Override - public void startBundle() { - underlying.startBundle(); - } - - @Override - public void processElement(WindowedValue elem) { - final CompletableFuture>> outputFutures = - isStateful ? 
processStateful(elem) : processElement(elem, null); - - futureCollector.addAll(outputFutures); - } - - private CompletableFuture>> processElement( - WindowedValue elem, - @Nullable CompletableFuture>> prevOutputFuture) { - - final CompletableFuture>> prevFuture = - prevOutputFuture == null - ? CompletableFuture.completedFuture(Collections.emptyList()) - : prevOutputFuture; - - // For ordering by key, we chain the processing of the elem to the completion of - // the previous output of the same key - return prevFuture.thenApplyAsync( - x -> { - underlying.processElement(elem); - - return emitter.collectOutput().stream() - .map(OpMessage::getElement) - .collect(Collectors.toList()); - }, - executor); - } - - private CompletableFuture>> processStateful( - WindowedValue elem) { - final Object key = getKey(elem); - - final CompletableFuture>> outputFutures = - processElement(elem, keyedOutputFutures.get(key)); - - // Update the latest outputFuture for key - keyedOutputFutures.put(key, outputFutures); - - // Remove the outputFuture from the map once it's complete. - // This ensures the map will be cleaned up immediately. - return outputFutures.thenApply( - output -> { - // Under the condition that the outputFutures has not been updated - keyedOutputFutures.remove(key, outputFutures); - return output; - }); - } - - /** Package private for testing. 
*/ - boolean hasOutputFuturesForKey(Object key) { - return keyedOutputFutures.containsKey(key); - } - - @Override - public void onTimer( - String timerId, - String timerFamilyId, - KeyT key, - BoundedWindow window, - Instant timestamp, - Instant outputTimestamp, - TimeDomain timeDomain, - CausedByDrain causedByDrain) { - underlying.onTimer( - timerId, timerFamilyId, key, window, timestamp, outputTimestamp, timeDomain, causedByDrain); - } - - @Override - public void finishBundle() { - underlying.finishBundle(); - } - - @Override - public void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) { - underlying.onWindowExpiration(window, timestamp, key); - } - - @Override - public DoFn getFn() { - return underlying.getFn(); - } - - private Object getKey(WindowedValue elem) { - KV kv = (KV) elem.getValue(); - if (kv == null) { - return NULL_KEY; - } else { - Object key = kv.getKey(); - return key == null ? NULL_KEY : key; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/BundleManager.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/BundleManager.java deleted file mode 100644 index 36ba19d7da3c..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/BundleManager.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import org.joda.time.Instant; - -/** - * Bundle management for the {@link DoFnOp} that handles lifecycle of a bundle. It also serves as a - * proxy for the {@link DoFnOp} to process watermark and decides to 1. Hold watermark if there is at - * least one bundle in progress. 2. Propagates the watermark to downstream DAG, if all the previous - * bundles have completed. - * - *

A bundle is considered complete only when the outputs corresponding to each element in the - * bundle have been resolved and the watermark associated with the bundle(if any) is propagated - * downstream. The output of an element is considered resolved based on the nature of the ParDoFn 1. - * In case of synchronous ParDo, outputs of the element is resolved immediately after the - * processElement returns. 2. In case of asynchronous ParDo, outputs of the element is resolved when - * all the future emitted by the processElement is resolved. - * - * @param output type of the {@link DoFnOp} - */ -public interface BundleManager { - /** Starts a new bundle if not already started, then adds an element to the existing bundle. */ - void tryStartBundle(); - - /** - * Signals a watermark event arrived. The BundleManager will decide if the watermark needs to be - * processed, and notify the listener if needed. - * - * @param watermark - * @param emitter - */ - void processWatermark(Instant watermark, OpEmitter emitter); - - /** - * Signals the BundleManager that a timer is up. - * - * @param keyedTimerData - * @param emitter - */ - void processTimer(KeyedTimerData keyedTimerData, OpEmitter emitter); - - /** - * Fails the current bundle, throws away the pending output, and resets the bundle to an empty - * state. - * - * @param t the throwable that caused the failure. - */ - void signalFailure(Throwable t); - - /** - * Tries to close the bundle, and reset the bundle to an empty state. - * - * @param emitter - */ - void tryFinishBundle(OpEmitter emitter); - - /** - * A listener used to track the lifecycle of a bundle. Typically, the lifecycle of a bundle - * consists of 1. Start bundle - Invoked when the bundle is started 2. Finish bundle - Invoked - * when the bundle is complete. Refer to the docs under {@link BundleManager} for definition on - * when a bundle is considered complete. 3. onWatermark - Invoked when watermark is ready to be - * propagated to downstream DAG. 
Refer to the docs under {@link BundleManager} on when watermark - * is held vs propagated. - * - * @param - */ - interface BundleProgressListener { - void onBundleStarted(); - - void onBundleFinished(OpEmitter emitter); - - void onWatermark(Instant watermark, OpEmitter emitter); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/ClassicBundleManager.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/ClassicBundleManager.java deleted file mode 100644 index 53b6968e1119..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/ClassicBundleManager.java +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiConsumer; -import javax.annotation.Nullable; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@inheritDoc} Implementation of BundleManager for non-portable mode. Keeps track of the async - * function completions. - * - *

This class is not thread safe and the current implementation relies on the assumption that - * messages are dispatched to BundleManager in a single threaded mode. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class ClassicBundleManager implements BundleManager { - private static final Logger LOG = LoggerFactory.getLogger(ClassicBundleManager.class); - private static final long MIN_BUNDLE_CHECK_TIME_MS = 10L; - - private final long maxBundleSize; - private final long maxBundleTimeMs; - private final BundleProgressListener bundleProgressListener; - private final FutureCollector futureCollector; - private final Scheduler> bundleTimerScheduler; - private final String bundleCheckTimerId; - - // Number elements belonging to the current active bundle - private transient AtomicLong currentBundleElementCount; - // Number of bundles that are in progress but not yet finished - private transient AtomicLong pendingBundleCount; - // Denotes the start time of the current active bundle - private transient AtomicLong bundleStartTime; - // Denotes if there is an active in progress bundle. Note at a given time, we can have multiple - // bundle in progress. - // This flag denotes if there is a bundle that is current and hasn't been closed. - private transient AtomicBoolean isBundleStarted; - // Holder for watermark which gets propagated when the bundle is finished. - private transient Instant bundleWatermarkHold; - // A future that is completed once all futures belonging to the current active bundle are - // completed. The value is null if there are no futures in the current active bundle. 
- private transient AtomicReference> currentActiveBundleDoneFutureReference; - private transient CompletionStage watermarkFuture; - - public ClassicBundleManager( - BundleProgressListener bundleProgressListener, - FutureCollector futureCollector, - long maxBundleSize, - long maxBundleTimeMs, - Scheduler> bundleTimerScheduler, - String bundleCheckTimerId) { - this.maxBundleSize = maxBundleSize; - this.maxBundleTimeMs = maxBundleTimeMs; - this.bundleProgressListener = bundleProgressListener; - this.bundleTimerScheduler = bundleTimerScheduler; - this.bundleCheckTimerId = bundleCheckTimerId; - this.futureCollector = futureCollector; - - if (maxBundleSize > 1) { - scheduleNextBundleCheck(); - } - - // instance variable initialization for bundle tracking - this.bundleStartTime = new AtomicLong(Long.MAX_VALUE); - this.currentActiveBundleDoneFutureReference = new AtomicReference<>(); - this.currentBundleElementCount = new AtomicLong(0L); - this.isBundleStarted = new AtomicBoolean(false); - this.pendingBundleCount = new AtomicLong(0L); - this.watermarkFuture = CompletableFuture.completedFuture(null); - } - - /* - * Schedule in processing time to check whether the current bundle should be closed. Note that - * we only approximately achieve max bundle time by checking as frequent as half of the max bundle - * time set by users. This would violate the max bundle time by up to half of it but should - * acceptable in most cases (and cheaper than scheduling a timer at the beginning of every bundle). 
- */ - private void scheduleNextBundleCheck() { - final Instant nextBundleCheckTime = - Instant.now().plus(Duration.millis(maxBundleTimeMs / 2 + MIN_BUNDLE_CHECK_TIME_MS)); - final TimerInternals.TimerData timerData = - TimerInternals.TimerData.of( - this.bundleCheckTimerId, - StateNamespaces.global(), - nextBundleCheckTime, - nextBundleCheckTime, - TimeDomain.PROCESSING_TIME, - CausedByDrain.NORMAL); - bundleTimerScheduler.schedule( - new KeyedTimerData<>(new byte[0], null, timerData), nextBundleCheckTime.getMillis()); - } - - @Override - public void tryStartBundle() { - futureCollector.prepare(); - - if (isBundleStarted.compareAndSet(false, true)) { - LOG.debug("Starting a new bundle."); - // make sure the previous bundle is sealed and futures are cleared - Preconditions.checkArgument( - currentActiveBundleDoneFutureReference.get() == null, - "Current active bundle done future should be null before starting a new bundle."); - bundleStartTime.set(System.currentTimeMillis()); - pendingBundleCount.incrementAndGet(); - bundleProgressListener.onBundleStarted(); - } - - currentBundleElementCount.incrementAndGet(); - } - - @Override - public void processWatermark(Instant watermark, OpEmitter emitter) { - // propagate watermark immediately if no bundle is in progress and all the previous bundles have - // completed. - if (!isBundleStarted() && pendingBundleCount.get() == 0) { - LOG.debug("Propagating watermark: {} directly since no bundle in progress.", watermark); - bundleProgressListener.onWatermark(watermark, emitter); - return; - } - - // hold back the watermark since there is either a bundle in progress or previously closed - // bundles are unfinished. - this.bundleWatermarkHold = watermark; - - // for batch mode, the max watermark should force the bundle to close - if (BoundedWindow.TIMESTAMP_MAX_VALUE.equals(watermark)) { - /* - * Due to lack of async watermark function, we block on the previous watermark futures before propagating the watermark - * downstream. 
If a bundle is in progress tryFinishBundle() fill force the bundle to close and emit watermark. - * If no bundle in progress, we progress watermark explicitly after the completion of previous watermark futures. - */ - if (isBundleStarted()) { - LOG.info( - "Received max watermark. Triggering finish bundle before flushing the watermark downstream."); - tryFinishBundle(emitter); - watermarkFuture.toCompletableFuture().join(); - } else { - LOG.info( - "Received max watermark. Waiting for previous bundles to complete before flushing the watermark downstream."); - watermarkFuture.toCompletableFuture().join(); - bundleProgressListener.onWatermark(watermark, emitter); - } - } - } - - @Override - public void processTimer(KeyedTimerData keyedTimerData, OpEmitter emitter) { - // this is internal timer in processing time to check whether a bundle should be closed - if (bundleCheckTimerId.equals(keyedTimerData.getTimerData().getTimerId())) { - tryFinishBundle(emitter); - scheduleNextBundleCheck(); - } - } - - /** - * Signal the bundle manager to handle failure. We discard the output collected as part of - * processing the current element and reset the bundle count. - * - * @param t failure cause - */ - @Override - public void signalFailure(Throwable t) { - LOG.error("Encountered error during processing the message. Discarding the output due to: ", t); - futureCollector.discard(); - // reset the bundle start flag only if the bundle has started - isBundleStarted.compareAndSet(true, false); - - // bundle start may not necessarily mean we have actually started the bundle since some of the - // invariant check conditions within bundle start could throw exceptions. 
so rely on bundle - // start time - if (bundleStartTime.get() != Long.MAX_VALUE) { - currentBundleElementCount.set(0L); - bundleStartTime.set(Long.MAX_VALUE); - pendingBundleCount.decrementAndGet(); - currentActiveBundleDoneFutureReference.set(null); - } - } - - @Override - public void tryFinishBundle(OpEmitter emitter) { - - // we need to seal the output for each element within a bundle irrespective of the whether we - // decide to finish the - // bundle or not - CompletionStage>> outputFuture = futureCollector.finish(); - - if (shouldFinishBundle() && isBundleStarted.compareAndSet(true, false)) { - LOG.debug("Finishing the current bundle."); - - // reset the bundle count - // seal the bundle and emit the result future (collection of results) - // chain the finish bundle invocation on the finish bundle - currentBundleElementCount.set(0L); - bundleStartTime.set(Long.MAX_VALUE); - Instant watermarkHold = bundleWatermarkHold; - bundleWatermarkHold = null; - - CompletionStage currentActiveBundleDoneFuture = - currentActiveBundleDoneFutureReference.get(); - outputFuture = - outputFuture.thenCombine( - currentActiveBundleDoneFuture != null - ? currentActiveBundleDoneFuture - : CompletableFuture.completedFuture(null), - (res, ignored) -> { - bundleProgressListener.onBundleFinished(emitter); - return res; - }); - - BiConsumer>, Void> watermarkPropagationFn; - if (watermarkHold == null) { - watermarkPropagationFn = (ignored, res) -> pendingBundleCount.decrementAndGet(); - } else { - watermarkPropagationFn = - (ignored, res) -> { - LOG.debug("Propagating watermark: {} to downstream.", watermarkHold); - bundleProgressListener.onWatermark(watermarkHold, emitter); - pendingBundleCount.decrementAndGet(); - }; - } - - // We chain the current watermark emission with previous watermark and the output futures - // since bundles can finish out of order but we still want the watermark to be emitted in - // order. 
- watermarkFuture = outputFuture.thenAcceptBoth(watermarkFuture, watermarkPropagationFn); - currentActiveBundleDoneFutureReference.set(null); - } else if (isBundleStarted.get()) { - final CompletableFuture>> finalOutputFuture = - outputFuture.toCompletableFuture(); - currentActiveBundleDoneFutureReference.updateAndGet( - maybePrevFuture -> { - CompletableFuture prevFuture = - maybePrevFuture != null ? maybePrevFuture : CompletableFuture.completedFuture(null); - - return CompletableFuture.allOf(prevFuture, finalOutputFuture); - }); - } - - // emit the future to the propagate it to rest of the DAG - emitter.emitFuture(outputFuture); - } - - @VisibleForTesting - long getCurrentBundleElementCount() { - return currentBundleElementCount.longValue(); - } - - @VisibleForTesting - @Nullable - CompletionStage getCurrentBundleDoneFuture() { - return currentActiveBundleDoneFutureReference.get(); - } - - @VisibleForTesting - void setCurrentBundleDoneFuture(CompletableFuture currentBundleResultFuture) { - this.currentActiveBundleDoneFutureReference.set(currentBundleResultFuture); - } - - @VisibleForTesting - long getPendingBundleCount() { - return pendingBundleCount.longValue(); - } - - @VisibleForTesting - void setPendingBundleCount(long value) { - pendingBundleCount.set(value); - } - - @VisibleForTesting - boolean isBundleStarted() { - return isBundleStarted.get(); - } - - @VisibleForTesting - void setBundleWatermarkHold(Instant watermark) { - this.bundleWatermarkHold = watermark; - } - - /** - * We close the current bundle in progress if one of the following criteria is met 1. The bundle - * count ≥ maxBundleSize 2. Time elapsed since the bundle started is ≥ maxBundleTimeMs 3. 
- * Watermark hold equals to TIMESTAMP_MAX_VALUE which usually is the case for bounded jobs - * - * @return true - if one of the criteria above is satisfied; false - otherwise - */ - private boolean shouldFinishBundle() { - return isBundleStarted.get() - && (currentBundleElementCount.get() >= maxBundleSize - || System.currentTimeMillis() - bundleStartTime.get() >= maxBundleTimeMs - || BoundedWindow.TIMESTAMP_MAX_VALUE.equals(bundleWatermarkHold)); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java deleted file mode 100644 index bc87e2460ec4..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnOp.java +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.ServiceLoader; -import java.util.concurrent.CompletionStage; -import java.util.function.Function; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.PushbackSideInputDoFnRunner; -import org.apache.beam.runners.core.SideInputHandler; -import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.fnexecution.control.ExecutableStageContext; -import org.apache.beam.runners.fnexecution.control.StageBundleFactory; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; -import org.apache.beam.runners.samza.SamzaExecutionContext; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.util.DoFnUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.transforms.reflect.DoFnInvoker; -import org.apache.beam.sdk.transforms.reflect.DoFnInvokers; -import org.apache.beam.sdk.transforms.reflect.DoFnSignature; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.WindowedValueMultiReceiver; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.values.PCollection; -import 
org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterators; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Samza operator for {@link DoFn}. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class DoFnOp implements Op { - private static final Logger LOG = LoggerFactory.getLogger(DoFnOp.class); - - private final TupleTag mainOutputTag; - private final DoFn doFn; - private final Coder keyCoder; - private final Collection> sideInputs; - private final List> sideOutputTags; - private final WindowingStrategy windowingStrategy; - private final OutputManagerFactory outputManagerFactory; - // NOTE: we use HashMap here to guarantee Serializability - // Mapping from view id to a view - private final HashMap> idToViewMap; - private final String transformFullName; - private final String transformId; - private final Coder inputCoder; - private final Coder> windowedValueCoder; - private final HashMap, Coder> outputCoders; - private final PCollection.IsBounded isBounded; - private final String bundleCheckTimerId; - private final String bundleStateId; - - // portable api related - private final boolean isPortable; - private final RunnerApi.ExecutableStagePayload stagePayload; - private final JobInfo jobInfo; - private final HashMap> idToTupleTagMap; - - private transient SamzaTimerInternalsFactory timerInternalsFactory; - private transient DoFnRunner fnRunner; - private transient PushbackSideInputDoFnRunner pushbackFnRunner; - 
private transient SideInputHandler sideInputHandler; - private transient DoFnInvoker doFnInvoker; - private transient SamzaPipelineOptions samzaPipelineOptions; - - // This is derivable from pushbackValues which is persisted to a store. - // TODO: eagerly initialize the hold in init - @edu.umd.cs.findbugs.annotations.SuppressWarnings( - justification = "No bug", - value = "SE_TRANSIENT_FIELD_NOT_RESTORED") - private transient Instant pushbackWatermarkHold; - - // TODO: add this to checkpointable state - private transient Instant inputWatermark; - private transient BundleManager bundleManager; - private transient Instant sideInputWatermark; - private transient List> pushbackValues; - private transient ExecutableStageContext stageContext; - private transient StageBundleFactory stageBundleFactory; - private transient boolean bundleDisabled; - - private final DoFnSchemaInformation doFnSchemaInformation; - private final Map> sideInputMapping; - private final Map stateIdToStoreMapping; - - public DoFnOp( - TupleTag mainOutputTag, - DoFn doFn, - Coder keyCoder, - Coder inputCoder, - Coder> windowedValueCoder, - Map, Coder> outputCoders, - Collection> sideInputs, - List> sideOutputTags, - WindowingStrategy windowingStrategy, - Map> idToViewMap, - OutputManagerFactory outputManagerFactory, - String transformFullName, - String transformId, - PCollection.IsBounded isBounded, - boolean isPortable, - RunnerApi.ExecutableStagePayload stagePayload, - JobInfo jobInfo, - Map> idToTupleTagMap, - DoFnSchemaInformation doFnSchemaInformation, - Map> sideInputMapping, - Map stateIdToStoreMapping) { - this.mainOutputTag = mainOutputTag; - this.doFn = doFn; - this.sideInputs = sideInputs; - this.sideOutputTags = sideOutputTags; - this.inputCoder = inputCoder; - this.windowedValueCoder = windowedValueCoder; - this.outputCoders = new HashMap<>(outputCoders); - this.windowingStrategy = windowingStrategy; - this.idToViewMap = new HashMap<>(idToViewMap); - this.outputManagerFactory = 
outputManagerFactory; - this.transformFullName = transformFullName; - this.transformId = transformId; - this.keyCoder = keyCoder; - this.isBounded = isBounded; - this.isPortable = isPortable; - this.stagePayload = stagePayload; - this.jobInfo = jobInfo; - this.idToTupleTagMap = new HashMap<>(idToTupleTagMap); - this.bundleCheckTimerId = "_samza_bundle_check_" + transformId; - this.bundleStateId = "_samza_bundle_" + transformId; - this.doFnSchemaInformation = doFnSchemaInformation; - this.sideInputMapping = sideInputMapping; - this.stateIdToStoreMapping = stateIdToStoreMapping; - } - - @Override - @SuppressWarnings("unchecked") - public void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter emitter) { - this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE; - - final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass()); - final SamzaExecutionContext samzaExecutionContext = - (SamzaExecutionContext) context.getApplicationContainerContext(); - this.samzaPipelineOptions = samzaExecutionContext.getPipelineOptions(); - this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1; - - final String stateId = "pardo-" + transformId; - final SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory = - SamzaStoreStateInternals.createNonKeyedStateInternalsFactory( - stateId, context.getTaskContext(), samzaPipelineOptions); - final FutureCollector outputFutureCollector = createFutureCollector(); - - this.bundleManager = - isPortable - ? 
new PortableBundleManager<>( - createBundleProgressListener(), - samzaPipelineOptions.getMaxBundleSize(), - samzaPipelineOptions.getMaxBundleTimeMs(), - timerRegistry, - bundleCheckTimerId) - : new ClassicBundleManager<>( - createBundleProgressListener(), - outputFutureCollector, - samzaPipelineOptions.getMaxBundleSize(), - samzaPipelineOptions.getMaxBundleTimeMs(), - timerRegistry, - bundleCheckTimerId); - - this.timerInternalsFactory = - SamzaTimerInternalsFactory.createTimerInternalFactory( - keyCoder, - (Scheduler) timerRegistry, - getTimerStateId(signature), - nonKeyedStateInternalsFactory, - windowingStrategy, - isBounded, - samzaPipelineOptions); - - this.sideInputHandler = - new SideInputHandler(sideInputs, nonKeyedStateInternalsFactory.stateInternalsForKey(null)); - - if (isPortable) { - final ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload); - stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo); - stageBundleFactory = stageContext.getStageBundleFactory(executableStage); - this.fnRunner = - SamzaDoFnRunners.createPortable( - transformId, - DoFnUtils.toStepName(executableStage), - bundleStateId, - windowedValueCoder, - executableStage, - sideInputMapping, - sideInputHandler, - nonKeyedStateInternalsFactory, - timerInternalsFactory, - samzaPipelineOptions, - outputManagerFactory.create(emitter, outputFutureCollector), - stageBundleFactory, - samzaExecutionContext, - mainOutputTag, - idToTupleTagMap, - context, - transformFullName); - } else { - this.fnRunner = - SamzaDoFnRunners.create( - samzaPipelineOptions, - doFn, - windowingStrategy, - transformFullName, - stateId, - context, - mainOutputTag, - sideInputHandler, - timerInternalsFactory, - keyCoder, - outputManagerFactory.create(emitter, outputFutureCollector), - inputCoder, - sideOutputTags, - outputCoders, - doFnSchemaInformation, - (Map>) sideInputMapping, - stateIdToStoreMapping, - emitter, - outputFutureCollector); - } - - this.pushbackFnRunner = - 
SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler); - this.pushbackValues = new ArrayList<>(); - - final Iterator invokerReg = - ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator(); - if (!invokerReg.hasNext()) { - // use the default invoker here - doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions); - } else { - doFnInvoker = - Iterators.getOnlyElement(invokerReg).invokerSetupFor(doFn, samzaPipelineOptions, context); - } - } - - FutureCollector createFutureCollector() { - return new FutureCollectorImpl<>(); - } - - private String getTimerStateId(DoFnSignature signature) { - final StringBuilder builder = new StringBuilder("timer"); - if (signature.usesTimers()) { - signature.timerDeclarations().keySet().forEach(builder::append); - } - return builder.toString(); - } - - @Override - public void processElement(WindowedValue inputElement, OpEmitter emitter) { - try { - bundleManager.tryStartBundle(); - final Iterable> rejectedValues = - pushbackFnRunner.processElementInReadyWindows(inputElement); - for (WindowedValue rejectedValue : rejectedValues) { - if (rejectedValue.getTimestamp().compareTo(pushbackWatermarkHold) < 0) { - pushbackWatermarkHold = rejectedValue.getTimestamp(); - } - pushbackValues.add(rejectedValue); - } - - bundleManager.tryFinishBundle(emitter); - } catch (Throwable t) { - LOG.error("Encountered error during process element", t); - bundleManager.signalFailure(t); - throw t; - } - } - - private void doProcessWatermark(Instant watermark, OpEmitter emitter) { - this.inputWatermark = watermark; - - if (sideInputWatermark.isEqual(BoundedWindow.TIMESTAMP_MAX_VALUE)) { - // this means we will never see any more side input - emitAllPushbackValues(); - } - - final Instant actualInputWatermark = - pushbackWatermarkHold.isBefore(inputWatermark) ? 
pushbackWatermarkHold : inputWatermark; - - timerInternalsFactory.setInputWatermark(actualInputWatermark); - - Collection> readyTimers = timerInternalsFactory.removeReadyTimers(); - if (!readyTimers.isEmpty()) { - pushbackFnRunner.startBundle(); - for (KeyedTimerData keyedTimerData : readyTimers) { - fireTimer(keyedTimerData); - } - pushbackFnRunner.finishBundle(); - } - - if (timerInternalsFactory.getOutputWatermark() == null - || timerInternalsFactory.getOutputWatermark().isBefore(actualInputWatermark)) { - timerInternalsFactory.setOutputWatermark(actualInputWatermark); - emitter.emitWatermark(timerInternalsFactory.getOutputWatermark()); - } - } - - @Override - public void processWatermark(Instant watermark, OpEmitter emitter) { - bundleManager.processWatermark(watermark, emitter); - } - - @Override - public void processSideInput( - String id, WindowedValue> elements, OpEmitter emitter) { - checkState( - bundleDisabled, "Side input not supported in bundling mode. Please disable bundling."); - @SuppressWarnings("unchecked") - final WindowedValue> retypedElements = (WindowedValue>) elements; - - final PCollectionView view = idToViewMap.get(id); - if (view == null) { - throw new IllegalArgumentException("No mapping of id " + id + " to view."); - } - - sideInputHandler.addSideInputValue(view, retypedElements); - - final List> previousPushbackValues = new ArrayList<>(pushbackValues); - pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE; - pushbackValues.clear(); - - for (final WindowedValue value : previousPushbackValues) { - processElement(value, emitter); - } - - // We may be able to advance the output watermark since we may have played some pushed back - // events. - processWatermark(this.inputWatermark, emitter); - } - - @Override - public void processSideInputWatermark(Instant watermark, OpEmitter emitter) { - checkState( - bundleDisabled, "Side input not supported in bundling mode. 
Please disable bundling."); - sideInputWatermark = watermark; - - if (sideInputWatermark.isEqual(BoundedWindow.TIMESTAMP_MAX_VALUE)) { - // this means we will never see any more side input - processWatermark(this.inputWatermark, emitter); - } - } - - @Override - @SuppressWarnings("unchecked") - public void processTimer(KeyedTimerData keyedTimerData, OpEmitter emitter) { - // this is internal timer in processing time to check whether a bundle should be closed - if (bundleCheckTimerId.equals(keyedTimerData.getTimerData().getTimerId())) { - bundleManager.processTimer(keyedTimerData, emitter); - return; - } - - pushbackFnRunner.startBundle(); - fireTimer(keyedTimerData); - pushbackFnRunner.finishBundle(); - - this.timerInternalsFactory.removeProcessingTimer((KeyedTimerData) keyedTimerData); - } - - @Override - public void close() { - doFnInvoker.invokeTeardown(); - try (AutoCloseable factory = stageBundleFactory; - AutoCloseable context = stageContext) { - // do nothing - } catch (Exception e) { - LOG.error("Failed to close stage bundle factory", e); - } - } - - private void fireTimer(KeyedTimerData keyedTimerData) { - final TimerInternals.TimerData timer = keyedTimerData.getTimerData(); - LOG.debug("Firing timer {}", timer); - - final StateNamespace namespace = timer.getNamespace(); - // NOTE: not sure why this is safe, but DoFnOperator makes this assumption - final BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow(); - - fnRunner.onTimer( - timer.getTimerId(), - timer.getTimerFamilyId(), - keyedTimerData.getKey(), - window, - timer.getTimestamp(), - timer.getOutputTimestamp(), - timer.getDomain(), - timer.causedByDrain()); - } - - // todo: should this go through bundle manager to start and finish the bundle? 
- private void emitAllPushbackValues() { - if (!pushbackValues.isEmpty()) { - pushbackFnRunner.startBundle(); - - final List> previousPushbackValues = new ArrayList<>(pushbackValues); - pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE; - pushbackValues.clear(); - - for (final WindowedValue value : previousPushbackValues) { - fnRunner.processElement(value); - } - - pushbackFnRunner.finishBundle(); - } - } - - private BundleManager.BundleProgressListener createBundleProgressListener() { - return new BundleManager.BundleProgressListener() { - @Override - public void onBundleStarted() { - pushbackFnRunner.startBundle(); - } - - @Override - public void onBundleFinished(OpEmitter emitter) { - pushbackFnRunner.finishBundle(); - } - - @Override - public void onWatermark(Instant watermark, OpEmitter emitter) { - doProcessWatermark(watermark, emitter); - } - }; - } - - static CompletionStage> createOutputFuture( - WindowedValue windowedValue, - CompletionStage valueFuture, - Function valueMapper) { - return valueFuture.thenApply( - res -> - WindowedValues.of( - valueMapper.apply(res), - windowedValue.getTimestamp(), - windowedValue.getWindows(), - windowedValue.getPaneInfo())); - } - - /** - * Factory class to create an {@link org.apache.beam.sdk.util.WindowedValueMultiReceiver} that - * emits values to the main output only, which is a single {@link - * org.apache.beam.sdk.values.PCollection}. - * - * @param type of the output element. 
- */ - public static class SingleOutputManagerFactory implements OutputManagerFactory { - @Override - public WindowedValueMultiReceiver create(OpEmitter emitter) { - return createOutputManager(emitter, null); - } - - @Override - public WindowedValueMultiReceiver create( - OpEmitter emitter, FutureCollector collector) { - return createOutputManager(emitter, collector); - } - - private WindowedValueMultiReceiver createOutputManager( - OpEmitter emitter, FutureCollector collector) { - return new WindowedValueMultiReceiver() { - @Override - @SuppressWarnings("unchecked") - public void output(TupleTag tupleTag, WindowedValue windowedValue) { - // With only one input we know that T is of type OutT. - if (windowedValue.getValue() instanceof CompletionStage) { - CompletionStage valueFuture = (CompletionStage) windowedValue.getValue(); - if (collector != null) { - collector.add(createOutputFuture(windowedValue, valueFuture, value -> (OutT) value)); - } - } else { - final WindowedValue retypedWindowedValue = (WindowedValue) windowedValue; - emitter.emitElement(retypedWindowedValue); - } - } - }; - } - } - - /** - * Factory class to create an {@link org.apache.beam.runners.core.WindowedValueMultiReceiver} that - * emits values to the main output as well as the side outputs via union type {@link - * RawUnionValue}. 
- */ - public static class MultiOutputManagerFactory implements OutputManagerFactory { - private final Map, Integer> tagToIndexMap; - - public MultiOutputManagerFactory(Map, Integer> tagToIndexMap) { - this.tagToIndexMap = tagToIndexMap; - } - - @Override - public WindowedValueMultiReceiver create(OpEmitter emitter) { - return createOutputManager(emitter, null); - } - - @Override - public WindowedValueMultiReceiver create( - OpEmitter emitter, FutureCollector collector) { - return createOutputManager(emitter, collector); - } - - private WindowedValueMultiReceiver createOutputManager( - OpEmitter emitter, FutureCollector collector) { - return new WindowedValueMultiReceiver() { - @Override - @SuppressWarnings("unchecked") - public void output(TupleTag tupleTag, WindowedValue windowedValue) { - final int index = tagToIndexMap.get(tupleTag); - final T rawValue = windowedValue.getValue(); - if (rawValue instanceof CompletionStage) { - CompletionStage valueFuture = (CompletionStage) rawValue; - if (collector != null) { - collector.add( - createOutputFuture( - windowedValue, valueFuture, res -> new RawUnionValue(index, res))); - } - } else { - final RawUnionValue rawUnionValue = new RawUnionValue(index, rawValue); - emitter.emitElement(windowedValue.withValue(rawUnionValue)); - } - } - }; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java deleted file mode 100644 index 84cf5b26c505..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/DoFnRunnerWithKeyedInternals.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.WindowedValue; -import org.joda.time.Instant; - -/** This class wraps a DoFnRunner with keyed StateInternals and TimerInternals access. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class DoFnRunnerWithKeyedInternals implements DoFnRunner { - private final DoFnRunner underlying; - private final KeyedInternals keyedInternals; - - DoFnRunnerWithKeyedInternals( - DoFnRunner doFnRunner, KeyedInternals keyedInternals) { - this.underlying = doFnRunner; - this.keyedInternals = keyedInternals; - } - - @Override - public void startBundle() { - underlying.startBundle(); - } - - @Override - public void processElement(WindowedValue elem) { - // NOTE: this is thread-safe if we only allow concurrency on the per-key basis. 
- setKeyedInternals(elem.getValue()); - - try { - underlying.processElement(elem); - } finally { - clearKeyedInternals(); - } - } - - @Override - public void onTimer( - String timerId, - String timerFamilyId, - KeyT key, - BoundedWindow window, - Instant timestamp, - Instant outputTimestamp, - TimeDomain timeDomain, - CausedByDrain causedByDrain) { - // Note: wrap with KV.of(key, null) as a special use case of setKeyedInternals() to set key - // directly. - setKeyedInternals(KV.of(key, null)); - - try { - underlying.onTimer( - timerId, - timerFamilyId, - key, - window, - timestamp, - outputTimestamp, - timeDomain, - causedByDrain); - } finally { - clearKeyedInternals(); - } - } - - @Override - public void finishBundle() { - underlying.finishBundle(); - } - - @Override - public void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) { - underlying.onWindowExpiration(window, timestamp, key); - } - - @Override - public DoFn getFn() { - return underlying.getFn(); - } - - private void setKeyedInternals(Object value) { - if (value instanceof KeyedWorkItem) { - keyedInternals.setKey(((KeyedWorkItem) value).key()); - } else if (value instanceof KeyedTimerData) { - final Object key = ((KeyedTimerData) value).getKey(); - if (key != null) { - keyedInternals.setKey(key); - } - } else if (value instanceof KV) { - keyedInternals.setKey(((KV) value).getKey()); - } else { - throw new UnsupportedOperationException( - String.format( - "%s is not supported in %s", value.getClass(), DoFnRunnerWithKeyedInternals.class)); - } - } - - private void clearKeyedInternals() { - keyedInternals.clearKey(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollector.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollector.java deleted file mode 100644 index 750e42d96e26..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollector.java +++ /dev/null @@ -1,67 +0,0 
@@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.concurrent.CompletionStage; -import org.apache.beam.sdk.values.WindowedValue; - -/** - * A future collector that buffers the output from the users {@link - * org.apache.beam.sdk.transforms.DoFn} and propagates the result future to downstream operators - * only after {@link #finish()} is invoked. - * - * @param type of the output element - */ -public interface FutureCollector { - /** - * Outputs the element to the collector. - * - * @param element to add to the collector - */ - void add(CompletionStage> element); - - /** - * Outputs a collection of elements to the collector. - * - * @param elements to add to the collector - */ - void addAll(CompletionStage>> elements); - - /** - * Discards the elements within the collector. Once the elements have been discarded, callers need - * to prepare the collector again before invoking {@link #add(CompletionStage)}. - */ - void discard(); - - /** - * Seals this {@link FutureCollector}, returning a {@link CompletionStage} containing all of the - * elements that were added to it. 
The {@link #add(CompletionStage)} method will throw an {@link - * IllegalStateException} if called after a call to finish. - * - *

The {@link FutureCollector} needs to be started again to collect newer batch of output. - */ - CompletionStage>> finish(); - - /** - * Prepares the {@link FutureCollector} to accept output elements. The {@link - * #add(CompletionStage)} method will throw an {@link IllegalStateException} if called without - * preparing the collector. - */ - void prepare(); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollectorImpl.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollectorImpl.java deleted file mode 100644 index e364eb7c4078..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/FutureCollectorImpl.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.beam.runners.samza.util.FutureUtils; -import org.apache.beam.sdk.values.WindowedValue; - -class FutureCollectorImpl implements FutureCollector { - private final AtomicBoolean collectorSealed; - private CompletionStage>> outputFuture; - - FutureCollectorImpl() { - outputFuture = CompletableFuture.completedFuture(new ArrayList<>()); - collectorSealed = new AtomicBoolean(true); - } - - @Override - public void add(CompletionStage> element) { - checkState( - !collectorSealed.get(), - "Cannot add element to an unprepared collector. Make sure prepare() is invoked before adding elements."); - - // We need synchronize guard against scenarios when watermark/finish bundle trigger outputs. - synchronized (this) { - outputFuture = - outputFuture.thenCombine( - element, - (collection, event) -> { - collection.add(event); - return collection; - }); - } - } - - @Override - public void addAll(CompletionStage>> elements) { - checkState( - !collectorSealed.get(), - "Cannot add elements to an unprepared collector. Make sure prepare() is invoked before adding elements."); - - synchronized (this) { - outputFuture = FutureUtils.combineFutures(outputFuture, elements); - } - } - - @Override - public void discard() { - collectorSealed.compareAndSet(false, true); - - synchronized (this) { - outputFuture = CompletableFuture.completedFuture(new ArrayList<>()); - } - } - - @Override - public CompletionStage>> finish() { - /* - * We can ignore the results here because its okay to call finish without invoking prepare. It will be a no-op - * and an empty collection will be returned. 
- */ - collectorSealed.compareAndSet(false, true); - - synchronized (this) { - final CompletionStage>> sealedOutputFuture = outputFuture; - outputFuture = CompletableFuture.completedFuture(new ArrayList<>()); - return sealedOutputFuture; - } - } - - @Override - public void prepare() { - boolean isCollectorSealed = collectorSealed.compareAndSet(true, false); - checkState( - isCollectorSealed, - "Failed to prepare the collector. Collector needs to be sealed before prepare() is invoked."); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java deleted file mode 100644 index a9fe11de4d92..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.Collections; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.DoFnRunners; -import org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.runners.core.KeyedWorkItemCoder; -import org.apache.beam.runners.core.KeyedWorkItems; -import org.apache.beam.runners.core.NullSideInputReader; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StepContext; -import org.apache.beam.runners.core.SystemReduceFn; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternals.TimerData; -import org.apache.beam.runners.samza.SamzaExecutionContext; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.metrics.DoFnRunnerWithMetrics; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.WindowedValueMultiReceiver; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection.IsBounded; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Samza operator for {@link org.apache.beam.sdk.transforms.GroupByKey}. 
*/ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class GroupByKeyOp - implements Op, KV, K> { - private static final Logger LOG = LoggerFactory.getLogger(GroupByKeyOp.class); - private static final String TIMER_STATE_ID = "timer"; - - private final TupleTag> mainOutputTag; - private final KeyedWorkItemCoder inputCoder; - private final WindowingStrategy windowingStrategy; - private final OutputManagerFactory> outputManagerFactory; - private final Coder keyCoder; - private final SystemReduceFn reduceFn; - private final String transformFullName; - private final String transformId; - private final IsBounded isBounded; - - private transient StateInternalsFactory stateInternalsFactory; - private transient SamzaTimerInternalsFactory timerInternalsFactory; - private transient DoFnRunner, KV> fnRunner; - private transient SamzaPipelineOptions pipelineOptions; - - public GroupByKeyOp( - TupleTag> mainOutputTag, - Coder> inputCoder, - SystemReduceFn reduceFn, - WindowingStrategy windowingStrategy, - OutputManagerFactory> outputManagerFactory, - String transformFullName, - String transformId, - IsBounded isBounded) { - this.mainOutputTag = mainOutputTag; - this.windowingStrategy = windowingStrategy; - this.outputManagerFactory = outputManagerFactory; - this.transformFullName = transformFullName; - this.transformId = transformId; - this.isBounded = isBounded; - - if (!(inputCoder instanceof KeyedWorkItemCoder)) { - throw new IllegalArgumentException( - String.format( - "GroupByKeyOp requires input to use KeyedWorkItemCoder. 
Got: %s", - inputCoder.getClass())); - } - this.inputCoder = (KeyedWorkItemCoder) inputCoder; - this.keyCoder = this.inputCoder.getKeyCoder(); - this.reduceFn = reduceFn; - } - - @Override - public void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter> emitter) { - - final SamzaExecutionContext samzaExecutionContext = - (SamzaExecutionContext) context.getApplicationContainerContext(); - this.pipelineOptions = samzaExecutionContext.getPipelineOptions(); - - final SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory = - SamzaStoreStateInternals.createNonKeyedStateInternalsFactory( - transformId, context.getTaskContext(), pipelineOptions); - - final WindowedValueMultiReceiver outputManager = outputManagerFactory.create(emitter); - - this.stateInternalsFactory = - new SamzaStoreStateInternals.Factory<>( - transformId, - Collections.singletonMap( - SamzaStoreStateInternals.BEAM_STORE, - SamzaStoreStateInternals.getBeamStore(context.getTaskContext())), - keyCoder, - pipelineOptions.getStoreBatchGetSize()); - - this.timerInternalsFactory = - SamzaTimerInternalsFactory.createTimerInternalFactory( - keyCoder, - timerRegistry, - TIMER_STATE_ID, - nonKeyedStateInternalsFactory, - windowingStrategy, - isBounded, - pipelineOptions); - - final DoFn, KV> doFn = - GroupAlsoByWindowViaWindowSetNewDoFn.create( - windowingStrategy, - stateInternalsFactory, - timerInternalsFactory, - NullSideInputReader.of(Collections.emptyList()), - reduceFn, - outputManager, - mainOutputTag); - - final KeyedInternals keyedInternals = - new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory); - - final StepContext stepContext = - new StepContext() { - @Override - public StateInternals stateInternals() { - return keyedInternals.stateInternals(); - } - - @Override - public TimerInternals timerInternals() { - return keyedInternals.timerInternals(); - } - }; - - final DoFnRunner, KV> doFnRunner = - DoFnRunners.simpleRunner( - 
PipelineOptionsFactory.create(), - doFn, - NullSideInputReader.of(Collections.emptyList()), - outputManager, - mainOutputTag, - Collections.emptyList(), - stepContext, - null, - Collections.emptyMap(), - windowingStrategy, - DoFnSchemaInformation.create(), - Collections.emptyMap()); - - final DoFnRunner, KV> dropLateDataRunner = - pipelineOptions.getDropLateData() - ? DoFnRunners.lateDataDroppingRunner( - doFnRunner, keyedInternals.timerInternals(), windowingStrategy) - : doFnRunner; - - final SamzaExecutionContext executionContext = - (SamzaExecutionContext) context.getApplicationContainerContext(); - final DoFnRunner, KV> doFnRunnerWithMetrics = - DoFnRunnerWithMetrics.wrap( - dropLateDataRunner, executionContext.getMetricsContainer(), transformFullName); - - this.fnRunner = new DoFnRunnerWithKeyedInternals<>(doFnRunnerWithMetrics, keyedInternals); - } - - @Override - public void processElement( - WindowedValue> inputElement, OpEmitter> emitter) { - fnRunner.startBundle(); - fnRunner.processElement(inputElement); - fnRunner.finishBundle(); - } - - @Override - public void processWatermark(Instant watermark, OpEmitter> emitter) { - timerInternalsFactory.setInputWatermark(watermark); - - Collection> readyTimers = timerInternalsFactory.removeReadyTimers(); - if (!readyTimers.isEmpty()) { - fnRunner.startBundle(); - for (KeyedTimerData keyedTimerData : readyTimers) { - fireTimer(keyedTimerData.getKey(), keyedTimerData.getTimerData()); - } - fnRunner.finishBundle(); - } - - if (timerInternalsFactory.getOutputWatermark() == null - || timerInternalsFactory.getOutputWatermark().isBefore(watermark)) { - timerInternalsFactory.setOutputWatermark(watermark); - emitter.emitWatermark(timerInternalsFactory.getOutputWatermark()); - } - } - - @Override - public void processTimer(KeyedTimerData keyedTimerData, OpEmitter> emitter) { - fnRunner.startBundle(); - fireTimer(keyedTimerData.getKey(), keyedTimerData.getTimerData()); - fnRunner.finishBundle(); - - 
timerInternalsFactory.removeProcessingTimer(keyedTimerData); - } - - private void fireTimer(K key, TimerData timer) { - LOG.debug("Firing timer {} for key {}", timer, key); - fnRunner.processElement( - WindowedValues.valueInGlobalWindow( - KeyedWorkItems.timersWorkItem(key, Collections.singletonList(timer)))); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java deleted file mode 100644 index dc442d88ac32..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedInternals.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.List; -import javax.annotation.concurrent.ThreadSafe; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateTag; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternalsFactory; -import org.apache.beam.runners.samza.state.SamzaMapState; -import org.apache.beam.runners.samza.state.SamzaSetState; -import org.apache.beam.sdk.state.State; -import org.apache.beam.sdk.state.StateContext; -import org.apache.beam.sdk.state.TimeDomain; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** Provides access to the keyed StateInternals and TimerInternals. 
*/ -@ThreadSafe -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class KeyedInternals { - - private static class KeyedStates { - private final K key; - private final List states; - - private KeyedStates(K key) { - this.key = key; - this.states = new ArrayList<>(); - } - } - - private static final ThreadLocal threadLocalKeyedStates = new ThreadLocal<>(); - private final StateInternalsFactory stateFactory; - private final TimerInternalsFactory timerFactory; - - KeyedInternals(StateInternalsFactory stateFactory, TimerInternalsFactory timerFactory) { - this.stateFactory = stateFactory; - this.timerFactory = timerFactory; - } - - StateInternals stateInternals() { - return new KeyedStateInternals(); - } - - TimerInternals timerInternals() { - return new KeyedTimerInternals(); - } - - void setKey(K key) { - checkState( - threadLocalKeyedStates.get() == null, - "States for key %s is not cleared before processing", - key); - - threadLocalKeyedStates.set(new KeyedStates(key)); - } - - K getKey() { - KeyedStates keyedStates = threadLocalKeyedStates.get(); - return keyedStates == null ? 
null : keyedStates.key; - } - - void clearKey() { - final List states = threadLocalKeyedStates.get().states; - states.forEach( - state -> { - if (state instanceof SamzaMapState) { - ((SamzaMapState) state).closeIterators(); - } else if (state instanceof SamzaSetState) { - ((SamzaSetState) state).closeIterators(); - } - }); - states.clear(); - - threadLocalKeyedStates.remove(); - } - - private class KeyedStateInternals implements StateInternals { - - @Override - public K getKey() { - return KeyedInternals.this.getKey(); - } - - @Override - public T state( - StateNamespace namespace, StateTag address, StateContext c) { - checkState(getKey() != null, "Key is not set before state access in Stateful ParDo."); - - final T state = stateFactory.stateInternalsForKey(getKey()).state(namespace, address, c); - threadLocalKeyedStates.get().states.add(state); - return state; - } - } - - private class KeyedTimerInternals implements TimerInternals { - - private TimerInternals getInternals() { - return timerFactory.timerInternalsForKey(getKey()); - } - - @Override - public void setTimer( - StateNamespace namespace, - String timerId, - String timerFamilyId, - Instant target, - Instant outputTimestamp, - TimeDomain timeDomain) { - getInternals() - .setTimer(namespace, timerId, timerFamilyId, target, outputTimestamp, timeDomain); - } - - @Override - public void setTimer(TimerData timerData) { - getInternals().setTimer(timerData); - } - - @Override - public void deleteTimer( - StateNamespace namespace, String timerId, String timerFamilyId, TimeDomain timeDomain) { - getInternals().deleteTimer(namespace, timerId, timerFamilyId, timeDomain); - } - - @Override - public void deleteTimer(StateNamespace namespace, String timerId, String timerFamilyId) { - getInternals().deleteTimer(namespace, timerId, timerFamilyId); - } - - @Override - public void deleteTimer(TimerData timerKey) { - getInternals().deleteTimer(timerKey); - } - - @Override - public Instant currentProcessingTime() { - return 
getInternals().currentProcessingTime(); - } - - @Override - public @Nullable Instant currentSynchronizedProcessingTime() { - return getInternals().currentSynchronizedProcessingTime(); - } - - @Override - public Instant currentInputWatermarkTime() { - return getInternals().currentInputWatermarkTime(); - } - - @Override - public @Nullable Instant currentOutputWatermarkTime() { - return getInternals().currentOutputWatermarkTime(); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java deleted file mode 100644 index bd7c62dbb323..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KeyedTimerData.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.List; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternals.TimerData; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.InstantCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.StructuredCoder; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * {@link TimerInternals.TimerData} with key, used by {@link SamzaTimerInternalsFactory}. Implements - * {@link Comparable} by first comparing the wrapped TimerData then the key. 
- */ -@SuppressWarnings({ - "keyfor", - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class KeyedTimerData implements Comparable> { - private final byte[] keyBytes; - private final K key; - private final TimerInternals.TimerData timerData; - - public KeyedTimerData(byte[] keyBytes, K key, TimerData timerData) { - this.keyBytes = keyBytes; - this.key = key; - this.timerData = timerData; - } - - public K getKey() { - return key; - } - - public byte[] getKeyBytes() { - return keyBytes; - } - - public TimerInternals.TimerData getTimerData() { - return timerData; - } - - @Override - public int compareTo(KeyedTimerData other) { - final int timerCompare = getTimerData().compareTo(other.getTimerData()); - if (timerCompare != 0) { - return timerCompare; - } - - if (keyBytes == null) { - return other.keyBytes == null ? 0 : -1; - } - - if (other.keyBytes == null) { - return 1; - } - - if (keyBytes.length < other.keyBytes.length) { - return -1; - } - - if (keyBytes.length > other.keyBytes.length) { - return 1; - } - - for (int i = 0; i < keyBytes.length; ++i) { - final char b1 = (char) keyBytes[i]; - final char b2 = (char) other.keyBytes[i]; - if (b1 != b2) { - return b1 - b2; - } - } - - return 0; - } - - @Override - public String toString() { - return "KeyedTimerData{" - + "key=" - + key - + ", keyBytes=" - + Arrays.toString(keyBytes) - + ", timerData=" - + timerData - + '}'; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof KeyedTimerData)) { - return false; - } - - final KeyedTimerData that = (KeyedTimerData) o; - - return Arrays.equals(keyBytes, that.keyBytes) && timerData.equals(that.timerData); - } - - @Override - public int hashCode() { - int result = Arrays.hashCode(keyBytes); - result = 31 * result + timerData.hashCode(); - return result; - } - - /** - * Coder for {@link KeyedTimerData}. 
Note we don't use the {@link TimerInternals.TimerDataCoderV2} - * here directly since we want to en/decode timestamp first so the timers will be sorted in the - * state. - */ - public static class KeyedTimerDataCoder extends StructuredCoder> { - private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of(); - private static final InstantCoder INSTANT_CODER = InstantCoder.of(); - - private final Coder keyCoder; - private final Coder windowCoder; - - KeyedTimerDataCoder(Coder keyCoder, Coder windowCoder) { - this.keyCoder = keyCoder; - this.windowCoder = windowCoder; - } - - @Override - public void encode(KeyedTimerData value, OutputStream outStream) - throws CoderException, IOException { - - final TimerData timer = value.getTimerData(); - // encode the timestamps first - // all new fields should be encoded at last - INSTANT_CODER.encode(timer.getTimestamp(), outStream); - STRING_CODER.encode(timer.getTimerId(), outStream); - STRING_CODER.encode(timer.getNamespace().stringKey(), outStream); - STRING_CODER.encode(timer.getDomain().name(), outStream); - - if (keyCoder != null) { - keyCoder.encode(value.key, outStream); - } - - STRING_CODER.encode(timer.getTimerFamilyId(), outStream); - INSTANT_CODER.encode(timer.getOutputTimestamp(), outStream); - } - - @Override - public KeyedTimerData decode(InputStream inStream) throws CoderException, IOException { - // decode the timestamp first - final Instant timestamp = INSTANT_CODER.decode(inStream); - final String timerId = STRING_CODER.decode(inStream); - final StateNamespace namespace = - StateNamespaces.fromString(STRING_CODER.decode(inStream), windowCoder); - final TimeDomain domain = TimeDomain.valueOf(STRING_CODER.decode(inStream)); - - byte[] keyBytes = null; - K key = null; - if (keyCoder != null) { - key = keyCoder.decode(inStream); - - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - keyCoder.encode(key, baos); - } catch (IOException e) { - throw new RuntimeException("Could not 
encode key: " + key, e); - } - keyBytes = baos.toByteArray(); - } - - final String timerFamilyId = inStream.available() > 0 ? STRING_CODER.decode(inStream) : ""; - final Instant outputTimestamp = - inStream.available() > 0 ? INSTANT_CODER.decode(inStream) : timestamp; - final TimerData timer = - TimerData.of(timerId, timerFamilyId, namespace, timestamp, outputTimestamp, domain); - return new KeyedTimerData<>(keyBytes, key, timer); - } - - @Override - public List> getCoderArguments() { - return Arrays.asList(keyCoder, windowCoder); - } - - @Override - public void verifyDeterministic() throws NonDeterministicException {} - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KvToKeyedWorkItemOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KvToKeyedWorkItemOp.java deleted file mode 100644 index 7403c4480768..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/KvToKeyedWorkItemOp.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.WindowedValue; - -/** Samza operator to map input stream of {@link KV} to {@link KeyedWorkItem}. */ -public class KvToKeyedWorkItemOp implements Op, KeyedWorkItem, K> { - - @Override - public void processElement( - WindowedValue> inputElement, OpEmitter> emitter) { - final KV kv = inputElement.getValue(); - for (WindowedValue> windowedValue : inputElement.explodeWindows()) { - final KeyedWorkItem workItem = - new SingletonKeyedWorkItem<>(kv.getKey(), windowedValue.withValue(kv.getValue())); - emitter.emitElement(windowedValue.withValue(workItem)); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/Op.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/Op.java deleted file mode 100644 index 14e0151bb708..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/Op.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.io.Serializable; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; - -/** - * Interface of Samza operator for BEAM. This interface demultiplexes messages from BEAM so that - * elements and side inputs can be handled separately in Samza. Watermark propagation can be - * overridden so we can hold watermarks for side inputs. The output values and watermark will be - * collected via {@link OpEmitter}. - */ -public interface Op extends Serializable { - /** - * A hook that allows initialization for any non-serializable operator state, such as getting - * stores. - * - *

While an emitter is supplied to this function it is not usable except in the methods {@link - * #processElement(WindowedValue, OpEmitter)}, {@link #processWatermark(Instant, OpEmitter)}, and - * {@link #processSideInput(String, WindowedValue, OpEmitter)}. - */ - default void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter emitter) {} - - void processElement(WindowedValue inputElement, OpEmitter emitter); - - default void processWatermark(Instant watermark, OpEmitter emitter) { - emitter.emitWatermark(watermark); - } - - default void processSideInput( - String id, WindowedValue> elements, OpEmitter emitter) { - throw new UnsupportedOperationException("Side inputs not supported for: " + this.getClass()); - } - - default void processSideInputWatermark(Instant watermark, OpEmitter emitter) {} - - default void processTimer(KeyedTimerData keyedTimerData, OpEmitter emitter) {} - - default void close() {} -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpAdapter.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpAdapter.java deleted file mode 100644 index f2eecbbbc9c7..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpAdapter.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Queue; -import java.util.ServiceLoader; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import org.apache.beam.runners.samza.SamzaPipelineExceptionContext; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.translation.TranslationContext; -import org.apache.beam.runners.samza.util.FutureUtils; -import org.apache.beam.runners.samza.util.SamzaPipelineExceptionListener; -import org.apache.beam.sdk.util.UserCodeException; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.apache.samza.operators.functions.AsyncFlatMapFunction; -import org.apache.samza.operators.functions.ScheduledFunction; -import org.apache.samza.operators.functions.WatermarkFunction; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Adaptor class that runs a Samza {@link Op} for BEAM in the Samza {@link AsyncFlatMapFunction}. - * This class is initialized once for each Op within a Task for each Task. 
- */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class OpAdapter - implements AsyncFlatMapFunction, OpMessage>, - WatermarkFunction>, - ScheduledFunction, OpMessage>, - Serializable { - private static final Logger LOG = LoggerFactory.getLogger(OpAdapter.class); - - private final Op op; - private final String transformFullName; - private final transient SamzaPipelineOptions samzaPipelineOptions; - private transient OpEmitter emitter; - private transient Config config; - private transient Context context; - private transient List exceptionListeners; - - public static AsyncFlatMapFunction, OpMessage> adapt( - Op op, TranslationContext ctx) { - return new OpAdapter<>(op, ctx.getTransformFullName(), ctx.getPipelineOptions()); - } - - private OpAdapter( - Op op, String transformFullName, SamzaPipelineOptions samzaPipelineOptions) { - this.op = op; - this.transformFullName = transformFullName; - this.samzaPipelineOptions = samzaPipelineOptions; - } - - @Override - public final void init(Context context) { - this.emitter = new OpEmitterImpl<>(); - this.config = context.getJobContext().getConfig(); - this.context = context; - this.exceptionListeners = - StreamSupport.stream( - ServiceLoader.load(SamzaPipelineExceptionListener.Registrar.class).spliterator(), - false) - .collect(Collectors.toList()); - } - - @Override - public final void schedule(Scheduler> timerRegistry) { - assert context != null; - - op.open(config, context, timerRegistry, emitter); - } - - @Override - public synchronized CompletionStage>> apply(OpMessage message) { - try { - switch (message.getType()) { - case ELEMENT: - op.processElement(message.getElement(), emitter); - break; - case SIDE_INPUT: - op.processSideInput(message.getViewId(), message.getViewElements(), emitter); - break; - case SIDE_INPUT_WATERMARK: - op.processSideInputWatermark(message.getSideInputWatermark(), emitter); - break; - default: - throw new IllegalArgumentException( - 
String.format("Unexpected input type: %s", message.getType())); - } - } catch (Exception e) { - LOG.error("Exception happened in transform: {}", transformFullName, e); - notifyExceptionListeners(transformFullName, e, samzaPipelineOptions); - throw UserCodeException.wrap(e); - } - - CompletionStage>> resultFuture = - CompletableFuture.completedFuture(emitter.collectOutput()); - - return FutureUtils.combineFutures(resultFuture, emitter.collectFuture()); - } - - @Override - public synchronized Collection> processWatermark(long time) { - try { - op.processWatermark(new Instant(time), emitter); - } catch (Exception e) { - LOG.error( - "Op {} threw an exception during processing watermark", this.getClass().getName(), e); - throw UserCodeException.wrap(e); - } - - return emitter.collectOutput(); - } - - @Override - public synchronized Long getOutputWatermark() { - return emitter.collectWatermark(); - } - - @Override - public synchronized Collection> onCallback( - KeyedTimerData keyedTimerData, long time) { - try { - op.processTimer(keyedTimerData, emitter); - } catch (Exception e) { - LOG.error("Op {} threw an exception during processing timer", this.getClass().getName(), e); - throw UserCodeException.wrap(e); - } - - return emitter.collectOutput(); - } - - @Override - public void close() { - op.close(); - } - - static class OpEmitterImpl implements OpEmitter { - private final Queue> outputQueue; - private CompletionStage>> outputFuture; - private Instant outputWatermark; - - OpEmitterImpl() { - outputQueue = new ConcurrentLinkedQueue<>(); - } - - @Override - public void emitElement(WindowedValue element) { - outputQueue.add(OpMessage.ofElement(element)); - } - - @Override - public void emitFuture(CompletionStage>> resultFuture) { - final CompletionStage>> resultFutureWrapped = - resultFuture.thenApply( - res -> res.stream().map(OpMessage::ofElement).collect(Collectors.toList())); - - outputFuture = FutureUtils.combineFutures(outputFuture, resultFutureWrapped); - } - - 
@Override - public void emitWatermark(Instant watermark) { - outputWatermark = watermark; - } - - @Override - public void emitView(String id, WindowedValue> elements) { - outputQueue.add(OpMessage.ofSideInput(id, elements)); - } - - @Override - public Collection> collectOutput() { - final List> outputList = new ArrayList<>(); - OpMessage output; - while ((output = outputQueue.poll()) != null) { - outputList.add(output); - } - return outputList; - } - - @Override - public CompletionStage>> collectFuture() { - final CompletionStage>> future = outputFuture; - outputFuture = null; - return future; - } - - @Override - public Long collectWatermark() { - final Instant watermark = outputWatermark; - outputWatermark = null; - return watermark == null ? null : watermark.getMillis(); - } - } - - private void notifyExceptionListeners( - String transformFullName, Exception e, SamzaPipelineOptions samzaPipelineOptions) { - try { - exceptionListeners.forEach( - listener -> { - listener - .getExceptionListener(samzaPipelineOptions) - .onException(new SamzaPipelineExceptionContext(transformFullName, e)); - }); - } catch (Exception t) { - // ignore exception/interruption by listeners - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpEmitter.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpEmitter.java deleted file mode 100644 index c74d1cf1e11e..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpEmitter.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.concurrent.CompletionStage; -import org.apache.beam.sdk.values.WindowedValue; -import org.joda.time.Instant; - -/** Output emitter for Samza {@link Op}. */ -public interface OpEmitter { - - void emitFuture(CompletionStage>> resultFuture); - - void emitElement(WindowedValue element); - - void emitWatermark(Instant watermark); - - void emitView(String id, WindowedValue> elements); - - Collection> collectOutput(); - - CompletionStage>> collectFuture(); - - Long collectWatermark(); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpMessage.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpMessage.java deleted file mode 100644 index 33b221a8bbce..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OpMessage.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import org.apache.beam.sdk.values.WindowedValue; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * Actual message type used in Samza {@link org.apache.samza.application.StreamApplication}. It - * contains either an element of main inputs or the collection results from a view (used as side - * input). - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class OpMessage { - /** - * Type of the element(s) in the message. - * - *

    - *
  • ELEMENT - an element from main inputs. - *
  • SIDE_INPUT - a collection of elements from a view. - *
- */ - public enum Type { - ELEMENT, - SIDE_INPUT, - SIDE_INPUT_WATERMARK - } - - private final Type type; - private final WindowedValue element; - private final String viewId; - private final WindowedValue> viewElements; - private final Instant sideInputWatermark; - - public static OpMessage ofElement(WindowedValue element) { - return new OpMessage<>(Type.ELEMENT, element, null, null, null); - } - - public static OpMessage ofSideInput( - String viewId, WindowedValue> elements) { - return new OpMessage<>(Type.SIDE_INPUT, null, viewId, elements, null); - } - - public static OpMessage ofSideInputWatermark(Instant watermark) { - return new OpMessage<>(Type.SIDE_INPUT_WATERMARK, null, null, null, watermark); - } - - private OpMessage( - Type type, - WindowedValue element, - String viewId, - WindowedValue> viewElements, - Instant sideInputWatermark) { - this.type = type; - this.element = element; - this.viewId = viewId; - this.viewElements = viewElements; - this.sideInputWatermark = sideInputWatermark; - } - - public Type getType() { - return type; - } - - public WindowedValue getElement() { - ensureType(Type.ELEMENT, "getElement"); - return element; - } - - public String getViewId() { - ensureType(Type.SIDE_INPUT, "getViewId"); - return viewId; - } - - public WindowedValue> getViewElements() { - ensureType(Type.SIDE_INPUT, "getViewElements"); - return viewElements; - } - - public Instant getSideInputWatermark() { - return sideInputWatermark; - } - - private void ensureType(Type type, String method) { - if (this.type != type) { - throw new IllegalStateException( - String.format("Calling %s requires type %s, but was type %s", method, type, this.type)); - } - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof OpMessage)) { - return false; - } - - OpMessage opMessage = (OpMessage) o; - - if (type != opMessage.type) { - return false; - } - - if (element != null ? 
!element.equals(opMessage.element) : opMessage.element != null) { - return false; - } - - if (viewId != null ? !viewId.equals(opMessage.viewId) : opMessage.viewId != null) { - return false; - } - - return viewElements != null - ? viewElements.equals(opMessage.viewElements) - : opMessage.viewElements == null; - } - - @Override - public int hashCode() { - int result = type.hashCode(); - result = 31 * result + (element != null ? element.hashCode() : 0); - result = 31 * result + (viewId != null ? viewId.hashCode() : 0); - result = 31 * result + (viewElements != null ? viewElements.hashCode() : 0); - return result; - } - - @Override - public String toString() { - return "OpMessage{" - + "type=" - + type - + ", element=" - + element - + ", viewId='" - + viewId - + '\'' - + ", viewElements=" - + viewElements - + '}'; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OutputManagerFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OutputManagerFactory.java deleted file mode 100644 index c7bb7caa19aa..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/OutputManagerFactory.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.io.Serializable; -import org.apache.beam.sdk.util.WindowedValueMultiReceiver; - -/** Factory class to create {@link WindowedValueMultiReceiver}. */ -public interface OutputManagerFactory extends Serializable { - WindowedValueMultiReceiver create(OpEmitter emitter); - - default WindowedValueMultiReceiver create( - OpEmitter emitter, FutureCollector collector) { - return create(emitter); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableBundleManager.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableBundleManager.java deleted file mode 100644 index 26cc73c76f91..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableBundleManager.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Bundle management for the {@link DoFnOp} that handles lifecycle of a bundle. It also serves as a - * proxy for the {@link DoFnOp} to process watermark and decides to 1. Hold watermark if there is at - * least one bundle in progress. 2. Propagates the watermark to downstream DAG, if all the previous - * bundles have completed. - * - *

This class is not thread safe and the current implementation relies on the assumption that - * messages are dispatched to BundleManager in a single threaded mode. - * - * @param output type of the {@link DoFnOp} - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class PortableBundleManager implements BundleManager { - private static final Logger LOG = LoggerFactory.getLogger(PortableBundleManager.class); - private static final long MIN_BUNDLE_CHECK_TIME_MS = 10L; - - private final long maxBundleSize; - private final long maxBundleTimeMs; - private final BundleProgressListener bundleProgressListener; - private final Scheduler> bundleTimerScheduler; - private final String bundleCheckTimerId; - - // Number elements belonging to the current active bundle - private AtomicLong currentBundleElementCount; - // Number of bundles that are in progress but not yet finished - private AtomicLong pendingBundleCount; - // Denotes the start time of the current active bundle - private AtomicLong bundleStartTime; - // Denotes if there is an active in progress bundle. Note at a given time, we can have multiple - // bundle in progress. - // This flag denotes if there is a bundle that is current and hasn't been closed. - private AtomicBoolean isBundleStarted; - // Holder for watermark which gets propagated when the bundle is finished. 
- private volatile Instant bundleWatermarkHold; - - public PortableBundleManager( - BundleProgressListener bundleProgressListener, - long maxBundleSize, - long maxBundleTimeMs, - Scheduler> bundleTimerScheduler, - String bundleCheckTimerId) { - this.maxBundleSize = maxBundleSize; - this.maxBundleTimeMs = maxBundleTimeMs; - this.bundleProgressListener = bundleProgressListener; - this.bundleTimerScheduler = bundleTimerScheduler; - this.bundleCheckTimerId = bundleCheckTimerId; - - if (maxBundleSize > 1) { - scheduleNextBundleCheck(); - } - - // instance variable initialization for bundle tracking - this.bundleStartTime = new AtomicLong(Long.MAX_VALUE); - this.currentBundleElementCount = new AtomicLong(0); - this.isBundleStarted = new AtomicBoolean(false); - this.pendingBundleCount = new AtomicLong(0); - } - - /* - * Schedule in processing time to check whether the current bundle should be closed. Note that - * we only approximately achieve max bundle time by checking as frequent as half of the max bundle - * time set by users. This would violate the max bundle time by up to half of it but should - * acceptable in most cases (and cheaper than scheduling a timer at the beginning of every bundle). 
- */ - private void scheduleNextBundleCheck() { - final Instant nextBundleCheckTime = - Instant.now().plus(Duration.millis(maxBundleTimeMs / 2 + MIN_BUNDLE_CHECK_TIME_MS)); - final TimerInternals.TimerData timerData = - TimerInternals.TimerData.of( - this.bundleCheckTimerId, - StateNamespaces.global(), - nextBundleCheckTime, - nextBundleCheckTime, - TimeDomain.PROCESSING_TIME, - CausedByDrain.NORMAL); - bundleTimerScheduler.schedule( - new KeyedTimerData<>(new byte[0], null, timerData), nextBundleCheckTime.getMillis()); - } - - @Override - public void tryStartBundle() { - inconsistentStateCheck(); - - LOG.debug( - "tryStartBundle: elementCount={}, Bundle={}", currentBundleElementCount, this.toString()); - - if (isBundleStarted.compareAndSet(false, true)) { - LOG.debug("Starting a new bundle."); - bundleStartTime.set(System.currentTimeMillis()); - pendingBundleCount.getAndIncrement(); - bundleProgressListener.onBundleStarted(); - } - - currentBundleElementCount.incrementAndGet(); - } - - @Override - public void processWatermark(Instant watermark, OpEmitter emitter) { - // propagate watermark immediately if no bundle is in progress and all the previous bundles have - // completed. - if (shouldProcessWatermark()) { - LOG.debug("Propagating watermark: {} directly since no bundle in progress.", watermark); - bundleProgressListener.onWatermark(watermark, emitter); - return; - } - - // hold back the watermark since there is either a bundle in progress or previously closed - // bundles are unfinished. - this.bundleWatermarkHold = watermark; - } - - @Override - public void processTimer(KeyedTimerData keyedTimerData, OpEmitter emitter) { - inconsistentStateCheck(); - // this is internal timer in processing time to check whether a bundle should be closed - if (bundleCheckTimerId.equals(keyedTimerData.getTimerData().getTimerId())) { - tryFinishBundle(emitter); - scheduleNextBundleCheck(); - } - } - - /** - * Signal the bundle manager to handle failure. 
We discard the output collected as part of - * processing the current element and reset the bundle count. - * - * @param t failure cause - */ - @Override - public void signalFailure(Throwable t) { - inconsistentStateCheck(); - LOG.error("Encountered error during processing the message. Discarding the output due to: ", t); - - isBundleStarted.set(false); - currentBundleElementCount.set(0); - bundleStartTime.set(Long.MAX_VALUE); - pendingBundleCount.decrementAndGet(); - } - - @Override - public void tryFinishBundle(OpEmitter emitter) { - LOG.debug("tryFinishBundle: elementCount={}", currentBundleElementCount); - inconsistentStateCheck(); - if (shouldFinishBundle() && isBundleStarted.compareAndSet(true, false)) { - LOG.debug("Finishing the current bundle. Bundle={}", this); - currentBundleElementCount.set(0); - bundleStartTime.set(Long.MAX_VALUE); - - Instant watermarkHold = bundleWatermarkHold; - bundleWatermarkHold = null; - - pendingBundleCount.decrementAndGet(); - - bundleProgressListener.onBundleFinished(emitter); - if (watermarkHold != null) { - bundleProgressListener.onWatermark(watermarkHold, emitter); - } - } - } - - public void inconsistentStateCheck() { - if (!isBundleStarted.get() && currentBundleElementCount.get() != 0) { - LOG.warn( - "Bundle is in a inconsistent state. isBundleStarted = false, but currentBundleElementCount = {}", - currentBundleElementCount); - } - } - - private boolean shouldProcessWatermark() { - return !isBundleStarted.get() && pendingBundleCount.get() == 0; - } - - /** - * We close the current bundle in progress if one of the following criteria is met 1. The bundle - * count ≥ maxBundleSize 2. Time elapsed since the bundle started is ≥ maxBundleTimeMs 3. 
- * Watermark hold equals to TIMESTAMP_MAX_VALUE which usually is the case for bounded jobs - * - * @return true - if one of the criteria above is satisfied; false - otherwise - */ - private boolean shouldFinishBundle() { - return (currentBundleElementCount.get() >= maxBundleSize - || System.currentTimeMillis() - bundleStartTime.get() >= maxBundleTimeMs - || BoundedWindow.TIMESTAMP_MAX_VALUE.equals(bundleWatermarkHold)); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableDoFnOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableDoFnOp.java deleted file mode 100644 index 468e4b9aa8dc..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/PortableDoFnOp.java +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.ServiceLoader; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.PushbackSideInputDoFnRunner; -import org.apache.beam.runners.core.SideInputHandler; -import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.fnexecution.control.ExecutableStageContext; -import org.apache.beam.runners.fnexecution.control.StageBundleFactory; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; -import org.apache.beam.runners.samza.SamzaExecutionContext; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.util.DoFnUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.reflect.DoFnInvoker; -import org.apache.beam.sdk.transforms.reflect.DoFnInvokers; -import org.apache.beam.sdk.transforms.reflect.DoFnSignature; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowingStrategy; -import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterators; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Samza operator for {@link DoFn}. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class PortableDoFnOp implements Op { - private static final Logger LOG = LoggerFactory.getLogger(PortableDoFnOp.class); - - private final TupleTag mainOutputTag; - private final DoFn doFn; - private final Coder keyCoder; - private final Collection> sideInputs; - private final List> sideOutputTags; - private final WindowingStrategy windowingStrategy; - private final OutputManagerFactory outputManagerFactory; - // NOTE: we use HashMap here to guarantee Serializability - // Mapping from view id to a view - private final HashMap> idToViewMap; - private final String transformFullName; - private final String transformId; - private final Coder inputCoder; - private final Coder> windowedValueCoder; - private final HashMap, Coder> outputCoders; - private final PCollection.IsBounded isBounded; - private final String bundleCheckTimerId; - private final String bundleStateId; - - // portable api related - private final boolean isPortable; - private final RunnerApi.ExecutableStagePayload stagePayload; - private final JobInfo jobInfo; - private final HashMap> idToTupleTagMap; - - private transient SamzaTimerInternalsFactory timerInternalsFactory; - private transient DoFnRunner fnRunner; - private transient PushbackSideInputDoFnRunner pushbackFnRunner; - private transient SideInputHandler sideInputHandler; - private transient DoFnInvoker doFnInvoker; - private transient SamzaPipelineOptions samzaPipelineOptions; - - // This is derivable from pushbackValues which is persisted to a 
store. - // TODO: eagerly initialize the hold in init - @edu.umd.cs.findbugs.annotations.SuppressWarnings( - justification = "No bug", - value = "SE_TRANSIENT_FIELD_NOT_RESTORED") - private transient Instant pushbackWatermarkHold; - - // TODO: add this to checkpointable state - private transient Instant inputWatermark; - private transient BundleManager bundleManager; - private transient Instant sideInputWatermark; - private transient List> pushbackValues; - private transient ExecutableStageContext stageContext; - private transient StageBundleFactory stageBundleFactory; - private transient boolean bundleDisabled; - - private final DoFnSchemaInformation doFnSchemaInformation; - private final Map> sideInputMapping; - private final Map stateIdToStoreMapping; - - public PortableDoFnOp( - TupleTag mainOutputTag, - DoFn doFn, - Coder keyCoder, - Coder inputCoder, - Coder> windowedValueCoder, - Map, Coder> outputCoders, - Collection> sideInputs, - List> sideOutputTags, - WindowingStrategy windowingStrategy, - Map> idToViewMap, - OutputManagerFactory outputManagerFactory, - String transformFullName, - String transformId, - PCollection.IsBounded isBounded, - boolean isPortable, - RunnerApi.ExecutableStagePayload stagePayload, - JobInfo jobInfo, - Map> idToTupleTagMap, - DoFnSchemaInformation doFnSchemaInformation, - Map> sideInputMapping, - Map stateIdToStoreMapping) { - this.mainOutputTag = mainOutputTag; - this.doFn = doFn; - this.sideInputs = sideInputs; - this.sideOutputTags = sideOutputTags; - this.inputCoder = inputCoder; - this.windowedValueCoder = windowedValueCoder; - this.outputCoders = new HashMap<>(outputCoders); - this.windowingStrategy = windowingStrategy; - this.idToViewMap = new HashMap<>(idToViewMap); - this.outputManagerFactory = outputManagerFactory; - this.transformFullName = transformFullName; - this.transformId = transformId; - this.keyCoder = keyCoder; - this.isBounded = isBounded; - this.isPortable = isPortable; - this.stagePayload = stagePayload; - 
this.jobInfo = jobInfo; - this.idToTupleTagMap = new HashMap<>(idToTupleTagMap); - this.bundleCheckTimerId = "_samza_bundle_check_" + transformId; - this.bundleStateId = "_samza_bundle_" + transformId; - this.doFnSchemaInformation = doFnSchemaInformation; - this.sideInputMapping = sideInputMapping; - this.stateIdToStoreMapping = stateIdToStoreMapping; - } - - @Override - @SuppressWarnings("unchecked") - public void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter emitter) { - this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE; - - final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass()); - final SamzaExecutionContext samzaExecutionContext = - (SamzaExecutionContext) context.getApplicationContainerContext(); - this.samzaPipelineOptions = samzaExecutionContext.getPipelineOptions(); - this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1; - - final String stateId = "pardo-" + transformId; - final SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory = - SamzaStoreStateInternals.createNonKeyedStateInternalsFactory( - stateId, context.getTaskContext(), samzaPipelineOptions); - final FutureCollector outputFutureCollector = createFutureCollector(); - - this.bundleManager = - new ClassicBundleManager<>( - createBundleProgressListener(), - outputFutureCollector, - samzaPipelineOptions.getMaxBundleSize(), - samzaPipelineOptions.getMaxBundleTimeMs(), - timerRegistry, - bundleCheckTimerId); - - this.timerInternalsFactory = - SamzaTimerInternalsFactory.createTimerInternalFactory( - keyCoder, - (Scheduler) timerRegistry, - getTimerStateId(signature), - nonKeyedStateInternalsFactory, - windowingStrategy, - isBounded, - samzaPipelineOptions); - - this.sideInputHandler = - new SideInputHandler(sideInputs, nonKeyedStateInternalsFactory.stateInternalsForKey(null)); - - if 
(isPortable) { - final ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload); - stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo); - stageBundleFactory = stageContext.getStageBundleFactory(executableStage); - this.fnRunner = - SamzaDoFnRunners.createPortable( - transformId, - DoFnUtils.toStepName(executableStage), - bundleStateId, - windowedValueCoder, - executableStage, - sideInputMapping, - sideInputHandler, - nonKeyedStateInternalsFactory, - timerInternalsFactory, - samzaPipelineOptions, - outputManagerFactory.create(emitter, outputFutureCollector), - stageBundleFactory, - samzaExecutionContext, - mainOutputTag, - idToTupleTagMap, - context, - transformFullName); - } else { - this.fnRunner = - SamzaDoFnRunners.create( - samzaPipelineOptions, - doFn, - windowingStrategy, - transformFullName, - stateId, - context, - mainOutputTag, - sideInputHandler, - timerInternalsFactory, - keyCoder, - outputManagerFactory.create(emitter, outputFutureCollector), - inputCoder, - sideOutputTags, - outputCoders, - doFnSchemaInformation, - (Map>) sideInputMapping, - stateIdToStoreMapping, - emitter, - outputFutureCollector); - } - - this.pushbackFnRunner = - SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler); - this.pushbackValues = new ArrayList<>(); - - final Iterator invokerReg = - ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator(); - if (!invokerReg.hasNext()) { - // use the default invoker here - doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions); - } else { - doFnInvoker = - Iterators.getOnlyElement(invokerReg).invokerSetupFor(doFn, samzaPipelineOptions, context); - } - } - - FutureCollector createFutureCollector() { - return new FutureCollectorImpl<>(); - } - - private String getTimerStateId(DoFnSignature signature) { - final StringBuilder builder = new StringBuilder("timer"); - if (signature.usesTimers()) { - 
signature.timerDeclarations().keySet().forEach(builder::append); - } - return builder.toString(); - } - - @Override - public void processElement(WindowedValue inputElement, OpEmitter emitter) { - try { - bundleManager.tryStartBundle(); - final Iterable> rejectedValues = - pushbackFnRunner.processElementInReadyWindows(inputElement); - for (WindowedValue rejectedValue : rejectedValues) { - if (rejectedValue.getTimestamp().compareTo(pushbackWatermarkHold) < 0) { - pushbackWatermarkHold = rejectedValue.getTimestamp(); - } - pushbackValues.add(rejectedValue); - } - - bundleManager.tryFinishBundle(emitter); - } catch (Throwable t) { - LOG.error("Encountered error during process element", t); - bundleManager.signalFailure(t); - throw t; - } - } - - private void doProcessWatermark(Instant watermark, OpEmitter emitter) { - this.inputWatermark = watermark; - - if (sideInputWatermark.isEqual(BoundedWindow.TIMESTAMP_MAX_VALUE)) { - // this means we will never see any more side input - emitAllPushbackValues(); - } - - final Instant actualInputWatermark = - pushbackWatermarkHold.isBefore(inputWatermark) ? 
pushbackWatermarkHold : inputWatermark; - - timerInternalsFactory.setInputWatermark(actualInputWatermark); - - Collection> readyTimers = timerInternalsFactory.removeReadyTimers(); - if (!readyTimers.isEmpty()) { - pushbackFnRunner.startBundle(); - for (KeyedTimerData keyedTimerData : readyTimers) { - fireTimer(keyedTimerData); - } - pushbackFnRunner.finishBundle(); - } - - if (timerInternalsFactory.getOutputWatermark() == null - || timerInternalsFactory.getOutputWatermark().isBefore(actualInputWatermark)) { - timerInternalsFactory.setOutputWatermark(actualInputWatermark); - emitter.emitWatermark(timerInternalsFactory.getOutputWatermark()); - } - } - - @Override - public void processWatermark(Instant watermark, OpEmitter emitter) { - bundleManager.processWatermark(watermark, emitter); - } - - @Override - public void processSideInput( - String id, WindowedValue> elements, OpEmitter emitter) { - checkState( - bundleDisabled, "Side input not supported in bundling mode. Please disable bundling."); - @SuppressWarnings("unchecked") - final WindowedValue> retypedElements = (WindowedValue>) elements; - - final PCollectionView view = idToViewMap.get(id); - if (view == null) { - throw new IllegalArgumentException("No mapping of id " + id + " to view."); - } - - sideInputHandler.addSideInputValue(view, retypedElements); - - final List> previousPushbackValues = new ArrayList<>(pushbackValues); - pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE; - pushbackValues.clear(); - - for (final WindowedValue value : previousPushbackValues) { - processElement(value, emitter); - } - - // We may be able to advance the output watermark since we may have played some pushed back - // events. - processWatermark(this.inputWatermark, emitter); - } - - @Override - public void processSideInputWatermark(Instant watermark, OpEmitter emitter) { - checkState( - bundleDisabled, "Side input not supported in bundling mode. 
Please disable bundling."); - sideInputWatermark = watermark; - - if (sideInputWatermark.isEqual(BoundedWindow.TIMESTAMP_MAX_VALUE)) { - // this means we will never see any more side input - processWatermark(this.inputWatermark, emitter); - } - } - - @Override - @SuppressWarnings("unchecked") - public void processTimer(KeyedTimerData keyedTimerData, OpEmitter emitter) { - // this is internal timer in processing time to check whether a bundle should be closed - if (bundleCheckTimerId.equals(keyedTimerData.getTimerData().getTimerId())) { - bundleManager.processTimer(keyedTimerData, emitter); - return; - } - - pushbackFnRunner.startBundle(); - fireTimer(keyedTimerData); - pushbackFnRunner.finishBundle(); - - this.timerInternalsFactory.removeProcessingTimer((KeyedTimerData) keyedTimerData); - } - - @Override - public void close() { - doFnInvoker.invokeTeardown(); - try (AutoCloseable factory = stageBundleFactory; - AutoCloseable context = stageContext) { - // do nothing - } catch (Exception e) { - LOG.error("Failed to close stage bundle factory", e); - } - } - - private void fireTimer(KeyedTimerData keyedTimerData) { - final TimerInternals.TimerData timer = keyedTimerData.getTimerData(); - LOG.debug("Firing timer {}", timer); - - final StateNamespace namespace = timer.getNamespace(); - // NOTE: not sure why this is safe, but DoFnOperator makes this assumption - final BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow(); - - fnRunner.onTimer( - timer.getTimerId(), - timer.getTimerFamilyId(), - keyedTimerData.getKey(), - window, - timer.getTimestamp(), - timer.getOutputTimestamp(), - timer.getDomain(), - timer.causedByDrain()); - } - - // todo: should this go through bundle manager to start and finish the bundle? 
- private void emitAllPushbackValues() { - if (!pushbackValues.isEmpty()) { - pushbackFnRunner.startBundle(); - - final List> previousPushbackValues = new ArrayList<>(pushbackValues); - pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE; - pushbackValues.clear(); - - for (final WindowedValue value : previousPushbackValues) { - fnRunner.processElement(value); - } - - pushbackFnRunner.finishBundle(); - } - } - - private BundleManager.BundleProgressListener createBundleProgressListener() { - return new BundleManager.BundleProgressListener() { - @Override - public void onBundleStarted() { - pushbackFnRunner.startBundle(); - } - - @Override - public void onBundleFinished(OpEmitter emitter) { - pushbackFnRunner.finishBundle(); - } - - @Override - public void onWatermark(Instant watermark, OpEmitter emitter) { - doProcessWatermark(watermark, emitter); - } - }; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaAssignContext.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaAssignContext.java deleted file mode 100644 index 0f8933e66cbf..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaAssignContext.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.WindowFn; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.joda.time.Instant; - -@SuppressWarnings({"keyfor", "nullness"}) // TODO(https://github.com/apache/beam/issues/20497) -class SamzaAssignContext extends WindowFn.AssignContext { - private final WindowedValue value; - - public SamzaAssignContext(WindowFn fn, WindowedValue value) { - fn.super(); - this.value = value; - - if (value.getWindows().size() != 1) { - throw new IllegalArgumentException( - String.format( - "Only single windowed value allowed for assignment. Windows: %s", - value.getWindows())); - } - } - - @Override - public InT element() { - return value.getValue(); - } - - @Override - public Instant timestamp() { - return value.getTimestamp(); - } - - @Override - public BoundedWindow window() { - return Iterables.getOnlyElement(value.getWindows()); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnInvokerRegistrar.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnInvokerRegistrar.java deleted file mode 100644 index c0536161cb79..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnInvokerRegistrar.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Map; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.reflect.DoFnInvoker; -import org.apache.samza.context.Context; - -/** A registrar for Samza DoFnInvoker. */ -public interface SamzaDoFnInvokerRegistrar { - - /** Returns the invoker for a {@link DoFn}. */ - DoFnInvoker invokerSetupFor( - DoFn fn, PipelineOptions options, Context context); - - /** Returns the configs for a {@link DoFn}. */ - Map configFor(DoFn fn); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java deleted file mode 100644 index a2ec88a43415..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaDoFnRunners.java +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.stream.Collectors; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.DoFnRunners; -import org.apache.beam.runners.core.SideInputHandler; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.StateTags; -import org.apache.beam.runners.core.StatefulDoFnRunner; -import org.apache.beam.runners.core.StepContext; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.fnexecution.control.OutputReceiverFactory; -import org.apache.beam.runners.fnexecution.control.RemoteBundle; -import org.apache.beam.runners.fnexecution.control.StageBundleFactory; -import org.apache.beam.runners.fnexecution.control.TimerReceiverFactory; -import org.apache.beam.runners.fnexecution.state.StateRequestHandler; -import org.apache.beam.runners.samza.SamzaExecutionContext; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.metrics.DoFnRunnerWithMetrics; 
-import org.apache.beam.runners.samza.util.StateUtils; -import org.apache.beam.runners.samza.util.WindowUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.fn.data.FnDataReceiver; -import org.apache.beam.sdk.state.BagState; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.util.WindowedValueMultiReceiver; -import org.apache.beam.sdk.util.construction.Timer; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.samza.context.Context; -import org.joda.time.Instant; - -/** A factory for Samza runner translator to create underlying DoFnRunner used in {@link DoFnOp}. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaDoFnRunners { - - /** Create DoFnRunner for java runner. 
*/ - public static DoFnRunner create( - SamzaPipelineOptions pipelineOptions, - DoFn doFn, - WindowingStrategy windowingStrategy, - String transformFullName, - String transformId, - Context context, - TupleTag mainOutputTag, - SideInputHandler sideInputHandler, - SamzaTimerInternalsFactory timerInternalsFactory, - Coder keyCoder, - WindowedValueMultiReceiver outputManager, - Coder inputCoder, - List> sideOutputTags, - Map, Coder> outputCoders, - DoFnSchemaInformation doFnSchemaInformation, - Map> sideInputMapping, - Map stateIdToStoreIdMapping, - OpEmitter emitter, - FutureCollector futureCollector) { - final KeyedInternals keyedInternals; - final TimerInternals timerInternals; - final StateInternals stateInternals; - final SamzaStoreStateInternals.Factory stateInternalsFactory = - SamzaStoreStateInternals.createStateInternalsFactory( - transformId, - keyCoder, - context.getTaskContext(), - pipelineOptions, - stateIdToStoreIdMapping); - - final SamzaExecutionContext executionContext = - (SamzaExecutionContext) context.getApplicationContainerContext(); - if (StateUtils.isStateful(doFn)) { - keyedInternals = new KeyedInternals(stateInternalsFactory, timerInternalsFactory); - stateInternals = keyedInternals.stateInternals(); - timerInternals = keyedInternals.timerInternals(); - } else { - keyedInternals = null; - stateInternals = stateInternalsFactory.stateInternalsForKey(null); - timerInternals = timerInternalsFactory.timerInternalsForKey(null); - } - - final StepContext stepContext = createStepContext(stateInternals, timerInternals); - final DoFnRunner underlyingRunner = - DoFnRunners.simpleRunner( - pipelineOptions, - doFn, - sideInputHandler, - outputManager, - mainOutputTag, - sideOutputTags, - stepContext, - inputCoder, - outputCoders, - windowingStrategy, - doFnSchemaInformation, - sideInputMapping); - - final DoFnRunner doFnRunnerWithMetrics = - pipelineOptions.getEnableMetrics() - ? 
DoFnRunnerWithMetrics.wrap( - underlyingRunner, executionContext.getMetricsContainer(), transformFullName) - : underlyingRunner; - - final DoFnRunner doFnRunnerWithStates; - if (keyedInternals != null) { - final DoFnRunner statefulDoFnRunner = - DoFnRunners.defaultStatefulDoFnRunner( - doFn, - inputCoder, - doFnRunnerWithMetrics, - stepContext, - windowingStrategy, - new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, windowingStrategy), - createStateCleaner(doFn, windowingStrategy, keyedInternals.stateInternals())); - - doFnRunnerWithStates = new DoFnRunnerWithKeyedInternals<>(statefulDoFnRunner, keyedInternals); - } else { - doFnRunnerWithStates = doFnRunnerWithMetrics; - } - - return pipelineOptions.getNumThreadsForProcessElement() > 1 - ? AsyncDoFnRunner.create( - doFnRunnerWithStates, emitter, futureCollector, keyedInternals != null, pipelineOptions) - : doFnRunnerWithStates; - } - - /** Creates a {@link StepContext} that allows accessing state and timer internals. */ - private static StepContext createStepContext( - StateInternals stateInternals, TimerInternals timerInternals) { - return new StepContext() { - @Override - public StateInternals stateInternals() { - return stateInternals; - } - - @Override - public TimerInternals timerInternals() { - return timerInternals; - } - }; - } - - @SuppressWarnings("unchecked") - private static StatefulDoFnRunner.StateCleaner createStateCleaner( - DoFn doFn, - WindowingStrategy windowingStrategy, - StateInternals stateInternals) { - final TypeDescriptor windowType = windowingStrategy.getWindowFn().getWindowTypeDescriptor(); - if (windowType.isSubtypeOf(TypeDescriptor.of(BoundedWindow.class))) { - final Coder windowCoder = - windowingStrategy.getWindowFn().windowCoder(); - return new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stateInternals, windowCoder); - } else { - return null; - } - } - - /** Create DoFnRunner for portable runner. 
*/ - @SuppressWarnings("unchecked") - public static DoFnRunner createPortable( - String transformId, - String stepName, - String bundleStateId, - Coder> windowedValueCoder, - ExecutableStage executableStage, - Map> sideInputMapping, - SideInputHandler sideInputHandler, - SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory, - SamzaTimerInternalsFactory timerInternalsFactory, - SamzaPipelineOptions pipelineOptions, - WindowedValueMultiReceiver outputManager, - StageBundleFactory stageBundleFactory, - SamzaExecutionContext samzaExecutionContext, - TupleTag mainOutputTag, - Map> idToTupleTagMap, - Context context, - String transformFullName) { - // storing events within a bundle in states - final BagState> bundledEventsBag = - nonKeyedStateInternalsFactory - .stateInternalsForKey(null) - .state(StateNamespaces.global(), StateTags.bag(bundleStateId, windowedValueCoder)); - - final StateRequestHandler stateRequestHandler = - SamzaStateRequestHandlers.of( - transformId, - context.getTaskContext(), - pipelineOptions, - executableStage, - stageBundleFactory, - (Map>) - sideInputMapping, - sideInputHandler); - - final SamzaExecutionContext executionContext = - (SamzaExecutionContext) context.getApplicationContainerContext(); - final DoFnRunner underlyingRunner = - new SdkHarnessDoFnRunner<>( - pipelineOptions, - stepName, - timerInternalsFactory, - WindowUtils.getWindowStrategy( - executableStage.getInputPCollection().getId(), executableStage.getComponents()), - outputManager, - stageBundleFactory, - idToTupleTagMap, - bundledEventsBag, - stateRequestHandler, - samzaExecutionContext, - executableStage.getTransforms()); - return pipelineOptions.getEnableMetrics() - ? 
DoFnRunnerWithMetrics.wrap( - underlyingRunner, executionContext.getMetricsContainer(), transformFullName) - : underlyingRunner; - } - - static class SdkHarnessDoFnRunner implements DoFnRunner { - - private static final int DEFAULT_METRIC_SAMPLE_RATE = 100; - - private final SamzaPipelineOptions pipelineOptions; - private final SamzaTimerInternalsFactory timerInternalsFactory; - private final WindowingStrategy windowingStrategy; - private final WindowedValueMultiReceiver outputManager; - private final StageBundleFactory stageBundleFactory; - private final Map> idToTupleTagMap; - private final LinkedBlockingQueue> outputQueue = new LinkedBlockingQueue<>(); - private final BagState> bundledEventsBag; - private RemoteBundle remoteBundle; - private FnDataReceiver> inputReceiver; - private final StateRequestHandler stateRequestHandler; - private final SamzaExecutionContext samzaExecutionContext; - private long startBundleTime; - private final String stepName; - private final Collection pTransformNodes; - - private SdkHarnessDoFnRunner( - SamzaPipelineOptions pipelineOptions, - String stepName, - SamzaTimerInternalsFactory timerInternalsFactory, - WindowingStrategy windowingStrategy, - WindowedValueMultiReceiver outputManager, - StageBundleFactory stageBundleFactory, - Map> idToTupleTagMap, - BagState> bundledEventsBag, - StateRequestHandler stateRequestHandler, - SamzaExecutionContext samzaExecutionContext, - Collection pTransformNodes) { - this.pipelineOptions = pipelineOptions; - this.timerInternalsFactory = timerInternalsFactory; - this.windowingStrategy = windowingStrategy; - this.outputManager = outputManager; - this.stageBundleFactory = stageBundleFactory; - this.idToTupleTagMap = idToTupleTagMap; - this.bundledEventsBag = bundledEventsBag; - this.stateRequestHandler = stateRequestHandler; - this.samzaExecutionContext = samzaExecutionContext; - this.stepName = stepName; - this.pTransformNodes = pTransformNodes; - } - - @SuppressWarnings("unchecked") - private void 
timerDataConsumer(Timer timerElement, TimerInternals.TimerData timerData) { - TimerInternals timerInternals = - timerInternalsFactory.timerInternalsForKey(timerElement.getUserKey()); - if (timerElement.getClearBit()) { - timerInternals.deleteTimer(timerData); - } else { - timerInternals.setTimer(timerData); - } - } - - @Override - public void startBundle() { - try { - OutputReceiverFactory receiverFactory = - new OutputReceiverFactory() { - @Override - public FnDataReceiver create(String pCollectionId) { - return (receivedElement) -> { - // handover to queue, do not block the grpc thread - outputQueue.put(KV.of(pCollectionId, receivedElement)); - }; - } - }; - - final Coder windowCoder = windowingStrategy.getWindowFn().windowCoder(); - final TimerReceiverFactory timerReceiverFactory = - new TimerReceiverFactory(stageBundleFactory, this::timerDataConsumer, windowCoder); - - Map transformFullNameToUniqueName = - pTransformNodes.stream() - .collect( - Collectors.toMap( - pTransformNode -> pTransformNode.getId(), - pTransformNode -> pTransformNode.getTransform().getUniqueName())); - - SamzaMetricsBundleProgressHandler samzaMetricsBundleProgressHandler = - new SamzaMetricsBundleProgressHandler( - stepName, - samzaExecutionContext.getMetricsContainer(), - transformFullNameToUniqueName); - - remoteBundle = - stageBundleFactory.getBundle( - receiverFactory, - timerReceiverFactory, - stateRequestHandler, - samzaMetricsBundleProgressHandler); - - startBundleTime = getStartBundleTime(); - - inputReceiver = Iterables.getOnlyElement(remoteBundle.getInputReceivers().values()); - bundledEventsBag - .read() - .forEach( - elem -> { - try { - inputReceiver.accept(elem); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @SuppressWarnings({ - "RandomModInteger" // https://errorprone.info/bugpattern/RandomModInteger - }) - private long getStartBundleTime() { - /* - * Use random number for sampling 
purpose instead of counting as - * SdkHarnessDoFnRunner is stateless and counters won't persist - * between invocations of DoFn(s). - */ - return ThreadLocalRandom.current().nextInt() % DEFAULT_METRIC_SAMPLE_RATE == 0 - ? System.nanoTime() - : 0; - } - - @Override - public void processElement(WindowedValue elem) { - try { - bundledEventsBag.add(elem); - inputReceiver.accept(elem); - emitResults(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private void emitResults() { - KV result; - while ((result = outputQueue.poll()) != null) { - outputManager.output( - idToTupleTagMap.get(result.getKey()), (WindowedValue) result.getValue()); - } - } - - private void emitMetrics() { - if (startBundleTime <= 0) { - return; - } - - final long count = Iterables.size(bundledEventsBag.read()); - - if (count <= 0) { - return; - } - - final long finishBundleTime = System.nanoTime(); - final long averageProcessTime = (finishBundleTime - startBundleTime) / count; - - String metricName = "ExecutableStage-" + stepName + "-process-ns"; - samzaExecutionContext - .getMetricsContainer() - .updateExecutableStageBundleMetric(metricName, averageProcessTime); - } - - @Override - public void onTimer( - String timerId, - String timerFamilyId, - KeyT key, - BoundedWindow window, - Instant timestamp, - Instant outputTimestamp, - TimeDomain timeDomain, - CausedByDrain causedByDrain) { - final KV timerReceiverKey = - TimerReceiverFactory.decodeTimerDataTimerId(timerFamilyId); - final FnDataReceiver timerReceiver = - remoteBundle.getTimerReceivers().get(timerReceiverKey); - final Timer timerValue = - Timer.of( - key, - timerId, - Collections.singletonList(window), - timestamp, - outputTimestamp, - // TODO: Support propagating the PaneInfo through. 
- PaneInfo.NO_FIRING, - causedByDrain); - try { - timerReceiver.accept(timerValue); - } catch (Exception e) { - throw new RuntimeException( - String.format(Locale.ENGLISH, "Failed to process timer %s", timerReceiver), e); - } - } - - @Override - public void finishBundle() { - try { - runWithTimeout( - pipelineOptions.getBundleProcessingTimeout(), - () -> { - // RemoteBundle close blocks until all results are received - try { - remoteBundle.close(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - emitResults(); - emitMetrics(); - bundledEventsBag.clear(); - } catch (Exception e) { - throw new RuntimeException("Failed to finish remote bundle", e); - } finally { - remoteBundle = null; - inputReceiver = null; - } - } - - /** - * Run a function and wait for at most the given time (in milliseconds). - * - * @param timeoutInMs the time to wait for completing the function call. If the value of timeout - * is negative, wait forever until the function call is completed - * @param runnable the main function - */ - static void runWithTimeout(long timeoutInMs, Runnable runnable) - throws ExecutionException, InterruptedException, TimeoutException { - if (timeoutInMs < 0) { - runnable.run(); - } else { - CompletableFuture.runAsync(runnable).get(timeoutInMs, TimeUnit.MILLISECONDS); - } - } - - @Override - public void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) {} - - @Override - public DoFn getFn() { - throw new UnsupportedOperationException(); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaExecutableStageContextFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaExecutableStageContextFactory.java deleted file mode 100644 index f034d031f2c2..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaExecutableStageContextFactory.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import org.apache.beam.runners.fnexecution.control.DefaultExecutableStageContext; -import org.apache.beam.runners.fnexecution.control.ExecutableStageContext; -import org.apache.beam.runners.fnexecution.control.ReferenceCountingExecutableStageContextFactory; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; - -/** - * Singleton class that contains one {@link ExecutableStageContext.Factory} per job. Assumes it is - * safe to release the backing environment asynchronously. - */ -public class SamzaExecutableStageContextFactory implements ExecutableStageContext.Factory { - - private static final SamzaExecutableStageContextFactory instance = - new SamzaExecutableStageContextFactory(); - // This map should only ever have a single element, as each job will have its own - // classloader and therefore its own instance of SamzaExecutableStageContextFactory. This - // code supports multiple JobInfos in order to provide a sensible implementation of - // Factory.get(JobInfo), which in theory could be called with different JobInfos. 
- private static final ConcurrentMap jobFactories = - new ConcurrentHashMap<>(); - - private SamzaExecutableStageContextFactory() {} - - public static SamzaExecutableStageContextFactory getInstance() { - return instance; - } - - @Override - public ExecutableStageContext get(JobInfo jobInfo) { - ExecutableStageContext.Factory jobFactory = - jobFactories.computeIfAbsent( - jobInfo.jobId(), - k -> - ReferenceCountingExecutableStageContextFactory.create( - DefaultExecutableStageContext::create, - // Always release environment asynchronously. - (caller) -> false)); - - return jobFactory.get(jobInfo); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandler.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandler.java deleted file mode 100644 index 010ae53455f8..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandler.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns.DISTRIBUTION_INT64_TYPE; -import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns.LATEST_INT64_TYPE; -import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns.SUM_INT64_TYPE; -import static org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.decodeInt64Counter; -import static org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.decodeInt64Distribution; -import static org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.decodeInt64Gauge; - -import java.util.Map; -import org.apache.beam.model.fnexecution.v1.BeamFnApi; -import org.apache.beam.model.pipeline.v1.MetricsApi; -import org.apache.beam.runners.core.metrics.DistributionData; -import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; -import org.apache.beam.runners.fnexecution.control.BundleProgressHandler; -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Distribution; -import org.apache.beam.sdk.metrics.Gauge; -import org.apache.beam.sdk.metrics.MetricName; -import org.apache.beam.sdk.metrics.MetricsContainer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@inheritDoc} Parses metrics information contained in the bundle progress messages. Passed the - * updated metrics to the provided SamzaMetricsContainer. - */ -class SamzaMetricsBundleProgressHandler implements BundleProgressHandler { - - private static final Logger LOG = - LoggerFactory.getLogger(SamzaMetricsBundleProgressHandler.class); - private final String stepName; - - private final SamzaMetricsContainer samzaMetricsContainer; - private final Map transformIdToUniqueName; - - /** - * Constructor of a SamzaMetricsBundleProgressHandler. - * - *

The full metric names in classic mode is {transformUniqueName}:{className}:{metricName}. We - * attempt to follow the same format in portable mode, but the monitoringInfos returned by the - * worker only contains the transformId. The current solution is to provide a mapping from - * transformId back to uniqueName. A future improvement would be making the monitoring infos - * contain the uniqueName. - * - * @param stepName Default stepName provided by the runner. - * @param samzaMetricsContainer The destination for publishing the metrics. - * @param transformIdToUniqueName A mapping from transformId to uniqueName for pTransforms. - */ - public SamzaMetricsBundleProgressHandler( - String stepName, - SamzaMetricsContainer samzaMetricsContainer, - Map transformIdToUniqueName) { - this.stepName = stepName; - this.samzaMetricsContainer = samzaMetricsContainer; - this.transformIdToUniqueName = transformIdToUniqueName; - } - - @Override - /** - * {@inheritDoc} Handles a progress report from the bundle while it is executing. We choose to - * ignore the progress report. The metrics do not have to be updated on every progress report, so - * we save computation resources by ignoring it. - */ - public void onProgress(BeamFnApi.ProcessBundleProgressResponse progress) {} - - @Override - /** - * {@inheritDoc} Handles the bundle's completion report. Parses the monitoringInfos in the - * response, then updates the MetricsRegistry. - */ - public void onCompleted(BeamFnApi.ProcessBundleResponse response) { - response.getMonitoringInfosList().stream() - .filter(monitoringInfo -> !monitoringInfo.getPayload().isEmpty()) - .map(this::parseAndUpdateMetric) - .distinct() - .forEach(samzaMetricsContainer::updateMetrics); - } - - /** - * Parses the metric contained in monitoringInfo, then publishes the metric to the - * metricContainer. - * - *

We attempt to construct a classic mode metricName - * ({transformUniqueName}:{className}:{metricName}). All the info should be in the labels, but we - * have fallbacks in case the labels don't exist. - * - *

Priorities for the transformUniqueName 1. Obtained transformUniqueName using the - * transformIdToUniqueName 2. The transformId provided by the monitoringInfo 3. The stepName - * provided by the runner, which maybe a result of fusing. - * - *

Priorities for the className 1. The namespace label 2. The monitoringInfo urn. Copying the - * implementation in MonitoringInfoMetricName. - * - *

Priorities for the metricName 1. The name label 2. The monitoringInfo urn. Copying the - * implementation in MonitoringInfoMetricName. - * - * @see - * org.apache.beam.runners.core.metrics.MonitoringInfoMetricName#of(MetricsApi.MonitoringInfo) - * @return the final transformUniqueName for the metric - */ - private String parseAndUpdateMetric(MetricsApi.MonitoringInfo monitoringInfo) { - String pTransformId = - monitoringInfo.getLabelsOrDefault(MonitoringInfoConstants.Labels.PTRANSFORM, stepName); - String transformUniqueName = transformIdToUniqueName.getOrDefault(pTransformId, pTransformId); - String className = - monitoringInfo.getLabelsOrDefault( - MonitoringInfoConstants.Labels.NAMESPACE, monitoringInfo.getUrn()); - String userMetricName = - monitoringInfo.getLabelsOrDefault( - MonitoringInfoConstants.Labels.NAME, monitoringInfo.getLabelsMap().toString()); - - MetricsContainer metricsContainer = samzaMetricsContainer.getContainer(transformUniqueName); - MetricName metricName = MetricName.named(className, userMetricName); - - switch (monitoringInfo.getType()) { - case SUM_INT64_TYPE: - Counter counter = metricsContainer.getCounter(metricName); - counter.inc(decodeInt64Counter(monitoringInfo.getPayload())); - break; - - case DISTRIBUTION_INT64_TYPE: - Distribution distribution = metricsContainer.getDistribution(metricName); - DistributionData data = decodeInt64Distribution(monitoringInfo.getPayload()); - distribution.update(data.sum(), data.count(), data.min(), data.max()); - break; - - case LATEST_INT64_TYPE: - Gauge gauge = metricsContainer.getGauge(metricName); - // Gauge doesn't expose update as public. This will reset the timestamp. 
- - gauge.set(decodeInt64Gauge(monitoringInfo.getPayload()).value()); - break; - - default: - LOG.debug("Unsupported metric type {}", monitoringInfo.getType()); - } - return transformUniqueName; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStateRequestHandlers.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStateRequestHandlers.java deleted file mode 100644 index 0b6a0b6f4f14..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStateRequestHandlers.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.io.IOException; -import java.util.EnumMap; -import java.util.Iterator; -import java.util.Map; -import org.apache.beam.model.fnexecution.v1.BeamFnApi; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.core.SideInputHandler; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.StateTags; -import org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors; -import org.apache.beam.runners.fnexecution.control.StageBundleFactory; -import org.apache.beam.runners.fnexecution.state.StateRequestHandler; -import org.apache.beam.runners.fnexecution.state.StateRequestHandlers; -import org.apache.beam.runners.fnexecution.translation.StreamingSideInputHandlerFactory; -import org.apache.beam.runners.fnexecution.wire.ByteStringCoder; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.util.StateUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.state.BagState; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; -import org.apache.samza.context.TaskContext; - -/** - * This class creates {@link StateRequestHandler} for side inputs and states of the Samza portable - * runner. 
- */ -public class SamzaStateRequestHandlers { - - public static StateRequestHandler of( - String transformId, - TaskContext context, - SamzaPipelineOptions pipelineOptions, - ExecutableStage executableStage, - StageBundleFactory stageBundleFactory, - Map> sideInputIds, - SideInputHandler sideInputHandler) { - final StateRequestHandler sideInputStateHandler = - createSideInputStateHandler(executableStage, sideInputIds, sideInputHandler); - final StateRequestHandler userStateRequestHandler = - createUserStateRequestHandler( - transformId, executableStage, context, pipelineOptions, stageBundleFactory); - final EnumMap handlerMap = - new EnumMap<>(BeamFnApi.StateKey.TypeCase.class); - handlerMap.put(BeamFnApi.StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputStateHandler); - handlerMap.put(BeamFnApi.StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputStateHandler); - handlerMap.put(BeamFnApi.StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputStateHandler); - handlerMap.put(BeamFnApi.StateKey.TypeCase.BAG_USER_STATE, userStateRequestHandler); - return StateRequestHandlers.delegateBasedUponType(handlerMap); - } - - private static StateRequestHandler createSideInputStateHandler( - ExecutableStage executableStage, - Map> sideInputIds, - SideInputHandler sideInputHandler) { - - if (executableStage.getSideInputs().size() <= 0) { - return StateRequestHandler.unsupported(); - } - - final StateRequestHandlers.SideInputHandlerFactory sideInputHandlerFactory = - Preconditions.checkNotNull( - StreamingSideInputHandlerFactory.forStage( - executableStage, sideInputIds, sideInputHandler)); - try { - return StateRequestHandlers.forSideInputHandlerFactory( - ProcessBundleDescriptors.getSideInputs(executableStage), sideInputHandlerFactory); - } catch (IOException e) { - throw new RuntimeException("Failed to initialize SideInputHandler", e); - } - } - - private static StateRequestHandler createUserStateRequestHandler( - String transformId, - ExecutableStage executableStage, - TaskContext 
context, - SamzaPipelineOptions pipelineOptions, - StageBundleFactory stageBundleFactory) { - - if (!StateUtils.isStateful(executableStage)) { - return StateRequestHandler.unsupported(); - } - - final SamzaStoreStateInternals.Factory stateInternalsFactory = - SamzaStoreStateInternals.createStateInternalsFactory( - transformId, ByteStringCoder.of(), context, pipelineOptions, executableStage); - - return StateRequestHandlers.forBagUserStateHandlerFactory( - stageBundleFactory.getProcessBundleDescriptor(), - new BagUserStateFactory<>(stateInternalsFactory)); - } - - /** - * Factory to create {@link StateRequestHandlers.BagUserStateHandler} to provide bag state access - * for the given {@link K key} and {@link W window} provided by SDK worker, unlike classic - * pipeline where {@link K key} is set at {@link DoFnRunnerWithKeyedInternals#processElement} and - * {@link W window} is set at {@link - * org.apache.beam.runners.core.SimpleDoFnRunner.DoFnProcessContext#window()}}. - */ - static class BagUserStateFactory< - K extends ByteString, V extends ByteString, W extends BoundedWindow> - implements StateRequestHandlers.BagUserStateHandlerFactory { - - private final SamzaStoreStateInternals.Factory stateInternalsFactory; - - BagUserStateFactory(SamzaStoreStateInternals.Factory stateInternalsFactory) { - this.stateInternalsFactory = stateInternalsFactory; - } - - @Override - public StateRequestHandlers.BagUserStateHandler forUserState( - String pTransformId, - String userStateId, - Coder keyCoder, - Coder valueCoder, - Coder windowCoder) { - return new StateRequestHandlers.BagUserStateHandler() { - - /** {@inheritDoc} */ - @Override - public Iterable get(K key, W window) { - StateNamespace namespace = StateNamespaces.window(windowCoder, window); - BagState bagState = - stateInternalsFactory - .stateInternalsForKey(key) - .state(namespace, StateTags.bag(userStateId, valueCoder)); - return bagState.read(); - } - - /** {@inheritDoc} */ - @Override - public void append(K key, W 
window, Iterator values) { - StateNamespace namespace = StateNamespaces.window(windowCoder, window); - BagState bagState = - stateInternalsFactory - .stateInternalsForKey(key) - .state(namespace, StateTags.bag(userStateId, valueCoder)); - while (values.hasNext()) { - bagState.add(values.next()); - } - } - - /** {@inheritDoc} */ - @Override - public void clear(K key, W window) { - StateNamespace namespace = StateNamespaces.window(windowCoder, window); - BagState bagState = - stateInternalsFactory - .stateInternalsForKey(key) - .state(namespace, StateTags.bag(userStateId, valueCoder)); - bagState.clear(); - } - }; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java deleted file mode 100644 index f8530936789f..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternals.java +++ /dev/null @@ -1,1131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.Serializable; -import java.lang.ref.SoftReference; -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Collectors; -import javax.annotation.Nonnull; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateTag; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.state.SamzaMapState; -import org.apache.beam.runners.samza.state.SamzaSetState; -import org.apache.beam.runners.samza.transforms.UpdatingCombineFn; -import org.apache.beam.sdk.coders.BooleanCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.InstantCoder; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.state.BagState; -import org.apache.beam.sdk.state.CombiningState; -import org.apache.beam.sdk.state.MapState; -import org.apache.beam.sdk.state.MultimapState; -import org.apache.beam.sdk.state.OrderedListState; -import org.apache.beam.sdk.state.ReadableState; -import org.apache.beam.sdk.state.ReadableStates; -import org.apache.beam.sdk.state.SetState; -import org.apache.beam.sdk.state.State; -import org.apache.beam.sdk.state.StateContext; -import org.apache.beam.sdk.state.StateContexts; -import org.apache.beam.sdk.state.ValueState; -import org.apache.beam.sdk.state.WatermarkHoldState; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.CombineWithContext; -import 
org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.util.construction.graph.UserStateReference; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.primitives.Ints; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.primitives.UnsignedBytes; -import org.apache.samza.config.Config; -import org.apache.samza.context.TaskContext; -import org.apache.samza.serializers.Serde; -import org.apache.samza.serializers.SerdeFactory; -import org.apache.samza.storage.kv.Entry; -import org.apache.samza.storage.kv.KeyValueIterator; -import org.apache.samza.storage.kv.KeyValueStore; -import org.checkerframework.checker.initialization.qual.Initialized; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.checkerframework.checker.nullness.qual.UnknownKeyFor; -import org.joda.time.Instant; - -/** {@link StateInternals} that uses Samza local {@link KeyValueStore} to manage state. 
*/ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "keyfor", - "nullness" -}) // TODO(https://github.com/apache/beam/issues/20497) -public class SamzaStoreStateInternals implements StateInternals { - static final String BEAM_STORE = "beamStore"; - - private static final ThreadLocal> threadLocalBaos = - new ThreadLocal<>(); - - // the stores include both beamStore for system states as well as stores for user state - private final Map>> stores; - private final K key; - private final byte[] keyBytes; - private final int batchGetSize; - private final String stageId; - - private SamzaStoreStateInternals( - Map>> stores, - @Nullable K key, - byte @Nullable [] keyBytes, - String stageId, - int batchGetSize) { - this.stores = stores; - this.key = key; - this.keyBytes = keyBytes; - this.batchGetSize = batchGetSize; - this.stageId = stageId; - } - - @SuppressWarnings("unchecked") - static KeyValueStore> getBeamStore(TaskContext context) { - return (KeyValueStore>) - context.getStore(SamzaStoreStateInternals.BEAM_STORE); - } - - /** - * Creates non keyed state internal factory to persist states in {@link - * SamzaStoreStateInternals#BEAM_STORE}. 
- */ - static Factory createNonKeyedStateInternalsFactory( - String id, TaskContext context, SamzaPipelineOptions pipelineOptions) { - return createStateInternalsFactory(id, null, context, pipelineOptions, Collections.emptyMap()); - } - - static Factory createStateInternalsFactory( - String id, - Coder keyCoder, - TaskContext context, - SamzaPipelineOptions pipelineOptions, - ExecutableStage executableStage) { - - Map stateIdToStoreMap = - executableStage.getUserStates().stream() - .collect( - Collectors.toMap(UserStateReference::localName, UserStateReference::localName)); - - return createStateInternalsFactory(id, keyCoder, context, pipelineOptions, stateIdToStoreMap); - } - - @SuppressWarnings("unchecked") - static Factory createStateInternalsFactory( - String id, - @Nullable Coder keyCoder, - TaskContext context, - SamzaPipelineOptions pipelineOptions, - Map stateIdToStoreMap) { - final int batchGetSize = pipelineOptions.getStoreBatchGetSize(); - final Map>> stores = new HashMap<>(); - stores.put(BEAM_STORE, getBeamStore(context)); - - final Coder stateKeyCoder; - if (keyCoder != null) { - stateIdToStoreMap - .keySet() - .forEach( - stateId -> - stores.put( - stateId, - (KeyValueStore>) - context.getStore(stateIdToStoreMap.get(stateId)))); - stateKeyCoder = keyCoder; - } else { - stateKeyCoder = (Coder) VoidCoder.of(); - } - return new Factory<>(Objects.toString(id), stores, stateKeyCoder, batchGetSize); - } - - @Override - public K getKey() { - return key; - } - - @Override - public T state(StateNamespace stateNamespace, StateTag stateTag) { - return state(stateNamespace, stateTag, StateContexts.nullContext()); - } - - @Override - public V state( - StateNamespace namespace, StateTag address, StateContext stateContext) { - return address.bind( - new StateTag.StateBinder() { - @Override - public ValueState bindValue(StateTag> spec, Coder coder) { - return new SamzaValueState<>(namespace, address, coder); - } - - @Override - public BagState bindBag(StateTag> spec, 
Coder elemCoder) { - return new SamzaBagState<>(namespace, address, elemCoder); - } - - @Override - public SetState bindSet(StateTag> spec, Coder elemCoder) { - return new SamzaSetStateImpl<>(namespace, address, elemCoder); - } - - @Override - public MapState bindMap( - StateTag> spec, - Coder mapKeyCoder, - Coder mapValueCoder) { - return new SamzaMapStateImpl<>(namespace, address, mapKeyCoder, mapValueCoder); - } - - @Override - public MultimapState bindMultimap( - StateTag> spec, - Coder keyCoder, - Coder valueCoder) { - throw new UnsupportedOperationException( - String.format("%s is not supported", MultimapState.class.getSimpleName())); - } - - @Override - public OrderedListState bindOrderedList( - StateTag> spec, Coder elemCoder) { - throw new UnsupportedOperationException( - String.format("%s is not supported", OrderedListState.class.getSimpleName())); - } - - @Override - public - CombiningState bindCombiningValue( - StateTag> spec, - Coder accumCoder, - Combine.CombineFn combineFn) { - return new SamzaAccumulatorCombiningState<>(namespace, address, accumCoder, combineFn); - } - - @Override - public - CombiningState bindCombiningValueWithContext( - StateTag> spec, - Coder accumCoder, - CombineWithContext.CombineFnWithContext combineFn) { - throw new UnsupportedOperationException( - String.format("%s is not supported", CombiningState.class.getSimpleName())); - } - - @Override - public WatermarkHoldState bindWatermark( - StateTag spec, TimestampCombiner timestampCombiner) { - return new SamzaWatermarkHoldState(namespace, address, timestampCombiner); - } - }); - } - - /** Reuse the ByteArrayOutputStream buffer. */ - private static ByteArrayOutputStream getThreadLocalBaos() { - final SoftReference refBaos = threadLocalBaos.get(); - ByteArrayOutputStream baos = refBaos == null ? 
null : refBaos.get(); - if (baos == null) { - baos = new ByteArrayOutputStream(); - threadLocalBaos.set(new SoftReference<>(baos)); - } - - baos.reset(); - return baos; - } - - /** Factory class to create {@link SamzaStoreStateInternals}. */ - public static class Factory implements StateInternalsFactory { - private final String stageId; - private final Map>> stores; - private final Coder keyCoder; - private final int batchGetSize; - - public Factory( - String stageId, - Map>> stores, - Coder keyCoder, - int batchGetSize) { - this.stageId = stageId; - this.stores = stores; - this.keyCoder = keyCoder; - this.batchGetSize = batchGetSize; - } - - @Override - public StateInternals stateInternalsForKey(@Nullable K key) { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - final DataOutputStream dos = new DataOutputStream(baos); - - try { - if (key != null) { - keyCoder.encode(key, baos); - } - final byte[] keyBytes = baos.toByteArray(); - baos.reset(); - - dos.write(keyBytes.length); - dos.write(keyBytes); - } catch (IOException e) { - throw new RuntimeException("Cannot encode key for state store", e); - } - - return new SamzaStoreStateInternals<>(stores, key, baos.toByteArray(), stageId, batchGetSize); - } - } - - private abstract class AbstractSamzaState { - private final StateNamespace namespace; - private final String addressId; - private final boolean isBeamStore; - private final String stageId; - private final byte[] keyBytes; - private byte[] encodedStoreKey; - protected final Coder coder; - protected final KeyValueStore> store; - - @SuppressWarnings({"unchecked", "rawtypes"}) - protected AbstractSamzaState( - StateNamespace namespace, StateTag address, Coder coder) { - this.coder = coder; - this.namespace = namespace; - this.addressId = address.getId(); - this.isBeamStore = !stores.containsKey(address.getId()); - this.store = - isBeamStore - ? 
(KeyValueStore) stores.get(BEAM_STORE) - : (KeyValueStore) stores.get(address.getId()); - this.stageId = SamzaStoreStateInternals.this.stageId; - this.keyBytes = SamzaStoreStateInternals.this.keyBytes; - } - - protected void clearInternal() { - store.delete(getEncodedStoreKey()); - } - - protected void writeInternal(T value) { - store.put(getEncodedStoreKey(), StateValue.of(value, coder)); - } - - protected T readInternal() { - final StateValue stateValue = store.get(getEncodedStoreKey()); - return decodeValue(stateValue); - } - - protected ReadableState isEmptyInternal() { - return new ReadableState() { - @Override - public Boolean read() { - return store.get(getEncodedStoreKey()) == null; - } - - @Override - public ReadableState readLater() { - return this; - } - }; - } - - protected ByteArray getEncodedStoreKey() { - return ByteArray.of(getEncodedStoreKeyBytes()); - } - - protected byte[] getEncodedStoreKeyBytes() { - if (encodedStoreKey == null) { - final ByteArrayOutputStream baos = getThreadLocalBaos(); - try (DataOutputStream dos = new DataOutputStream(baos)) { - dos.write(keyBytes); - dos.writeUTF(namespace.stringKey()); - - if (isBeamStore) { - // for system state, we need to differentiate based on the following: - dos.writeUTF(stageId); - dos.writeUTF(addressId); - } - } catch (IOException e) { - throw new RuntimeException("Could not encode full address for state: " + addressId, e); - } - this.encodedStoreKey = baos.toByteArray(); - } - return encodedStoreKey; - } - - protected T decodeValue(StateValue stateValue) { - return stateValue == null ? 
null : stateValue.getValue(coder); - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (!(o instanceof SamzaStoreStateInternals.AbstractSamzaState)) { - return false; - } - - @SuppressWarnings("unchecked") - final AbstractSamzaState that = (AbstractSamzaState) o; - if (isBeamStore || that.isBeamStore) { - if (!isBeamStore || !that.isBeamStore || !stageId.equals(that.stageId)) { - return false; - } - } - return Arrays.equals(keyBytes, that.keyBytes) - && addressId.equals(that.addressId) - && this.namespace.equals(that.namespace); - } - - @Override - public int hashCode() { - int result = namespace.hashCode(); - result = 31 * result + Arrays.hashCode(getEncodedStoreKeyBytes()); - return result; - } - } - - private class SamzaValueState extends AbstractSamzaState implements ValueState { - private SamzaValueState( - StateNamespace namespace, StateTag address, Coder coder) { - super(namespace, address, coder); - } - - @Override - public void write(T input) { - writeInternal(input); - } - - @Override - public T read() { - return readInternal(); - } - - @Override - public ValueState readLater() { - return this; - } - - @Override - public void clear() { - clearInternal(); - } - } - - private class SamzaBagState extends AbstractSamzaState implements BagState { - - private SamzaBagState( - StateNamespace namespace, StateTag address, Coder coder) { - super(namespace, address, coder); - } - - @Override - public void add(T value) { - synchronized (store) { - final int size = getSize(); - final ByteArray encodedKey = encodeKey(size); - store.put(encodedKey, StateValue.of(value, coder)); - store.put(getEncodedStoreKey(), StateValue.of(Ints.toByteArray(size + 1))); - } - } - - @Override - public ReadableState isEmpty() { - synchronized (store) { - return isEmptyInternal(); - } - } - - @Override - @Nonnull - public List read() { - synchronized (store) { - final int size = getSize(); - if (size == 0) { - return 
Collections.emptyList(); - } - - final List values = new ArrayList<>(size); - final List keys = new ArrayList<>(size); - int start = 0; - while (start < size) { - final int end = Math.min(size, start + batchGetSize); - for (int i = start; i < end; i++) { - keys.add(encodeKey(i)); - } - store.getAll(keys).values().forEach(value -> values.add(decodeValue(value))); - - start += batchGetSize; - keys.clear(); - } - return values; - } - } - - @Override - public BagState readLater() { - return this; - } - - @Override - public void clear() { - synchronized (store) { - final int size = getSize(); - if (size != 0) { - final List keys = new ArrayList<>(size); - for (int i = 0; i < size; i++) { - keys.add(encodeKey(i)); - } - store.deleteAll(keys); - store.delete(getEncodedStoreKey()); - } - } - } - - private int getSize() { - final StateValue stateSize = store.get(getEncodedStoreKey()); - return (stateSize == null || stateSize.valueBytes == null) - ? 0 - : Ints.fromByteArray(stateSize.valueBytes); - } - - private ByteArray encodeKey(int size) { - final ByteArrayOutputStream baos = getThreadLocalBaos(); - try (DataOutputStream dos = new DataOutputStream(baos)) { - dos.write(getEncodedStoreKeyBytes()); - dos.writeInt(size); - return ByteArray.of(baos.toByteArray()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - private class SamzaSetStateImpl implements SamzaSetState { - private final SamzaMapStateImpl mapState; - - private SamzaSetStateImpl( - StateNamespace namespace, StateTag address, Coder coder) { - mapState = new SamzaMapStateImpl<>(namespace, address, coder, BooleanCoder.of()); - } - - @Override - public ReadableState contains(T t) { - return mapState.get(t); - } - - @Override - public @Nullable ReadableState addIfAbsent(T t) { - return mapState.putIfAbsent(t, true); - } - - @Override - public void remove(T t) { - mapState.remove(t); - } - - @Override - public void add(T value) { - mapState.put(value, true); - } - - @Override - public 
ReadableState isEmpty() { - return new ReadableState() { - - @Override - public Boolean read() { - return Iterables.isEmpty(mapState.entries().read()); - } - - @Override - public ReadableState readLater() { - return this; - } - }; - } - - @Override - public Iterable read() { - return mapState.keys().read(); - } - - @Override - public SetState readLater() { - return this; - } - - @Override - public void clear() { - mapState.clear(); - } - - @Override - public ReadableState> readIterator() { - final Iterator> iter = mapState.readIterator().read(); - return new ReadableState>() { - @Nullable - @Override - public Iterator read() { - return new Iterator() { - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public T next() { - return iter.next().getKey(); - } - }; - } - - @Override - public ReadableState> readLater() { - return this; - } - }; - } - - @Override - public void closeIterators() { - mapState.closeIterators(); - } - } - - private class SamzaMapStateImpl extends AbstractSamzaState - implements SamzaMapState { - - private final Coder keyCoder; - private final int storeKeySize; - private final List>> openIterators = - Collections.synchronizedList(new ArrayList<>()); - - private int maxKeySize; - - protected SamzaMapStateImpl( - StateNamespace namespace, - StateTag address, - Coder keyCoder, - Coder valueCoder) { - super(namespace, address, valueCoder); - - this.keyCoder = keyCoder; - this.storeKeySize = getEncodedStoreKeyBytes().length; - // initial max key size is around 100k, so we can restore timer keys - this.maxKeySize = this.storeKeySize + 100_000; - } - - @Override - public void put(KeyT key, ValueT value) { - final ByteArray encodedKey = encodeKey(key); - maxKeySize = Math.max(maxKeySize, encodedKey.getValue().length); - store.put(encodedKey, StateValue.of(value, coder)); - } - - @Override - public @Nullable ReadableState computeIfAbsent( - KeyT key, Function mappingFunction) { - final ByteArray encodedKey = 
encodeKey(key); - final ValueT current = decodeValue(store.get(encodedKey)); - if (current == null) { - put(key, mappingFunction.apply(key)); - } - - return current == null ? null : ReadableStates.immediate(current); - } - - @Override - public void remove(KeyT key) { - store.delete(encodeKey(key)); - } - - @Override - public ReadableState get(KeyT key) { - return getOrDefault(key, null); - } - - @Override - public @UnknownKeyFor @NonNull @Initialized ReadableState getOrDefault( - KeyT key, @Nullable ValueT defaultValue) { - return new ReadableState() { - @Override - public @Nullable ValueT read() { - ValueT value = decodeValue(store.get(encodeKey(key))); - return value != null ? value : defaultValue; - } - - @Override - public @UnknownKeyFor @NonNull @Initialized ReadableState readLater() { - return this; - } - }; - } - - @Override - public ReadableState> keys() { - return new ReadableState>() { - @Override - public Iterable read() { - return createIterable(entry -> decodeKey(entry.getKey())); - } - - @Override - public ReadableState> readLater() { - return this; - } - }; - } - - @Override - public ReadableState> values() { - return new ReadableState>() { - @Override - public Iterable read() { - return createIterable(entry -> decodeValue(entry.getValue())); - } - - @Override - public ReadableState> readLater() { - return this; - } - }; - } - - @Override - public ReadableState>> entries() { - return new ReadableState>>() { - @Override - public Iterable> read() { - return createIterable( - entry -> - new AbstractMap.SimpleEntry<>( - decodeKey(entry.getKey()), decodeValue(entry.getValue()))); - } - - @Override - public ReadableState>> readLater() { - return this; - } - }; - } - - @Override - public @UnknownKeyFor @NonNull @Initialized ReadableState< - @UnknownKeyFor @NonNull @Initialized Boolean> - isEmpty() { - ReadableState> keys = this.keys(); - return new ReadableState() { - @Override - public @Nullable Boolean read() { - return Iterables.isEmpty(keys.read()); - } 
- - @Override - public @UnknownKeyFor @NonNull @Initialized ReadableState readLater() { - keys.readLater(); - return this; - } - }; - } - - @Override - public ReadableState>> readIterator() { - final ByteArray maxKey = createMaxKey(); - final KeyValueIterator> kvIter = - store.range(getEncodedStoreKey(), maxKey); - openIterators.add(kvIter); - - return new ReadableState>>() { - @Nullable - @Override - public Iterator> read() { - return new Iterator>() { - @Override - public boolean hasNext() { - boolean hasNext = kvIter.hasNext(); - if (!hasNext) { - kvIter.close(); - openIterators.remove(kvIter); - } - return hasNext; - } - - @Override - public Map.Entry next() { - Entry> entry = kvIter.next(); - return new AbstractMap.SimpleEntry<>( - decodeKey(entry.getKey()), decodeValue(entry.getValue())); - } - }; - } - - @Override - public ReadableState>> readLater() { - return this; - } - }; - } - - /** - * Since we are not able to track the instances of the iterators created here and close them - * properly, we need to load the content into memory. 
- */ - private Iterable createIterable( - SerializableFunction< - org.apache.samza.storage.kv.Entry>, OutputT> - fn) { - final ByteArray maxKey = createMaxKey(); - final KeyValueIterator> kvIter = - store.range(getEncodedStoreKey(), maxKey); - final List>> iterable = ImmutableList.copyOf(kvIter); - kvIter.close(); - - return new Iterable() { - @Override - public Iterator iterator() { - final Iterator>> iter = iterable.iterator(); - - return new Iterator() { - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public OutputT next() { - return fn.apply(iter.next()); - } - }; - } - }; - } - - @Override - public void clear() { - final ByteArray maxKey = createMaxKey(); - final KeyValueIterator> kvIter = - store.range(getEncodedStoreKey(), maxKey); - while (kvIter.hasNext()) { - store.delete(kvIter.next().getKey()); - } - kvIter.close(); - } - - private ByteArray encodeKey(KeyT key) { - try { - final ByteArrayOutputStream baos = getThreadLocalBaos(); - baos.write(getEncodedStoreKeyBytes()); - keyCoder.encode(key, baos); - return ByteArray.of(baos.toByteArray()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private KeyT decodeKey(ByteArray keyBytes) { - try { - final byte[] realKey = - Arrays.copyOfRange(keyBytes.value, storeKeySize, keyBytes.value.length); - return keyCoder.decode(new ByteArrayInputStream(realKey)); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private ByteArray createMaxKey() { - byte[] maxKey = new byte[maxKeySize]; - Arrays.fill(maxKey, (byte) 0xff); - - final byte[] encodedKey = getEncodedStoreKeyBytes(); - System.arraycopy(encodedKey, 0, maxKey, 0, encodedKey.length); - return ByteArray.of(maxKey); - } - - @Override - public void closeIterators() { - openIterators.forEach(KeyValueIterator::close); - openIterators.clear(); - } - } - - private class SamzaAccumulatorCombiningState extends AbstractSamzaState - implements CombiningState { - - private final 
Combine.CombineFn combineFn; - - protected SamzaAccumulatorCombiningState( - StateNamespace namespace, - StateTag address, - Coder coder, - Combine.CombineFn combineFn) { - super(namespace, address, coder); - - this.combineFn = combineFn; - } - - @Override - public void clear() { - clearInternal(); - } - - @Override - public void add(InT value) { - final AccumT accum = getAccum(); - final AccumT current = combineFn.addInput(accum, value); - writeInternal(current); - } - - @Override - public ReadableState isEmpty() { - return isEmptyInternal(); - } - - @Override - public AccumT getAccum() { - final AccumT accum = readInternal(); - return accum != null ? accum : combineFn.createAccumulator(); - } - - @Override - public void addAccum(AccumT accum) { - final AccumT currentAccum = getAccum(); - final AccumT mergedAccum = mergeAccumulators(Arrays.asList(currentAccum, accum)); - writeInternal(mergedAccum); - } - - @Override - public AccumT mergeAccumulators(Iterable accumulators) { - return combineFn.mergeAccumulators(accumulators); - } - - @Override - public CombiningState readLater() { - return this; - } - - @Override - @Nonnull - public OutT read() { - AccumT accum = getAccum(); - OutT output = combineFn.extractOutput(accum); - if (combineFn instanceof UpdatingCombineFn) { - AccumT updatedAccum = - ((UpdatingCombineFn) combineFn).updateAfterFiring(accum); - writeInternal(updatedAccum); - } - return output; - } - } - - private class SamzaWatermarkHoldState extends AbstractSamzaState - implements WatermarkHoldState { - - private final TimestampCombiner timestampCombiner; - - public SamzaWatermarkHoldState( - StateNamespace namespace, StateTag address, TimestampCombiner timestampCombiner) { - super(namespace, address, InstantCoder.of()); - this.timestampCombiner = timestampCombiner; - } - - @Override - public void add(Instant value) { - final Instant currentValue = readInternal(); - final Instant combinedValue = - currentValue == null ? 
value : timestampCombiner.combine(currentValue, value); - - if (!combinedValue.equals(currentValue)) { - writeInternal(combinedValue); - } - } - - @Override - public ReadableState isEmpty() { - return isEmptyInternal(); - } - - @Override - public Instant read() { - return readInternal(); - } - - @Override - public TimestampCombiner getTimestampCombiner() { - return this.timestampCombiner; - } - - @Override - public WatermarkHoldState readLater() { - return this; - } - - @Override - public void clear() { - clearInternal(); - } - } - - /** Wrapper of byte[] so it can used as key in the KeyValueStore for caching. */ - public static class ByteArray implements Serializable, Comparable { - - private final byte[] value; - - public static ByteArray of(byte[] value) { - return new ByteArray(value); - } - - private ByteArray(byte[] value) { - this.value = value; - } - - public byte[] getValue() { - return value; - } - - @Override - public boolean equals(@Nullable Object o) { - if (!(o instanceof ByteArray)) { - return false; - } - ByteArray byteArray = (ByteArray) o; - return Arrays.equals(value, byteArray.value); - } - - @Override - public int hashCode() { - return value != null ? Arrays.hashCode(value) : 0; - } - - @Override - public int compareTo(ByteArray other) { - return UnsignedBytes.lexicographicalComparator().compare(value, other.value); - } - } - - /** Factory class to provide {@link ByteArraySerde}. */ - public static class ByteArraySerdeFactory implements SerdeFactory { - - @Override - public Serde getSerde(String name, Config config) { - return new ByteArraySerde(); - } - - /** Serde for {@link ByteArray}. 
*/ - public static class ByteArraySerde implements Serde { - - @Override - public byte[] toBytes(ByteArray byteArray) { - return byteArray.value; - } - - @Override - public ByteArray fromBytes(byte[] bytes) { - return ByteArray.of(bytes); - } - } - } - - /** - * Wrapper for state value so that unencoded value can be read directly from the cache of - * KeyValueStore. - */ - public static class StateValue implements Serializable { - private T value; - private Coder valueCoder; - private byte[] valueBytes; - - private StateValue(T value, Coder valueCoder, byte[] valueBytes) { - this.value = value; - this.valueCoder = valueCoder; - this.valueBytes = valueBytes; - } - - public static StateValue of(T value, Coder valueCoder) { - return new StateValue<>(value, valueCoder, null); - } - - public static StateValue of(byte[] valueBytes) { - return new StateValue<>(null, null, valueBytes); - } - - public T getValue(Coder coder) { - if (value == null && valueBytes != null) { - if (valueCoder == null) { - valueCoder = coder; - } - try { - value = valueCoder.decode(new ByteArrayInputStream(valueBytes)); - } catch (IOException e) { - throw new RuntimeException("Could not decode state", e); - } - } - return value; - } - - public byte[] getValueBytes() { - if (valueBytes == null && value != null) { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - valueCoder.encode(value, baos); - } catch (IOException e) { - throw new RuntimeException("Could not encode state value: " + value, e); - } - valueBytes = baos.toByteArray(); - } - return valueBytes; - } - } - - /** Factory class to provide {@link StateValueSerdeFactory.StateValueSerde}. */ - public static class StateValueSerdeFactory implements SerdeFactory> { - @Override - public Serde> getSerde(String name, Config config) { - return new StateValueSerde(); - } - - /** Serde for {@link StateValue}. 
*/ - public static class StateValueSerde implements Serde> { - @Override - public StateValue fromBytes(byte[] bytes) { - return StateValue.of(bytes); - } - - @Override - public byte[] toBytes(StateValue stateValue) { - return stateValue == null ? null : stateValue.getValueBytes(); - } - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java deleted file mode 100644 index bd6547b805a4..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactory.java +++ /dev/null @@ -1,733 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import com.google.auto.value.AutoValue; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NavigableSet; -import java.util.TreeSet; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.StateTags; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternalsFactory; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.SamzaRunner; -import org.apache.beam.runners.samza.state.SamzaMapState; -import org.apache.beam.runners.samza.state.SamzaSetState; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.StructuredCoder; -import org.apache.beam.sdk.coders.VarLongCoder; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.values.PCollection.IsBounded; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.samza.operators.Scheduler; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link TimerInternalsFactory} that creates Samza {@link TimerInternals}. This class keeps track - * of the {@link org.apache.beam.runners.core.TimerInternals.TimerData} added to the sorted timer - * set, and removes the ready timers when the watermark is advanced. 
- */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaTimerInternalsFactory implements TimerInternalsFactory { - private static final Logger LOG = LoggerFactory.getLogger(SamzaTimerInternalsFactory.class); - private final NavigableSet> eventTimeBuffer; - private final Coder keyCoder; - private final Scheduler> timerRegistry; - private final SamzaTimerState state; - private final IsBounded isBounded; - - private Instant inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - private Instant outputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - - // Size of each event timer is around 200B, by default with buffer size 50k, the default size is - // 10M - private final int maxEventTimerBufferSize; - // Max event time stored in eventTimerBuffer - // If it is set to long.MAX_VALUE, it indicates the State does not contain any KeyedTimerData - private long maxEventTimeInBuffer; - - // The maximum number of ready timers to process at once per watermark. 
- private final long maxReadyTimersToProcessOnce; - - private SamzaTimerInternalsFactory( - Coder keyCoder, - Scheduler> timerRegistry, - String timerStateId, - SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory, - Coder windowCoder, - IsBounded isBounded, - SamzaPipelineOptions pipelineOptions) { - this.keyCoder = keyCoder; - this.timerRegistry = timerRegistry; - this.eventTimeBuffer = new TreeSet<>(); - this.maxEventTimerBufferSize = - pipelineOptions.getEventTimerBufferSize(); // must be placed before state initialization - this.maxEventTimeInBuffer = Long.MAX_VALUE; - this.maxReadyTimersToProcessOnce = pipelineOptions.getMaxReadyTimersToProcessOnce(); - this.state = new SamzaTimerState(timerStateId, nonKeyedStateInternalsFactory, windowCoder); - this.isBounded = isBounded; - } - - static SamzaTimerInternalsFactory createTimerInternalFactory( - Coder keyCoder, - Scheduler> timerRegistry, - String timerStateId, - SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory, - WindowingStrategy windowingStrategy, - IsBounded isBounded, - SamzaPipelineOptions pipelineOptions) { - - final Coder windowCoder = windowingStrategy.getWindowFn().windowCoder(); - - return new SamzaTimerInternalsFactory<>( - keyCoder, - timerRegistry, - timerStateId, - nonKeyedStateInternalsFactory, - windowCoder, - isBounded, - pipelineOptions); - } - - @Override - public TimerInternals timerInternalsForKey(@Nullable K key) { - final byte[] keyBytes; - - if (keyCoder == null) { - if (key != null) { - throw new IllegalArgumentException( - String.format("Received non-null key for unkeyed timer factory. 
Key: %s", key)); - } - keyBytes = null; - } else { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - keyCoder.encode(key, baos); - } catch (IOException e) { - throw new RuntimeException("Could not encode key: " + key, e); - } - keyBytes = baos.toByteArray(); - } - - return new SamzaTimerInternals(keyBytes, key); - } - - public void setInputWatermark(Instant watermark) { - if (watermark.isBefore(inputWatermark)) { - throw new IllegalArgumentException("New input watermark is before current watermark"); - } - LOG.debug("Advancing input watermark from {} to {}.", inputWatermark, watermark); - inputWatermark = watermark; - } - - public void setOutputWatermark(Instant watermark) { - if (watermark.isAfter(inputWatermark)) { - LOG.debug("Clipping new output watermark from {} to {}.", watermark, inputWatermark); - watermark = inputWatermark; - } - - if (watermark.isBefore(outputWatermark)) { - throw new IllegalArgumentException("New output watermark is before current watermark"); - } - LOG.debug("Advancing output watermark from {} to {}.", outputWatermark, watermark); - outputWatermark = watermark; - } - - /** - * The method is called when watermark comes. It compares timers in memory buffer with watermark - * to prepare ready timers. When memory buffer is empty, it asks store to reload timers into - * buffer. note that the number of timers returned may be larger than memory buffer size. 
- * - * @return a collection of ready timers to be fired - */ - public Collection> removeReadyTimers() { - final Collection> readyTimers = new ArrayList<>(); - - while (!eventTimeBuffer.isEmpty() - && !eventTimeBuffer.first().getTimerData().getTimestamp().isAfter(inputWatermark) - && readyTimers.size() < maxReadyTimersToProcessOnce) { - - final KeyedTimerData keyedTimerData = eventTimeBuffer.pollFirst(); - readyTimers.add(keyedTimerData); - state.deletePersisted(keyedTimerData); - - if (eventTimeBuffer.isEmpty()) { - state.reloadEventTimeTimers(); - } - } - LOG.debug("Removed {} ready timers", readyTimers.size()); - - if (readyTimers.size() == maxReadyTimersToProcessOnce - && !eventTimeBuffer.isEmpty() - && eventTimeBuffer.first().getTimerData().getTimestamp().isBefore(inputWatermark)) { - LOG.warn( - "Loaded {} expired timers, the remaining will be processed at next watermark.", - maxReadyTimersToProcessOnce); - } - return readyTimers; - } - - public void removeProcessingTimer(KeyedTimerData keyedTimerData) { - state.deletePersisted(keyedTimerData); - } - - public Instant getInputWatermark() { - return inputWatermark; - } - - public Instant getOutputWatermark() { - return outputWatermark; - } - - // for unit test only - NavigableSet> getEventTimeBuffer() { - return eventTimeBuffer; - } - - private class SamzaTimerInternals implements TimerInternals { - private final byte[] keyBytes; - private final K key; - - public SamzaTimerInternals(byte[] keyBytes, K key) { - this.keyBytes = keyBytes; - this.key = key; - } - - @Override - public void setTimer( - StateNamespace namespace, - String timerId, - String timerFamilyId, - Instant target, - Instant outputTimestamp, - TimeDomain timeDomain) { - setTimer( - TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain)); - } - - @Override - public void setTimer(TimerData timerData) { - if (isBounded == IsBounded.UNBOUNDED - && timerData.getTimestamp().getMillis() - > 
GlobalWindow.INSTANCE.maxTimestamp().getMillis()) { - // No need to register a timer greater than maxTimestamp if the input is unbounded. - // 1. It will ignore timers with (maxTimestamp + 1) created by stateful ParDo with global - // window. - // 2. It will register timers with maxTimestamp so that global window can be closed - // correctly when max watermark comes. - return; - } - - final KeyedTimerData keyedTimerData = new KeyedTimerData<>(keyBytes, key, timerData); - if (eventTimeBuffer.contains(keyedTimerData)) { - return; - } - - final Long lastTimestamp = state.get(keyedTimerData); - final Long newTimestamp = timerData.getTimestamp().getMillis(); - - if (newTimestamp.equals(lastTimestamp)) { - return; - } - - if (lastTimestamp != null) { - deleteTimer( - timerData.getNamespace(), - timerData.getTimerId(), - timerData.getTimerFamilyId(), - new Instant(lastTimestamp), - new Instant(lastTimestamp), - timerData.getDomain()); - } - - // persist it first - state.persist(keyedTimerData); - - // TO-DO: apply the same memory optimization over processing timers - switch (timerData.getDomain()) { - case EVENT_TIME: - /* - * To determine if the upcoming KeyedTimerData could be added to the Buffer while - * guaranteeing the Buffer's timestamps are all <= than those in State Store to preserve - * timestamp eviction priority: - * - *

1) If maxEventTimeInBuffer == long.MAX_VALUE, it indicates that the State is empty, - * therefore all the Event times greater or lesser than newTimestamp are in the buffer; - * - *

2) If newTimestamp < maxEventTimeInBuffer, it indicates that there are entries - * greater than newTimestamp, so it is safe to add it to the buffer - * - *

In case that the Buffer is full, we remove the largest timer from memory according - * to {@link KeyedTimerData.compareTo()} - */ - if (newTimestamp < maxEventTimeInBuffer) { - eventTimeBuffer.add(keyedTimerData); - if (eventTimeBuffer.size() > maxEventTimerBufferSize) { - eventTimeBuffer.pollLast(); - maxEventTimeInBuffer = - eventTimeBuffer.last().getTimerData().getTimestamp().getMillis(); - } - } - break; - - case PROCESSING_TIME: - timerRegistry.schedule(keyedTimerData, timerData.getTimestamp().getMillis()); - break; - - default: - throw new UnsupportedOperationException( - String.format( - "%s currently only supports even time or processing time", SamzaRunner.class)); - } - } - - /** @deprecated use {@link #deleteTimer(StateNamespace, String, String, TimeDomain)}. */ - @Override - @Deprecated - public void deleteTimer(StateNamespace namespace, String timerId, String timerFamilyId) { - deleteTimer(namespace, timerId, timerFamilyId, TimeDomain.EVENT_TIME); - } - - /** @deprecated use {@link #deleteTimer(StateNamespace, String, String, TimeDomain)}. 
*/ - @Override - @Deprecated - public void deleteTimer(TimerData timerData) { - deleteTimer( - timerData.getNamespace(), - timerData.getTimerId(), - timerData.getTimerFamilyId(), - timerData.getDomain()); - } - - @Override - public void deleteTimer( - StateNamespace namespace, String timerId, String timerFamilyId, TimeDomain timeDomain) { - final TimerKey timerKey = TimerKey.of(key, namespace, timerId, timerFamilyId); - final Long lastTimestamp = state.get(timerKey, timeDomain); - - if (lastTimestamp == null) { - return; - } - - final Instant timestamp = Instant.ofEpochMilli(lastTimestamp); - deleteTimer(namespace, timerId, timerFamilyId, timestamp, timestamp, timeDomain); - } - - private void deleteTimer( - StateNamespace namespace, - String timerId, - String timerFamilyId, - Instant timestamp, - Instant outputTimestamp, - TimeDomain timeDomain) { - final TimerData timerData = - TimerData.of(timerId, timerFamilyId, namespace, timestamp, outputTimestamp, timeDomain); - final KeyedTimerData keyedTimerData = new KeyedTimerData<>(keyBytes, key, timerData); - - state.deletePersisted(keyedTimerData); - - switch (timerData.getDomain()) { - case EVENT_TIME: - eventTimeBuffer.remove(keyedTimerData); - break; - - case PROCESSING_TIME: - timerRegistry.delete(keyedTimerData); - break; - - default: - throw new UnsupportedOperationException( - String.format( - "%s currently only supports event time or processing time but get %s", - SamzaRunner.class, timerData.getDomain())); - } - } - - @Override - public Instant currentProcessingTime() { - return new Instant(); - } - - @Override - public Instant currentSynchronizedProcessingTime() { - throw new UnsupportedOperationException( - String.format( - "%s does not currently support synchronized processing time", SamzaRunner.class)); - } - - @Override - public Instant currentInputWatermarkTime() { - return inputWatermark; - } - - @Override - public Instant currentOutputWatermarkTime() { - return outputWatermark; - } - } - - private 
class SamzaTimerState { - private final SamzaMapState, Long> eventTimeTimerState; - private final SamzaSetState> timestampSortedEventTimeTimerState; - private final SamzaMapState, Long> processingTimeTimerState; - - SamzaTimerState( - String timerStateId, - SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory, - Coder windowCoder) { - - this.eventTimeTimerState = - (SamzaMapState, Long>) - nonKeyedStateInternalsFactory - .stateInternalsForKey(null) - .state( - StateNamespaces.global(), - StateTags.map( - timerStateId + "-et", - new TimerKeyCoder<>(keyCoder, windowCoder), - VarLongCoder.of())); - - this.timestampSortedEventTimeTimerState = - (SamzaSetState>) - nonKeyedStateInternalsFactory - .stateInternalsForKey(null) - .state( - StateNamespaces.global(), - StateTags.set( - timerStateId + "-ts", - new KeyedTimerData.KeyedTimerDataCoder<>(keyCoder, windowCoder))); - - this.processingTimeTimerState = - (SamzaMapState, Long>) - nonKeyedStateInternalsFactory - .stateInternalsForKey(null) - .state( - StateNamespaces.global(), - StateTags.map( - timerStateId + "-pt", - new TimerKeyCoder<>(keyCoder, windowCoder), - VarLongCoder.of())); - - init(); - } - - Long get(KeyedTimerData keyedTimerData) { - return get(TimerKey.of(keyedTimerData), keyedTimerData.getTimerData().getDomain()); - } - - Long get(TimerKey key, TimeDomain domain) { - switch (domain) { - case EVENT_TIME: - return eventTimeTimerState.get(key).read(); - - case PROCESSING_TIME: - return processingTimeTimerState.get(key).read(); - - default: - throw new UnsupportedOperationException( - String.format( - "%s currently only supports event time or processing time but get %s", - SamzaRunner.class, domain)); - } - } - - void persist(KeyedTimerData keyedTimerData) { - final TimerKey timerKey = TimerKey.of(keyedTimerData); - switch (keyedTimerData.getTimerData().getDomain()) { - case EVENT_TIME: - final Long timestamp = eventTimeTimerState.get(timerKey).read(); - - if (timestamp != null) { - final 
KeyedTimerData keyedTimerDataInStore = - TimerKey.toKeyedTimerData(timerKey, timestamp, TimeDomain.EVENT_TIME, keyCoder); - timestampSortedEventTimeTimerState.remove(keyedTimerDataInStore); - } - eventTimeTimerState.put( - timerKey, keyedTimerData.getTimerData().getTimestamp().getMillis()); - - timestampSortedEventTimeTimerState.add(keyedTimerData); - - break; - - case PROCESSING_TIME: - processingTimeTimerState.put( - timerKey, keyedTimerData.getTimerData().getTimestamp().getMillis()); - break; - - default: - throw new UnsupportedOperationException( - String.format( - "%s currently only supports event time or processing time but get %s", - SamzaRunner.class, keyedTimerData.getTimerData().getDomain())); - } - } - - void deletePersisted(KeyedTimerData keyedTimerData) { - final TimerKey timerKey = TimerKey.of(keyedTimerData); - switch (keyedTimerData.getTimerData().getDomain()) { - case EVENT_TIME: - eventTimeTimerState.remove(timerKey); - timestampSortedEventTimeTimerState.remove(keyedTimerData); - break; - - case PROCESSING_TIME: - processingTimeTimerState.remove(timerKey); - break; - - default: - throw new UnsupportedOperationException( - String.format( - "%s currently only supports event time or processing time but get %s", - SamzaRunner.class, keyedTimerData.getTimerData().getDomain())); - } - } - - /** - * Reload event time timers from state to memory buffer. 
Buffer size is bound by - * maxEventTimerBufferSize - */ - private void reloadEventTimeTimers() { - final Iterator> iter = - timestampSortedEventTimeTimerState.readIterator().read(); - - while (iter.hasNext() && eventTimeBuffer.size() < maxEventTimerBufferSize) { - final KeyedTimerData keyedTimerData = iter.next(); - eventTimeBuffer.add(keyedTimerData); - maxEventTimeInBuffer = keyedTimerData.getTimerData().getTimestamp().getMillis(); - } - - timestampSortedEventTimeTimerState.closeIterators(); - LOG.info("Loaded {} event time timers in memory", eventTimeBuffer.size()); - - if (eventTimeBuffer.size() < maxEventTimerBufferSize) { - LOG.debug( - "Event time timers in State is empty, filled {} timers out of {} buffer capacity", - eventTimeBuffer.size(), - maxEventTimeInBuffer); - // Reset the flag variable to indicate there are no more KeyedTimerData in State - maxEventTimeInBuffer = Long.MAX_VALUE; - } - } - - private void loadProcessingTimeTimers() { - final Iterator, Long>> iter = - processingTimeTimerState.readIterator().read(); - // since the iterator will reach to the end, it will be closed automatically - int count = 0; - while (iter.hasNext()) { - final Map.Entry, Long> entry = iter.next(); - final KeyedTimerData keyedTimerData = - TimerKey.toKeyedTimerData( - entry.getKey(), entry.getValue(), TimeDomain.PROCESSING_TIME, keyCoder); - - timerRegistry.schedule( - keyedTimerData, keyedTimerData.getTimerData().getTimestamp().getMillis()); - ++count; - } - processingTimeTimerState.closeIterators(); - - LOG.info("Loaded {} processing time timers in memory", count); - } - - /** - * Restore timer state from RocksDB. This is needed for migration of existing jobs. Give events - * in eventTimeTimerState, construct timestampSortedEventTimeTimerState preparing for memory - * reloading. 
TO-DO: processing time timers are still loaded into memory in one shot; will apply - * the same optimization mechanism as event time timer - */ - private void init() { - final Iterator, Long>> eventTimersIter = - eventTimeTimerState.readIterator().read(); - // use hasNext to check empty, because this is relatively cheap compared with Iterators.size() - if (eventTimersIter.hasNext()) { - final Iterator sortedEventTimerIter = - timestampSortedEventTimeTimerState.readIterator().read(); - - if (!sortedEventTimerIter.hasNext()) { - // inline the migration code - while (eventTimersIter.hasNext()) { - final Map.Entry, Long> entry = eventTimersIter.next(); - final KeyedTimerData keyedTimerData = - TimerKey.toKeyedTimerData( - entry.getKey(), entry.getValue(), TimeDomain.EVENT_TIME, keyCoder); - timestampSortedEventTimeTimerState.add(keyedTimerData); - } - } - timestampSortedEventTimeTimerState.closeIterators(); - } - eventTimeTimerState.closeIterators(); - - reloadEventTimeTimers(); - loadProcessingTimeTimers(); - } - } - - @AutoValue - abstract static class TimerKey { - abstract @Nullable K getKey(); - - abstract StateNamespace getStateNamespace(); - - abstract String getTimerId(); - - abstract String getTimerFamilyId(); - - static Builder builder() { - return new AutoValue_SamzaTimerInternalsFactory_TimerKey.Builder<>(); - } - - static TimerKey of(KeyedTimerData keyedTimerData) { - final TimerInternals.TimerData timerData = keyedTimerData.getTimerData(); - return of( - keyedTimerData.getKey(), - timerData.getNamespace(), - timerData.getTimerId(), - timerData.getTimerFamilyId()); - } - - static TimerKey of( - K key, StateNamespace namespace, String timerId, String timerFamilyId) { - return TimerKey.builder() - .setKey(key) - .setStateNamespace(namespace) - .setTimerId(timerId) - .setTimerFamilyId(timerFamilyId) - .build(); - } - - static KeyedTimerData toKeyedTimerData( - TimerKey timerKey, long timestamp, TimeDomain domain, Coder keyCoder) { - byte[] keyBytes = null; - 
if (keyCoder != null && timerKey.getKey() != null) { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - keyCoder.encode(timerKey.getKey(), baos); - } catch (IOException e) { - throw new RuntimeException("Could not encode key: " + timerKey.getKey(), e); - } - keyBytes = baos.toByteArray(); - } - - return new KeyedTimerData<>( - keyBytes, - timerKey.getKey(), - TimerInternals.TimerData.of( - timerKey.getTimerId(), - timerKey.getTimerFamilyId(), - timerKey.getStateNamespace(), - new Instant(timestamp), - new Instant(timestamp), - domain)); - } - - @AutoValue.Builder - abstract static class Builder { - abstract Builder setKey(K key); - - abstract Builder setStateNamespace(StateNamespace stateNamespace); - - abstract Builder setTimerId(String timerId); - - abstract Builder setTimerFamilyId(String timerFamilyId); - - abstract TimerKey build(); - } - } - - /** Coder for {@link TimerKey}. */ - public static class TimerKeyCoder extends StructuredCoder> { - private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of(); - - private final Coder keyCoder; - private final Coder windowCoder; - - TimerKeyCoder(Coder keyCoder, Coder windowCoder) { - this.keyCoder = keyCoder; - this.windowCoder = windowCoder; - } - - @Override - public void encode(TimerKey value, OutputStream outStream) - throws CoderException, IOException { - - // encode the timestamp first - STRING_CODER.encode(value.getTimerId(), outStream); - STRING_CODER.encode(value.getStateNamespace().stringKey(), outStream); - - if (keyCoder != null) { - keyCoder.encode(value.getKey(), outStream); - } - - STRING_CODER.encode(value.getTimerFamilyId(), outStream); - } - - @Override - public TimerKey decode(InputStream inStream) throws CoderException, IOException { - // decode the timestamp first - final String timerId = STRING_CODER.decode(inStream); - // The namespace needs two-phase deserialization: - // first from bytes into a string, then from string to namespace object using 
windowCoder. - final StateNamespace namespace = - StateNamespaces.fromString(STRING_CODER.decode(inStream), windowCoder); - K key = null; - if (keyCoder != null) { - key = keyCoder.decode(inStream); - } - - // check if the stream has more available bytes. This is to ensure backward compatibility with - // old rocksdb state which does not encode timer family data - final String timerFamilyId = inStream.available() > 0 ? STRING_CODER.decode(inStream) : ""; - - return TimerKey.builder() - .setTimerId(timerId) - .setStateNamespace(namespace) - .setKey(key) - .setTimerFamilyId(timerFamilyId) - .build(); - } - - @Override - public List> getCoderArguments() { - return Arrays.asList(keyCoder, windowCoder); - } - - @Override - public void verifyDeterministic() throws NonDeterministicException {} - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SingletonKeyedWorkItem.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SingletonKeyedWorkItem.java deleted file mode 100644 index 5a59e8616cc5..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SingletonKeyedWorkItem.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collections; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.sdk.values.WindowedValue; - -/** Implementation of {@link KeyedWorkItem} which contains only a single value. */ -class SingletonKeyedWorkItem implements KeyedWorkItem { - private final K key; - private final WindowedValue value; - - public SingletonKeyedWorkItem(K key, WindowedValue value) { - this.key = key; - this.value = value; - } - - @Override - public K key() { - return key; - } - - @Override - public Iterable timersIterable() { - return Collections.emptyList(); - } - - @Override - public Iterable> elementsIterable() { - return Collections.singletonList(value); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SplittableParDoProcessKeyedElementsOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SplittableParDoProcessKeyedElementsOp.java deleted file mode 100644 index f7886c95f123..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/SplittableParDoProcessKeyedElementsOp.java +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import java.util.Collections; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.DoFnRunners; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.runners.core.KeyedWorkItems; -import org.apache.beam.runners.core.NullSideInputReader; -import org.apache.beam.runners.core.OutputAndTimeBoundedSplittableProcessElementInvoker; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessElements; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StepContext; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternals.TimerData; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.sdk.coders.ByteArrayCoder; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.transforms.reflect.DoFnInvokers; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.WindowedValueMultiReceiver; -import 
org.apache.beam.sdk.util.construction.SplittableParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection.IsBounded; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.samza.config.Config; -import org.apache.samza.context.Context; -import org.apache.samza.operators.Scheduler; -import org.checkerframework.checker.nullness.qual.MonotonicNonNull; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Samza operator for {@link org.apache.beam.sdk.transforms.GroupByKey}. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SplittableParDoProcessKeyedElementsOp< - InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> - implements Op>, RawUnionValue, byte[]> { - private static final Logger LOG = - LoggerFactory.getLogger(SplittableParDoProcessKeyedElementsOp.class); - private static final String TIMER_STATE_ID = "timer"; - - private final TupleTag mainOutputTag; - private final WindowingStrategy windowingStrategy; - private final OutputManagerFactory outputManagerFactory; - private final SplittableParDoViaKeyedWorkItems.ProcessElements< - InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> - processElements; - private final String transformId; - private final IsBounded isBounded; - - private transient StateInternalsFactory stateInternalsFactory; - private transient SamzaTimerInternalsFactory timerInternalsFactory; - private transient DoFnRunner>, OutputT> fnRunner; - private transient SamzaPipelineOptions pipelineOptions; - private transient @MonotonicNonNull ScheduledExecutorService ses = null; - - public 
SplittableParDoProcessKeyedElementsOp( - TupleTag mainOutputTag, - SplittableParDo.ProcessKeyedElements - processKeyedElements, - WindowingStrategy windowingStrategy, - OutputManagerFactory outputManagerFactory, - String transformFullName, - String transformId, - IsBounded isBounded) { - this.mainOutputTag = mainOutputTag; - this.windowingStrategy = windowingStrategy; - this.outputManagerFactory = outputManagerFactory; - this.transformId = transformId; - this.isBounded = isBounded; - - this.processElements = new ProcessElements<>(processKeyedElements); - } - - @Override - public void open( - Config config, - Context context, - Scheduler> timerRegistry, - OpEmitter emitter) { - this.pipelineOptions = - Base64Serializer.deserializeUnchecked( - config.get("beamPipelineOptions"), SerializablePipelineOptions.class) - .get() - .as(SamzaPipelineOptions.class); - - final SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory = - SamzaStoreStateInternals.createNonKeyedStateInternalsFactory( - transformId, context.getTaskContext(), pipelineOptions); - - final WindowedValueMultiReceiver outputManager = outputManagerFactory.create(emitter); - - this.stateInternalsFactory = - new SamzaStoreStateInternals.Factory<>( - transformId, - Collections.singletonMap( - SamzaStoreStateInternals.BEAM_STORE, - SamzaStoreStateInternals.getBeamStore(context.getTaskContext())), - ByteArrayCoder.of(), - pipelineOptions.getStoreBatchGetSize()); - - this.timerInternalsFactory = - SamzaTimerInternalsFactory.createTimerInternalFactory( - ByteArrayCoder.of(), - timerRegistry, - TIMER_STATE_ID, - nonKeyedStateInternalsFactory, - windowingStrategy, - isBounded, - pipelineOptions); - - if (this.ses == null) { - this.ses = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder().setNameFormat("samza-sdf-executor-%d").build()); - } - - final KeyedInternals keyedInternals = - new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory); - - 
SplittableParDoViaKeyedWorkItems.ProcessFn< - InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> - processFn = processElements.newProcessFn(processElements.getFn()); - DoFnInvokers.tryInvokeSetupFor(processFn, pipelineOptions); - processFn.setStateInternalsFactory(stateInternalsFactory); - processFn.setTimerInternalsFactory(timerInternalsFactory); - processFn.setSideInputReader(NullSideInputReader.empty()); - processFn.setProcessElementInvoker( - new OutputAndTimeBoundedSplittableProcessElementInvoker<>( - processElements.getFn(), - pipelineOptions, - outputManager, - mainOutputTag, - NullSideInputReader.empty(), - ses, - 10000, - Duration.standardSeconds(10), - () -> { - throw new UnsupportedOperationException("BundleFinalizer unsupported in Samza"); - })); - - final StepContext stepContext = - new StepContext() { - @Override - public StateInternals stateInternals() { - return keyedInternals.stateInternals(); - } - - @Override - public TimerInternals timerInternals() { - return keyedInternals.timerInternals(); - } - }; - - this.fnRunner = - DoFnRunners.simpleRunner( - pipelineOptions, - processFn, - NullSideInputReader.of(Collections.emptyList()), - outputManager, - mainOutputTag, - Collections.emptyList(), - stepContext, - null, - Collections.emptyMap(), - windowingStrategy, - DoFnSchemaInformation.create(), - Collections.emptyMap()); - } - - @Override - public void processElement( - WindowedValue>> inputElement, - OpEmitter emitter) { - fnRunner.startBundle(); - fnRunner.processElement(inputElement); - fnRunner.finishBundle(); - } - - @Override - public void processWatermark(Instant watermark, OpEmitter emitter) { - timerInternalsFactory.setInputWatermark(watermark); - - Collection> readyTimers = timerInternalsFactory.removeReadyTimers(); - if (!readyTimers.isEmpty()) { - fnRunner.startBundle(); - for (KeyedTimerData keyedTimerData : readyTimers) { - fireTimer(keyedTimerData.getKey(), keyedTimerData.getTimerData()); - } - 
fnRunner.finishBundle(); - } - - if (timerInternalsFactory.getOutputWatermark() == null - || timerInternalsFactory.getOutputWatermark().isBefore(watermark)) { - timerInternalsFactory.setOutputWatermark(watermark); - emitter.emitWatermark(timerInternalsFactory.getOutputWatermark()); - } - } - - @Override - public void processTimer( - KeyedTimerData keyedTimerData, OpEmitter emitter) { - fnRunner.startBundle(); - fireTimer(keyedTimerData.getKey(), keyedTimerData.getTimerData()); - fnRunner.finishBundle(); - - timerInternalsFactory.removeProcessingTimer(keyedTimerData); - } - - private void fireTimer(byte[] key, TimerData timer) { - LOG.debug("Firing timer {} for key {}", timer, key); - fnRunner.processElement( - WindowedValues.valueInGlobalWindow( - KeyedWorkItems.timersWorkItem(key, Collections.singletonList(timer)))); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/WindowAssignOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/WindowAssignOp.java deleted file mode 100644 index cd83f490bc25..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/WindowAssignOp.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.util.Collection; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.WindowFn; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; - -/** Samza operator for {@link org.apache.beam.sdk.transforms.windowing.Window.Assign}. */ -public class WindowAssignOp implements Op { - private final WindowFn windowFn; - - public WindowAssignOp(WindowFn windowFn) { - this.windowFn = windowFn; - } - - @Override - public void processElement(WindowedValue inputElement, OpEmitter emitter) { - final Collection windows; - try { - windows = windowFn.assignWindows(new SamzaAssignContext<>(windowFn, inputElement)); - } catch (Exception e) { - throw new RuntimeException(e); - } - - windows.stream() - .map( - window -> - WindowedValues.of( - inputElement.getValue(), - inputElement.getTimestamp(), - window, - inputElement.getPaneInfo())) - .forEach(outputElement -> emitter.emitElement(outputElement)); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/package-info.java deleted file mode 100644 index 52fb79321a38..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza.runtime; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaMapState.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaMapState.java deleted file mode 100644 index a8741fbc8625..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaMapState.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.state; - -import java.util.Iterator; -import java.util.Map; -import org.apache.beam.sdk.state.MapState; -import org.apache.beam.sdk.state.ReadableState; - -/** Samza's extended MapState, allowing extra access methods to the state. 
*/ -public interface SamzaMapState extends MapState { - - /** - * Returns an iterator from the current map state. Note this is different from the iterable - * implementation in {@link MapState#entries()}}, where we load the entries into memory and return - * iterable from that. To handle large state that doesn't fit in memory, we also need this method - * so it's possible to iterate on large data set and close the iterator when not needed. - * - * @return a {@link ReadableState} of an iterator - */ - ReadableState>> readIterator(); - - /** Closes the iterator returned from {@link SamzaMapState#readIterator()}. */ - void closeIterators(); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaSetState.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaSetState.java deleted file mode 100644 index 8af82fcd5b75..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/state/SamzaSetState.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.state; - -import java.util.Iterator; -import org.apache.beam.sdk.state.ReadableState; -import org.apache.beam.sdk.state.SetState; - -/** Samza's extended SetState, allowing extra access methods to the state. */ -public interface SamzaSetState extends SetState { - - /** - * Returns an iterator from the current set state. Note this is different from the iterable - * implementation in {@link SetState#read()}, where we load the entries into memory and return - * iterable from that. To handle large state that doesn't fit in memory, we also need this method - * so it's possible to iterate on large data set and close the iterator when not needed. - * - * @return a {@link ReadableState} of an iterator - */ - ReadableState> readIterator(); - - /** Closes the iterator returned from {@link SamzaSetState#readIterator()}. */ - void closeIterators(); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/state/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/state/package-info.java deleted file mode 100644 index dff50070f4ff..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/state/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza.state; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/GroupWithoutRepartition.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/GroupWithoutRepartition.java deleted file mode 100644 index c0ce52e34c09..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/GroupWithoutRepartition.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.transforms; - -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PInput; -import org.apache.beam.sdk.values.POutput; - -/** - * A wrapper transform of {@link org.apache.beam.sdk.transforms.GroupByKey} or {@link - * org.apache.beam.sdk.transforms.join.CoGroupByKey} to indicate there is no repartition needed for - * Samza runner. For example: - * - *

input.apply(GroupWithoutRepartition.of(Count.perKey())); - */ -public class GroupWithoutRepartition - extends PTransform { - private final PTransform transform; - - public static - GroupWithoutRepartition of(PTransform transform) { - return new GroupWithoutRepartition<>(transform); - } - - private GroupWithoutRepartition(PTransform transform) { - this.transform = transform; - } - - @Override - @SuppressWarnings("unchecked") - public OutputT expand(InputT input) { - if (input instanceof PCollection) { - return (OutputT) ((PCollection) input).apply(transform); - } else if (input instanceof KeyedPCollectionTuple) { - return (OutputT) ((KeyedPCollectionTuple) input).apply(transform); - } else { - throw new RuntimeException( - transform.getName() - + " is not supported with " - + GroupWithoutRepartition.class.getSimpleName()); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/UpdatingCombineFn.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/UpdatingCombineFn.java deleted file mode 100644 index f0a7e5e1ceaa..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/UpdatingCombineFn.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.transforms; - -import org.apache.beam.sdk.transforms.Combine; - -/** - * Currently Beam only supports either throw away the accumulation or keep it after firing. This - * CombineFn allows more flexibility to update the accumulation. - */ -public abstract class UpdatingCombineFn - extends Combine.CombineFn { - - /** - * Returns an updated accumulator from the given accumulator after firing a window pane. - * - *

For efficiency, the input accumulator may be modified and returned. - */ - public abstract AccumT updateAfterFiring(AccumT accumulator); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/package-info.java deleted file mode 100644 index 292e7563081a..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/transforms/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza.transforms; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java deleted file mode 100644 index 79b5bac238e1..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigBuilder.java +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.samza.config.JobConfig.JOB_AUTOSIZING_CONTAINER_THREAD_POOL_SIZE; -import static org.apache.samza.config.JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE; -import static org.apache.samza.config.JobConfig.JOB_ID; -import static org.apache.samza.config.JobConfig.JOB_NAME; -import static org.apache.samza.config.TaskConfig.COMMIT_MS; -import static org.apache.samza.config.TaskConfig.GROUPER_FACTORY; -import static org.apache.samza.config.TaskConfig.MAX_CONCURRENCY; - -import java.io.File; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.SamzaExecutionEnvironment; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.container.BeamContainerRunner; -import org.apache.beam.runners.samza.container.BeamJobCoordinatorRunner; -import 
org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals; -import org.apache.beam.runners.samza.util.ConfigUtils; -import org.apache.beam.runners.samza.util.PortableConfigUtils; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.samza.config.ApplicationConfig; -import org.apache.samza.config.Config; -import org.apache.samza.config.ConfigLoaderFactory; -import org.apache.samza.config.JobCoordinatorConfig; -import org.apache.samza.config.MapConfig; -import org.apache.samza.config.ZkConfig; -import org.apache.samza.config.loaders.PropertiesConfigLoaderFactory; -import org.apache.samza.container.grouper.task.SingleContainerGrouperFactory; -import org.apache.samza.job.yarn.YarnJobFactory; -import org.apache.samza.runtime.LocalApplicationRunner; -import org.apache.samza.runtime.RemoteApplicationRunner; -import org.apache.samza.standalone.PassthroughJobCoordinatorFactory; -import org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory; -import org.apache.samza.zk.ZkJobCoordinatorFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Builder class to generate configs for BEAM samza runner during runtime. 
*/ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class ConfigBuilder { - private static final Logger LOG = LoggerFactory.getLogger(ConfigBuilder.class); - - private static final String BEAM_STORE_FACTORY = "stores.beamStore.factory"; - private static final String APP_RUNNER_CLASS = "app.runner.class"; - private static final String YARN_PACKAGE_PATH = "yarn.package.path"; - private static final String JOB_FACTORY_CLASS = "job.factory.class"; - - private final Map config = new HashMap<>(); - private final SamzaPipelineOptions options; - - public ConfigBuilder(SamzaPipelineOptions options) { - this.options = options; - } - - public void put(String name, String property) { - config.put(name, property); - } - - public void putAll(Map properties) { - config.putAll(properties); - } - - /** Returns built configuration. */ - public Config build() { - try { - // apply framework configs - config.putAll(createSystemConfig(options, config)); - - // apply user configs - config.putAll(createUserConfig(options)); - - config.put(ApplicationConfig.APP_NAME, options.getJobName()); - config.put(ApplicationConfig.APP_ID, options.getJobInstance()); - config.put(JOB_NAME, options.getJobName()); - config.put(JOB_ID, options.getJobInstance()); - - // bundle-related configs - if (!PortableConfigUtils.isPortable(options)) { - config.putAll(createBundleConfig(options, config)); - LOG.info("Set bundle-related configs for classic mode"); - } else { - LOG.info("Skipped bundle-related configs for portable mode"); - } - - // remove config overrides before serialization (LISAMZA-15259) - options.setConfigOverride(new HashMap<>()); - config.put( - "beamPipelineOptions", - Base64Serializer.serializeUnchecked(new SerializablePipelineOptions(options))); - - validateConfigs(options, config); - - return new MapConfig(config); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @VisibleForTesting - static Map createBundleConfig( - 
SamzaPipelineOptions options, Map config) { - final ImmutableMap.Builder builder = ImmutableMap.builder(); - builder.put(MAX_CONCURRENCY, String.valueOf(options.getMaxBundleSize())); - - if (options.getMaxBundleSize() > 1) { - final int threadPoolSize = ConfigUtils.asJobConfig(config).getThreadPoolSize(); - // Since Samza doesn't allow mixing bundle > 1 with multithreading tasks right now, - // we disable the task thread pool in both user and autosizing configs. - LOG.info("Remove threadPoolSize configs when maxBundleSize > 1"); - builder.put(JOB_CONTAINER_THREAD_POOL_SIZE, "0"); - builder.put(JOB_AUTOSIZING_CONTAINER_THREAD_POOL_SIZE, "0"); - - if (threadPoolSize > 1 && options.getNumThreadsForProcessElement() <= 1) { - // In case the user sets the thread pool through samza config instead options, - // set the bundle thread pool size based on container thread pool config - // this allows Samza auto-sizing to tune the threads - LOG.info("Convert threadPoolSize {} to numThreadsForProcessElement", threadPoolSize); - // NumThreadsForProcessElement in option is the source of truth - options.setNumThreadsForProcessElement(threadPoolSize); - } - } - return builder.build(); - } - - private static Map createUserConfig(SamzaPipelineOptions options) - throws Exception { - final Map config = new HashMap<>(); - - // apply user configs - final String configFilePath = options.getConfigFilePath(); - - // If user provides a config file, use it as base configs. 
- if (StringUtils.isNoneEmpty(configFilePath)) { - LOG.info("configFilePath: {}", configFilePath); - - final Config properties = new MapConfig(Collections.singletonMap("path", configFilePath)); - final ConfigLoaderFactory configLoaderFactory = - options.getConfigLoaderFactory().getDeclaredConstructor().newInstance(); - - LOG.info("configLoaderFactory: {}", configLoaderFactory.getClass().getName()); - - // Config file must exist for default properties config - // TODO: add check to all non-empty files once we don't need to - // pass the command-line args through the containers - if (configLoaderFactory instanceof PropertiesConfigLoaderFactory) { - checkArgument( - new File(configFilePath).exists(), "Config file %s does not exist", configFilePath); - } - - config.putAll(configLoaderFactory.getLoader(properties).getConfig()); - } - // Apply override on top - if (options.getConfigOverride() != null) { - config.putAll(options.getConfigOverride()); - } - - return config; - } - - private static void validateZKStandAloneRun(Map config) { - checkArgument( - config.containsKey(APP_RUNNER_CLASS), - "Config %s not found for %s Deployment", - APP_RUNNER_CLASS, - SamzaExecutionEnvironment.STANDALONE); - checkArgument( - config.get(APP_RUNNER_CLASS).equals(LocalApplicationRunner.class.getName()), - "Config %s must be set to %s for %s Deployment", - APP_RUNNER_CLASS, - LocalApplicationRunner.class.getName(), - SamzaExecutionEnvironment.STANDALONE); - checkArgument( - config.containsKey(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY), - "Config %s not found for %s Deployment", - JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, - SamzaExecutionEnvironment.STANDALONE); - checkArgument( - config - .get(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY) - .equals(ZkJobCoordinatorFactory.class.getName()), - "Config %s must be set to %s for %s Deployment", - JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, - ZkJobCoordinatorFactory.class.getName(), - SamzaExecutionEnvironment.STANDALONE); - 
checkArgument( - config.containsKey(ZkConfig.ZK_CONNECT), - "Config %s not found for %s Deployment", - ZkConfig.ZK_CONNECT, - SamzaExecutionEnvironment.STANDALONE); - } - - private static void validateYarnRun(Map config) { - checkArgument( - config.containsKey(YARN_PACKAGE_PATH), - "Config %s not found for %s Deployment", - YARN_PACKAGE_PATH, - SamzaExecutionEnvironment.YARN); - final String appRunner = config.get(APP_RUNNER_CLASS); - checkArgument( - appRunner == null - || BeamJobCoordinatorRunner.class.getName().equals(appRunner) - || RemoteApplicationRunner.class.getName().equals(appRunner) - || BeamContainerRunner.class.getName().equals(appRunner), - "Config %s must be set to %s for %s Deployment, but found %s", - APP_RUNNER_CLASS, - String.format( - "[%s, %s or %s]", - BeamJobCoordinatorRunner.class.getName(), - RemoteApplicationRunner.class.getName(), - BeamContainerRunner.class.getName()), - SamzaExecutionEnvironment.YARN, - appRunner); - checkArgument( - config.containsKey(JOB_FACTORY_CLASS), - "Config %s not found for %s Deployment", - JOB_FACTORY_CLASS, - SamzaExecutionEnvironment.YARN); - } - - @VisibleForTesting - public static Map localRunConfig() { - // Default Samza config using local deployment of a single JVM - return ImmutableMap.builder() - .put(APP_RUNNER_CLASS, LocalApplicationRunner.class.getName()) - .put( - JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, - PassthroughJobCoordinatorFactory.class.getName()) - .put(GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName()) - .put(COMMIT_MS, "-1") - .put("processor.id", "1") - .put( - // TODO: remove after SAMZA-1531 is resolved - ApplicationConfig.APP_RUN_ID, - System.currentTimeMillis() - + "-" - // use the most significant bits in UUID (8 digits) to avoid collision - + UUID.randomUUID().toString().substring(0, 8)) - .build(); - } - - public static Map yarnRunConfig() { - // Default Samza config using yarn deployment - return ImmutableMap.builder() - .put(APP_RUNNER_CLASS, 
RemoteApplicationRunner.class.getName()) - .put(JOB_FACTORY_CLASS, YarnJobFactory.class.getName()) - .build(); - } - - public static Map standAloneRunConfig() { - // Default Samza config using stand alone deployment - return ImmutableMap.builder() - .put(APP_RUNNER_CLASS, LocalApplicationRunner.class.getName()) - .put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, ZkJobCoordinatorFactory.class.getName()) - .build(); - } - - private static Map createSystemConfig( - SamzaPipelineOptions options, Map config) { - final ImmutableMap.Builder configBuilder = - ImmutableMap.builder() - .put("stores.beamStore.key.serde", "byteArraySerde") - .put("stores.beamStore.msg.serde", "stateValueSerde") - .put( - "serializers.registry.stateValueSerde.class", - SamzaStoreStateInternals.StateValueSerdeFactory.class.getName()) - .put( - "serializers.registry.byteArraySerde.class", - SamzaStoreStateInternals.ByteArraySerdeFactory.class.getName()); - - // if config does not contain "stores.beamStore.factory" at this moment, - // then it is a stateless job. 
- if (!config.containsKey(BEAM_STORE_FACTORY)) { - options.setStateDurable(false); - configBuilder.put( - BEAM_STORE_FACTORY, - "org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStorageEngineFactory"); - } - - LOG.info("Execution environment is {}", options.getSamzaExecutionEnvironment()); - switch (options.getSamzaExecutionEnvironment()) { - case YARN: - configBuilder.putAll(yarnRunConfig()); - break; - case STANDALONE: - configBuilder.putAll(standAloneRunConfig()); - break; - default: // LOCAL - configBuilder.putAll(localRunConfig()); - break; - } - - // TODO: remove after we sort out Samza task wrapper - configBuilder.put("samza.li.task.wrapper.enabled", "false"); - - return configBuilder.build(); - } - - static Map createRocksDBStoreConfig(SamzaPipelineOptions options) { - final ImmutableMap.Builder configBuilder = - ImmutableMap.builder() - .put(BEAM_STORE_FACTORY, RocksDbKeyValueStorageEngineFactory.class.getName()) - .put("stores.beamStore.rocksdb.compression", "lz4"); - - if (options.getStateDurable()) { - LOG.info("stateDurable is enabled"); - configBuilder.put("stores.beamStore.changelog", getChangelogTopic(options, "beamStore")); - configBuilder.put("job.host-affinity.enabled", "true"); - } - - return configBuilder.build(); - } - - private static void validateConfigs(SamzaPipelineOptions options, Map config) { - - // validate execution environment - switch (options.getSamzaExecutionEnvironment()) { - case YARN: - validateYarnRun(config); - break; - case STANDALONE: - validateZKStandAloneRun(config); - break; - default: - // do nothing - break; - } - } - - static String getChangelogTopic(SamzaPipelineOptions options, String storeName) { - return String.format( - "%s-%s-%s-changelog", options.getJobName(), options.getJobInstance(), storeName); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigContext.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigContext.java deleted file 
mode 100644 index 954f95a593ee..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ConfigContext.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Map; -import java.util.Set; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.util.StoreIdGenerator; -import org.apache.beam.sdk.runners.AppliedPTransform; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; - -/** Helper that provides context data such as output for config generation. 
*/ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class ConfigContext { - private final Map idMap; - private AppliedPTransform currentTransform; - private final SamzaPipelineOptions options; - private final StoreIdGenerator storeIdGenerator; - - public ConfigContext( - Map idMap, Set nonUniqueStateIds, SamzaPipelineOptions options) { - this.idMap = idMap; - this.options = options; - this.storeIdGenerator = new StoreIdGenerator(nonUniqueStateIds); - } - - public void setCurrentTransform(AppliedPTransform currentTransform) { - this.currentTransform = currentTransform; - } - - public void clearCurrentTransform() { - this.currentTransform = null; - } - - @SuppressWarnings("unchecked") - public OutT getOutput(PTransform transform) { - return (OutT) Iterables.getOnlyElement(this.currentTransform.getOutputs().values()); - } - - public String getOutputId(TransformHierarchy.Node node) { - return getIdForPValue(Iterables.getOnlyElement(node.getOutputs().values())); - } - - public SamzaPipelineOptions getPipelineOptions() { - return this.options; - } - - public StoreIdGenerator getStoreIdGenerator() { - return storeIdGenerator; - } - - private String getIdForPValue(PValue pvalue) { - final String id = idMap.get(pvalue); - if (id == null) { - throw new IllegalArgumentException("No id mapping for value: " + pvalue); - } - return id; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/FlattenPCollectionsTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/FlattenPCollectionsTranslator.java deleted file mode 100644 index fe840ee86461..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/FlattenPCollectionsTranslator.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.beam.runners.samza.runtime.Op; -import org.apache.beam.runners.samza.runtime.OpAdapter; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.samza.operators.MessageStream; - -/** - * Translates {@link org.apache.beam.sdk.transforms.Flatten.PCollections} to Samza merge operator. 
- */ -class FlattenPCollectionsTranslator implements TransformTranslator> { - @Override - public void translate( - Flatten.PCollections transform, TransformHierarchy.Node node, TranslationContext ctx) { - doTranslate(transform, node, ctx); - } - - private static void doTranslate( - Flatten.PCollections transform, TransformHierarchy.Node node, TranslationContext ctx) { - final PCollection output = ctx.getOutput(transform); - - final List>> inputStreams = new ArrayList<>(); - for (Map.Entry, PCollection> taggedPValue : node.getInputs().entrySet()) { - @SuppressWarnings("unchecked") - final PCollection input = (PCollection) taggedPValue.getValue(); - inputStreams.add(ctx.getMessageStream(input)); - } - - if (inputStreams.isEmpty()) { - // for some of the validateRunner tests only - final MessageStream> noOpStream = - ctx.getDummyStream() - .flatMapAsync( - OpAdapter.adapt((Op) (inputElement, emitter) -> {}, ctx)); - ctx.registerMessageStream(output, noOpStream); - return; - } - - ctx.registerMessageStream(output, mergeInputStreams(inputStreams)); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - doTranslatePortable(transform, ctx); - } - - private static void doTranslatePortable( - PipelineNode.PTransformNode transform, PortableTranslationContext ctx) { - final List>> inputStreams = ctx.getAllInputMessageStreams(transform); - final String outputId = ctx.getOutputId(transform); - - // For portable api there should be at least the impulse as a dummy input - // We will know once validateRunner tests are available for portable runners - checkState( - !inputStreams.isEmpty(), "no input streams defined for Flatten: %s", transform.getId()); - - ctx.registerMessageStream(outputId, mergeInputStreams(inputStreams)); - } - - // Merge multiple input streams into one, as this is what "flatten" is meant to do - private static MessageStream> mergeInputStreams( - List>> 
inputStreams) { - if (inputStreams.size() == 1) { - return Iterables.getOnlyElement(inputStreams); - } - final Set>> streamsToMerge = new HashSet<>(); - inputStreams.forEach( - stream -> { - if (!streamsToMerge.add(stream)) { - // Merge same streams. Make a copy of the current stream. - streamsToMerge.add(stream.map(m -> m)); - } - }); - - return MessageStream.mergeAll(streamsToMerge); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/GroupByKeyTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/GroupByKeyTranslator.java deleted file mode 100644 index 18d105e35e9f..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/GroupByKeyTranslator.java +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import static org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils.escape; - -import java.util.Map; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.runners.core.KeyedWorkItemCoder; -import org.apache.beam.runners.core.SystemReduceFn; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.runtime.DoFnOp; -import org.apache.beam.runners.samza.runtime.GroupByKeyOp; -import org.apache.beam.runners.samza.runtime.KvToKeyedWorkItemOp; -import org.apache.beam.runners.samza.runtime.OpAdapter; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.transforms.GroupWithoutRepartition; -import org.apache.beam.runners.samza.util.SamzaCoders; -import org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils; -import org.apache.beam.runners.samza.util.WindowUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.CombineFnBase; -import org.apache.beam.sdk.transforms.GroupByKey; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.AppliedCombineFn; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.sdk.values.WindowingStrategy; -import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.serializers.KVSerde; - -/** Translates {@link GroupByKey} to Samza {@link GroupByKeyOp}. */ -@SuppressWarnings({"keyfor", "nullness"}) // TODO(https://github.com/apache/beam/issues/20497) -class GroupByKeyTranslator - implements TransformTranslator< - PTransform>, PCollection>>>, - TransformConfigGenerator< - PTransform>, PCollection>>> { - - @Override - public void translate( - PTransform>, PCollection>> transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - doTranslate(transform, node, ctx); - } - - private static void doTranslate( - PTransform>, PCollection>> transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - final PCollection> input = ctx.getInput(transform); - - final PCollection> output = ctx.getOutput(transform); - final TupleTag> outputTag = ctx.getOutputTag(transform); - - @SuppressWarnings("unchecked") - final WindowingStrategy windowingStrategy = - (WindowingStrategy) input.getWindowingStrategy(); - - final MessageStream>> inputStream = ctx.getMessageStream(input); - - final KvCoder kvInputCoder = (KvCoder) input.getCoder(); - final Coder>> elementCoder = SamzaCoders.of(input); - - final SystemReduceFn reduceFn = - getSystemReduceFn(transform, input.getPipeline(), kvInputCoder); - - final MessageStream>> outputStream = - doTranslateGBK( - inputStream, - needRepartition(node, ctx), - reduceFn, - windowingStrategy, - kvInputCoder, - elementCoder, - ctx, - outputTag, - input.isBounded()); - - ctx.registerMessageStream(output, outputStream); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - final String inputId = ctx.getInputId(transform); - final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId); - final MessageStream>> 
inputStream = ctx.getMessageStreamById(inputId); - final WindowingStrategy windowingStrategy = - WindowUtils.getWindowStrategy(inputId, pipeline.getComponents()); - final WindowedValues.WindowedValueCoder> windowedInputCoder = - WindowUtils.instantiateWindowedCoder(inputId, pipeline.getComponents()); - final TupleTag> outputTag = - new TupleTag<>(Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet())); - - final MessageStream>> outputStream = - doTranslatePortable( - input, inputStream, windowingStrategy, windowedInputCoder, outputTag, ctx); - - ctx.registerMessageStream(ctx.getOutputId(transform), outputStream); - } - - @Override - public Map createConfig( - PTransform>, PCollection>> transform, - TransformHierarchy.Node node, - ConfigContext ctx) { - return ConfigBuilder.createRocksDBStoreConfig(ctx.getPipelineOptions()); - } - - @Override - public Map createPortableConfig( - PipelineNode.PTransformNode transform, SamzaPipelineOptions options) { - return ConfigBuilder.createRocksDBStoreConfig(options); - } - - /** - * The method is used to translate both portable GBK transform as well as grouping side inputs - * into Samza. 
- */ - static MessageStream>> doTranslatePortable( - RunnerApi.PCollection input, - MessageStream>> inputStream, - WindowingStrategy windowingStrategy, - WindowedValues.WindowedValueCoder> windowedInputCoder, - TupleTag> outputTag, - PortableTranslationContext ctx) { - final boolean needRepartition = ctx.getPipelineOptions().getMaxSourceParallelism() > 1; - final Coder windowCoder = windowingStrategy.getWindowFn().windowCoder(); - final KvCoder kvInputCoder = (KvCoder) windowedInputCoder.getValueCoder(); - final Coder>> elementCoder = - WindowedValues.FullWindowedValueCoder.of(kvInputCoder, windowCoder); - - @SuppressWarnings("unchecked") - final SystemReduceFn reduceFn = - (SystemReduceFn) - SystemReduceFn.buffering(kvInputCoder.getValueCoder()); - - final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input); - - return doTranslateGBK( - inputStream, - needRepartition, - reduceFn, - windowingStrategy, - kvInputCoder, - elementCoder, - ctx, - outputTag, - isBounded); - } - - private static MessageStream>> doTranslateGBK( - MessageStream>> inputStream, - boolean needRepartition, - SystemReduceFn reduceFn, - WindowingStrategy windowingStrategy, - KvCoder kvInputCoder, - Coder>> elementCoder, - TranslationContext ctx, - TupleTag> outputTag, - PCollection.IsBounded isBounded) { - final MessageStream>> filteredInputStream = - inputStream.filter(msg -> msg.getType() == OpMessage.Type.ELEMENT); - - final MessageStream>> partitionedInputStream; - if (!needRepartition) { - partitionedInputStream = filteredInputStream; - } else { - partitionedInputStream = - filteredInputStream - .partitionBy( - msg -> msg.getElement().getValue().getKey(), - msg -> msg.getElement(), - KVSerde.of( - SamzaCoders.toSerde(kvInputCoder.getKeyCoder()), - SamzaCoders.toSerde(elementCoder)), - "gbk-" + escape(ctx.getTransformId())) - .map(kv -> OpMessage.ofElement(kv.getValue())); - } - - final Coder> keyedWorkItemCoder = - KeyedWorkItemCoder.of( - 
kvInputCoder.getKeyCoder(), - kvInputCoder.getValueCoder(), - windowingStrategy.getWindowFn().windowCoder()); - - final MessageStream>> outputStream = - partitionedInputStream - .flatMapAsync(OpAdapter.adapt(new KvToKeyedWorkItemOp<>(), ctx)) - .flatMapAsync( - OpAdapter.adapt( - new GroupByKeyOp<>( - outputTag, - keyedWorkItemCoder, - reduceFn, - windowingStrategy, - new DoFnOp.SingleOutputManagerFactory<>(), - ctx.getTransformFullName(), - ctx.getTransformId(), - isBounded), - ctx)); - return outputStream; - } - - @SuppressWarnings("unchecked") - private static - SystemReduceFn getSystemReduceFn( - PTransform>, PCollection>> transform, - Pipeline pipeline, - KvCoder kvInputCoder) { - if (transform instanceof GroupByKey) { - return (SystemReduceFn) - SystemReduceFn.buffering(kvInputCoder.getValueCoder()); - } else if (transform instanceof Combine.PerKey) { - final CombineFnBase.GlobalCombineFn combineFn = - ((Combine.PerKey) transform).getFn(); - return SystemReduceFn.combining( - kvInputCoder.getKeyCoder(), - AppliedCombineFn.withInputCoder(combineFn, pipeline.getCoderRegistry(), kvInputCoder)); - } else { - throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey."); - } - } - - private static boolean needRepartition(TransformHierarchy.Node node, TranslationContext ctx) { - if (ctx.getPipelineOptions().getMaxSourceParallelism() == 1) { - // Only one task will be created, no need for repartition - return false; - } - - if (node == null) { - return true; - } - - if (node.getTransform() instanceof GroupWithoutRepartition) { - return false; - } else { - return needRepartition(node.getEnclosingNode(), ctx); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ImpulseTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ImpulseTranslator.java deleted file mode 100644 index 35d8f1f39962..000000000000 --- 
a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ImpulseTranslator.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils; -import org.apache.beam.sdk.runners.TransformHierarchy.Node; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.PBegin; -import org.apache.beam.sdk.values.PCollection; -import org.apache.samza.operators.KV; -import org.apache.samza.serializers.KVSerde; -import org.apache.samza.serializers.NoOpSerde; -import org.apache.samza.serializers.Serde; -import org.apache.samza.system.descriptors.GenericInputDescriptor; -import org.apache.samza.system.descriptors.GenericSystemDescriptor; - -/** - * Translate {@link org.apache.beam.sdk.transforms.Impulse} to a samza message stream produced by - * {@link - * org.apache.beam.runners.samza.translation.SamzaImpulseSystemFactory.SamzaImpulseSystemConsumer}. 
- */ -@SuppressWarnings({ - "rawtypes" // TODO(https://github.com/apache/beam/issues/20447) -}) -public class ImpulseTranslator - implements TransformTranslator>> { - - @Override - public void translate( - PTransform> transform, Node node, TranslationContext ctx) { - final PCollection output = ctx.getOutput(transform); - final String outputId = ctx.getIdForPValue(output); - final GenericSystemDescriptor systemDescriptor = - new GenericSystemDescriptor(outputId, SamzaImpulseSystemFactory.class.getName()); - - // The KvCoder is needed here for Samza not to crop the key. - final Serde>> kvSerde = KVSerde.of(new NoOpSerde(), new NoOpSerde<>()); - final GenericInputDescriptor>> inputDescriptor = - systemDescriptor.getInputDescriptor(outputId, kvSerde); - - ctx.registerInputMessageStream(output, inputDescriptor); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - - final String outputId = ctx.getOutputId(transform); - final String escapedOutputId = SamzaPipelineTranslatorUtils.escape(outputId); - final GenericSystemDescriptor systemDescriptor = - new GenericSystemDescriptor(escapedOutputId, SamzaImpulseSystemFactory.class.getName()); - - // The KvCoder is needed here for Samza not to crop the key. 
- final Serde>> kvSerde = KVSerde.of(new NoOpSerde(), new NoOpSerde<>()); - final GenericInputDescriptor>> inputDescriptor = - systemDescriptor.getInputDescriptor(escapedOutputId, kvSerde); - - ctx.registerInputMessageStream(outputId, inputDescriptor); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PViewToIdMapper.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PViewToIdMapper.java deleted file mode 100644 index f7bca872759d..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PViewToIdMapper.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.util.NameUtils; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.PValue; - -/** - * This class generates an ID for each {@link PValue} during a topological traversal of the BEAM - * {@link Pipeline}. 
- */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class PViewToIdMapper extends Pipeline.PipelineVisitor.Defaults { - private final Map idMap = new HashMap<>(); - private int nextId; - - public static Map buildIdMap(Pipeline pipeline) { - final PViewToIdMapper mapper = new PViewToIdMapper(); - pipeline.traverseTopologically(mapper); - return mapper.getIdMap(); - } - - private PViewToIdMapper() {} - - @Override - public void visitValue(PValue value, TransformHierarchy.Node producer) { - final String valueDesc = pValueToString(value).replaceFirst(".*:([a-zA-Z#0-9]+).*", "$1"); - - final String samzaSafeValueDesc = valueDesc.replaceAll("[^A-Za-z0-9_-]", "_"); - - idMap.put(value, String.format("%d-%s", nextId++, samzaSafeValueDesc)); - } - - @Override - public void visitPrimitiveTransform(TransformHierarchy.Node node) { - if (node.getTransform() instanceof SamzaPublishView) { - final PCollectionView view = ((SamzaPublishView) node.getTransform()).getView(); - visitValue(view, node); - } - } - - public Map getIdMap() { - return Collections.unmodifiableMap(idMap); - } - - /** - * This method is created to replace the {@link org.apache.beam.sdk.values.PValueBase#toString()} - * with the old implementation that doesn't contain the hashcode. 
- */ - private static String pValueToString(PValue value) { - String name; - try { - name = value.getName(); - } catch (IllegalStateException e) { - name = ""; - } - return name + " [" + NameUtils.approximateSimpleName(value.getClass()) + "]"; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java deleted file mode 100644 index 097eb4c256a3..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ParDoBoundMultiTranslator.java +++ /dev/null @@ -1,563 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import static org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.ServiceLoader; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.runtime.DoFnOp; -import org.apache.beam.runners.samza.runtime.Op; -import org.apache.beam.runners.samza.runtime.OpAdapter; -import org.apache.beam.runners.samza.runtime.OpEmitter; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.runtime.PortableDoFnOp; -import org.apache.beam.runners.samza.runtime.SamzaDoFnInvokerRegistrar; -import org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils; -import org.apache.beam.runners.samza.util.StateUtils; -import org.apache.beam.runners.samza.util.WindowUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.IterableCoder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.ViewFn; -import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.transforms.reflect.DoFnSignature; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import 
org.apache.beam.sdk.util.construction.ParDoTranslation; -import org.apache.beam.sdk.util.construction.RunnerPCollectionView; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.PCollectionViews; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterators; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.operators.functions.FlatMapFunction; -import org.apache.samza.operators.functions.WatermarkFunction; -import org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory; -import org.joda.time.Instant; - -/** - * Translates {@link org.apache.beam.sdk.transforms.ParDo.MultiOutput} or ExecutableStage in - * portable api to Samza {@link DoFnOp}. - */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class ParDoBoundMultiTranslator - implements TransformTranslator>, - TransformConfigGenerator> { - - private final SamzaDoFnInvokerRegistrar doFnInvokerRegistrar; - - ParDoBoundMultiTranslator() { - final Iterator invokerReg = - ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator(); - doFnInvokerRegistrar = invokerReg.hasNext() ? 
Iterators.getOnlyElement(invokerReg) : null; - } - - @Override - public void translate( - ParDo.MultiOutput transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - doTranslate(transform, node, ctx); - } - - // static for serializing anonymous functions - private static void doTranslate( - ParDo.MultiOutput transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - final PCollection input = ctx.getInput(transform); - final Map, Coder> outputCoders = - ctx.getCurrentTransform().getOutputs().entrySet().stream() - .filter(e -> e.getValue() != null) - .collect( - Collectors.toMap(e -> e.getKey(), e -> ((PCollection) e.getValue()).getCoder())); - - final Coder keyCoder = - StateUtils.isStateful(transform.getFn()) - ? ((KvCoder) input.getCoder()).getKeyCoder() - : null; - - if (DoFnSignatures.isSplittable(transform.getFn())) { - throw new UnsupportedOperationException("Splittable DoFn is not currently supported"); - } - if (DoFnSignatures.requiresTimeSortedInput(transform.getFn())) { - throw new UnsupportedOperationException( - "@RequiresTimeSortedInput annotation is not currently supported"); - } - - final MessageStream> inputStream = ctx.getMessageStream(input); - final List>> sideInputStreams = - transform.getSideInputs().values().stream() - .map(ctx::getViewStream) - .collect(Collectors.toList()); - final ArrayList, PCollection>> outputs = - new ArrayList<>(node.getOutputs().entrySet()); - - final Map, Integer> tagToIndexMap = new HashMap<>(); - final Map> indexToPCollectionMap = new HashMap<>(); - - for (int index = 0; index < outputs.size(); ++index) { - final Map.Entry, PCollection> taggedOutput = outputs.get(index); - tagToIndexMap.put(taggedOutput.getKey(), index); - - if (taggedOutput.getValue() == null) { - throw new IllegalArgumentException( - "Expected side output to be PCollection, but was: " + taggedOutput.getValue()); - } - final PCollection sideOutputCollection = taggedOutput.getValue(); - indexToPCollectionMap.put(index, 
sideOutputCollection); - } - - final HashMap> idToPValueMap = new HashMap<>(); - for (PCollectionView view : transform.getSideInputs().values()) { - idToPValueMap.put(ctx.getViewId(view), view); - } - - DoFnSchemaInformation doFnSchemaInformation; - doFnSchemaInformation = ParDoTranslation.getSchemaInformation(ctx.getCurrentTransform()); - - Map> sideInputMapping = - ParDoTranslation.getSideInputMapping(ctx.getCurrentTransform()); - - final DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass()); - final Map stateIdToStoreMapping = new HashMap<>(); - for (String stateId : signature.stateDeclarations().keySet()) { - final String transformFullName = node.getEnclosingNode().getFullName(); - final String storeId = ctx.getStoreIdGenerator().getId(stateId, transformFullName); - stateIdToStoreMapping.put(stateId, storeId); - } - final DoFnOp op = - new DoFnOp<>( - transform.getMainOutputTag(), - transform.getFn(), - keyCoder, - (Coder) input.getCoder(), - null, - outputCoders, - transform.getSideInputs().values(), - transform.getAdditionalOutputTags().getAll(), - input.getWindowingStrategy(), - idToPValueMap, - new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), - ctx.getTransformFullName(), - ctx.getTransformId(), - input.isBounded(), - false, - null, - null, - Collections.emptyMap(), - doFnSchemaInformation, - sideInputMapping, - stateIdToStoreMapping); - - final MessageStream> mergedStreams; - if (sideInputStreams.isEmpty()) { - mergedStreams = inputStream; - } else { - MessageStream> mergedSideInputStreams = - MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn()); - mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams)); - } - - final MessageStream> taggedOutputStream = - mergedStreams.flatMapAsync(OpAdapter.adapt(op, ctx)); - - for (int outputIndex : tagToIndexMap.values()) { - @SuppressWarnings("unchecked") - final MessageStream> outputStream = - taggedOutputStream - .filter( - message 
-> - message.getType() != OpMessage.Type.ELEMENT - || message.getElement().getValue().getUnionTag() == outputIndex) - .flatMapAsync(OpAdapter.adapt(new RawUnionValueToValue(), ctx)); - - ctx.registerMessageStream(indexToPCollectionMap.get(outputIndex), outputStream); - } - } - - /* - * We reuse ParDo translator to translate ExecutableStage - */ - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - doTranslatePortable(transform, pipeline, ctx); - } - - // static for serializing anonymous functions - private static void doTranslatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - Map outputs = transform.getTransform().getOutputsMap(); - - final RunnerApi.ExecutableStagePayload stagePayload; - try { - stagePayload = - RunnerApi.ExecutableStagePayload.parseFrom( - transform.getTransform().getSpec().getPayload()); - } catch (IOException e) { - throw new RuntimeException(e); - } - - String inputId = stagePayload.getInput(); - final MessageStream> inputStream = ctx.getMessageStreamById(inputId); - - // Analyze side inputs - final List>>> sideInputStreams = new ArrayList<>(); - final Map> sideInputMapping = new HashMap<>(); - final Map> idToViewMapping = new HashMap<>(); - final RunnerApi.Components components = stagePayload.getComponents(); - for (SideInputId sideInputId : stagePayload.getSideInputsList()) { - final String sideInputCollectionId = - components - .getTransformsOrThrow(sideInputId.getTransformId()) - .getInputsOrThrow(sideInputId.getLocalName()); - final WindowingStrategy windowingStrategy = - WindowUtils.getWindowStrategy(sideInputCollectionId, components); - final WindowedValues.WindowedValueCoder coder = - (WindowedValues.WindowedValueCoder) instantiateCoder(sideInputCollectionId, components); - - // Create a runner-side view - final PCollectionView view = 
createPCollectionView(sideInputId, coder, windowingStrategy); - - // Use GBK to aggregate the side inputs and then broadcast it out - final MessageStream>> broadcastSideInput = - groupAndBroadcastSideInput( - sideInputId, - sideInputCollectionId, - components.getPcollectionsOrThrow(sideInputCollectionId), - (WindowingStrategy) windowingStrategy, - coder, - ctx); - - sideInputStreams.add(broadcastSideInput); - sideInputMapping.put(sideInputId, view); - idToViewMapping.put(getSideInputUniqueId(sideInputId), view); - } - - final Map, Integer> tagToIndexMap = new HashMap<>(); - final Map indexToIdMap = new HashMap<>(); - final Map> idToTupleTagMap = new HashMap<>(); - - // first output as the main output - final TupleTag mainOutputTag = - outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next()); - - AtomicInteger index = new AtomicInteger(0); - outputs - .keySet() - .iterator() - .forEachRemaining( - outputName -> { - TupleTag tupleTag = new TupleTag<>(outputName); - tagToIndexMap.put(tupleTag, index.get()); - String collectionId = outputs.get(outputName); - indexToIdMap.put(index.get(), collectionId); - idToTupleTagMap.put(collectionId, tupleTag); - index.incrementAndGet(); - }); - - WindowedValues.WindowedValueCoder windowedInputCoder = - WindowUtils.instantiateWindowedCoder(inputId, pipeline.getComponents()); - - // TODO: support schema and side inputs for portable runner - // Note: transform.getTransform() is an ExecutableStage, not ParDo, so we need to extract - // these info from its components. 
- final DoFnSchemaInformation doFnSchemaInformation = null; - - final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId); - final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input); - - // No key coder information required for handing the stateless stage or stage with user states - // The key coder information is required for handing the stage with user timers - final Coder timerKeyCoder = - stagePayload.getTimersCount() > 0 - ? ((KvCoder) - ((WindowedValues.FullWindowedValueCoder) windowedInputCoder).getValueCoder()) - .getKeyCoder() - : null; - - final PortableDoFnOp op = - new PortableDoFnOp<>( - mainOutputTag, - new NoOpDoFn<>(), - timerKeyCoder, - windowedInputCoder.getValueCoder(), // input coder not in use - windowedInputCoder, - Collections.emptyMap(), // output coders not in use - new ArrayList<>(sideInputMapping.values()), - new ArrayList<>(idToTupleTagMap.values()), // used by java runner only - WindowUtils.getWindowStrategy(inputId, stagePayload.getComponents()), - idToViewMapping, - new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), - ctx.getTransformFullName(), - ctx.getTransformId(), - isBounded, - true, - stagePayload, - ctx.getJobInfo(), - idToTupleTagMap, - doFnSchemaInformation, - sideInputMapping, - Collections.emptyMap()); - - final MessageStream> mergedStreams; - if (sideInputStreams.isEmpty()) { - mergedStreams = inputStream; - } else { - MessageStream> mergedSideInputStreams = - MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn()); - mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams)); - } - - final MessageStream> taggedOutputStream = - mergedStreams.flatMapAsync(OpAdapter.adapt(op, ctx)); - - for (int outputIndex : tagToIndexMap.values()) { - @SuppressWarnings("unchecked") - final MessageStream> outputStream = - taggedOutputStream - .filter( - message -> - message.getType() != OpMessage.Type.ELEMENT - || 
message.getElement().getValue().getUnionTag() == outputIndex) - .flatMapAsync(OpAdapter.adapt(new RawUnionValueToValue(), ctx)); - - ctx.registerMessageStream(indexToIdMap.get(outputIndex), outputStream); - } - } - - @Override - public Map createConfig( - ParDo.MultiOutput transform, TransformHierarchy.Node node, ConfigContext ctx) { - final Map config = new HashMap<>(); - final DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass()); - final SamzaPipelineOptions options = ctx.getPipelineOptions(); - - // If a ParDo observes directly or indirectly with window, then this is a stateful ParDo - // in this case, we will use RocksDB as system store. - if (signature.processElement().observesWindow()) { - config.putAll(ConfigBuilder.createRocksDBStoreConfig(options)); - } - - if (signature.usesState()) { - // set up user state configs - for (String stateId : signature.stateDeclarations().keySet()) { - final String transformFullName = node.getEnclosingNode().getFullName(); - final String storeId = ctx.getStoreIdGenerator().getId(stateId, transformFullName); - config.put( - "stores." + storeId + ".factory", RocksDbKeyValueStorageEngineFactory.class.getName()); - config.put("stores." + storeId + ".key.serde", "byteArraySerde"); - config.put("stores." + storeId + ".msg.serde", "stateValueSerde"); - config.put("stores." + storeId + ".rocksdb.compression", "lz4"); - - if (options.getStateDurable()) { - config.put( - "stores." 
+ storeId + ".changelog", - ConfigBuilder.getChangelogTopic(options, storeId)); - } - } - } - - if (doFnInvokerRegistrar != null) { - config.putAll(doFnInvokerRegistrar.configFor(transform.getFn())); - } - - return config; - } - - @Override - public Map createPortableConfig( - PipelineNode.PTransformNode transform, SamzaPipelineOptions options) { - - final RunnerApi.ExecutableStagePayload stagePayload; - try { - stagePayload = - RunnerApi.ExecutableStagePayload.parseFrom( - transform.getTransform().getSpec().getPayload()); - } catch (IOException e) { - throw new RuntimeException(e); - } - - if (!StateUtils.isStateful(stagePayload)) { - return Collections.emptyMap(); - } - - final Map config = - new HashMap<>(ConfigBuilder.createRocksDBStoreConfig(options)); - for (RunnerApi.ExecutableStagePayload.UserStateId stateId : stagePayload.getUserStatesList()) { - final String storeId = stateId.getLocalName(); - - config.put( - "stores." + storeId + ".factory", RocksDbKeyValueStorageEngineFactory.class.getName()); - config.put("stores." + storeId + ".key.serde", "byteArraySerde"); - config.put("stores." + storeId + ".msg.serde", "stateValueSerde"); - config.put("stores." + storeId + ".rocksdb.compression", "lz4"); - - if (options.getStateDurable()) { - config.put( - "stores." + storeId + ".changelog", ConfigBuilder.getChangelogTopic(options, storeId)); - } - } - - return config; - } - - @SuppressWarnings("unchecked") - private static final ViewFn>, ?> VIEW_FN = - (ViewFn) - new PCollectionViews.MultimapViewFn<>( - (PCollectionViews.TypeDescriptorSupplier>>) - () -> TypeDescriptors.iterables(new TypeDescriptor>() {}), - (PCollectionViews.TypeDescriptorSupplier) TypeDescriptors::voids); - - // This method follows the same way in Flink to create a runner-side Java - // PCollectionView to represent a portable side input. 
- private static PCollectionView createPCollectionView( - SideInputId sideInputId, - WindowedValues.WindowedValueCoder coder, - WindowingStrategy windowingStrategy) { - - return new RunnerPCollectionView<>( - null, - new TupleTag<>(sideInputId.getLocalName()), - VIEW_FN, - // TODO: support custom mapping fn - windowingStrategy.getWindowFn().getDefaultWindowMappingFn(), - windowingStrategy, - coder.getValueCoder()); - } - - // Group the side input globally with a null key and then broadcast it - // to all tasks. - private static - MessageStream>> groupAndBroadcastSideInput( - SideInputId sideInputId, - String sideInputCollectionId, - RunnerApi.PCollection sideInputPCollection, - WindowingStrategy windowingStrategy, - WindowedValues.WindowedValueCoder coder, - PortableTranslationContext ctx) { - final MessageStream> sideInput = - ctx.getMessageStreamById(sideInputCollectionId); - final MessageStream>> keyedSideInput = - sideInput.map( - opMessage -> { - WindowedValue wv = opMessage.getElement(); - return OpMessage.ofElement(wv.withValue(KV.of(null, wv.getValue()))); - }); - final WindowedValues.WindowedValueCoder> kvCoder = - coder.withValueCoder(KvCoder.of(VoidCoder.of(), coder.getValueCoder())); - final MessageStream>>> groupedSideInput = - GroupByKeyTranslator.doTranslatePortable( - sideInputPCollection, - keyedSideInput, - windowingStrategy, - kvCoder, - new TupleTag<>("main output"), - ctx); - final MessageStream>> nonkeyGroupedSideInput = - groupedSideInput.map( - opMessage -> { - WindowedValue>> wv = opMessage.getElement(); - return OpMessage.ofElement(wv.withValue(wv.getValue().getValue())); - }); - final MessageStream>> broadcastSideInput = - SamzaPublishViewTranslator.doTranslate( - nonkeyGroupedSideInput, - coder.withValueCoder(IterableCoder.of(coder.getValueCoder())), - ctx.getTransformId(), - getSideInputUniqueId(sideInputId), - ctx.getPipelineOptions()); - - return broadcastSideInput; - } - - private static String getSideInputUniqueId(SideInputId 
sideInputId) { - return sideInputId.getTransformId() + "-" + sideInputId.getLocalName(); - } - - static class SideInputWatermarkFn - implements FlatMapFunction, OpMessage>, - WatermarkFunction> { - - @Override - public Collection> apply(OpMessage message) { - return Collections.singletonList(message); - } - - @Override - public Collection> processWatermark(long watermark) { - return Collections.singletonList(OpMessage.ofSideInputWatermark(new Instant(watermark))); - } - - @Override - public Long getOutputWatermark() { - // Always return max so the side input watermark will not be aggregated with main inputs. - return Long.MAX_VALUE; - } - } - - static class RawUnionValueToValue implements Op { - @Override - public void processElement(WindowedValue inputElement, OpEmitter emitter) { - @SuppressWarnings("unchecked") - final OutT value = (OutT) inputElement.getValue().getValue(); - emitter.emitElement(inputElement.withValue(value)); - } - } - - private static class NoOpDoFn extends DoFn { - @ProcessElement - public void doNothing(@SuppressWarnings("unused") ProcessContext context) {} - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PortableTranslationContext.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PortableTranslationContext.java deleted file mode 100644 index 8821ee62928d..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/PortableTranslationContext.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.beam.runners.fnexecution.provisioning.JobInfo; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.samza.application.descriptors.StreamApplicationDescriptor; -import org.apache.samza.operators.KV; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.system.descriptors.InputDescriptor; - -/** - * Helper that keeps the mapping from BEAM PCollection id to Samza {@link MessageStream}. It also - * provides other context data such as input and output of a {@link - * org.apache.beam.model.pipeline.v1.RunnerApi.PTransform}. 
- */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class PortableTranslationContext extends TranslationContext { - private final Map> messageStreams = new HashMap<>(); - private final JobInfo jobInfo; - - private PipelineNode.PTransformNode currentTransform; - - public PortableTranslationContext( - StreamApplicationDescriptor appDescriptor, SamzaPipelineOptions options, JobInfo jobInfo) { - super(appDescriptor, Collections.emptyMap(), Collections.emptySet(), options); - this.jobInfo = jobInfo; - } - - public List>> getAllInputMessageStreams( - PipelineNode.PTransformNode transform) { - final Collection inputStreamIds = transform.getTransform().getInputsMap().values(); - return inputStreamIds.stream().map(this::getMessageStreamById).collect(Collectors.toList()); - } - - public MessageStream> getOneInputMessageStream( - PipelineNode.PTransformNode transform) { - String id = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()); - return getMessageStreamById(id); - } - - @SuppressWarnings("unchecked") - public MessageStream> getMessageStreamById(String id) { - return (MessageStream>) messageStreams.get(id); - } - - public String getInputId(PipelineNode.PTransformNode transform) { - return Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()); - } - - public String getOutputId(PipelineNode.PTransformNode transform) { - return Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()); - } - - public JobInfo getJobInfo() { - return jobInfo; - } - - public void registerMessageStream(String id, MessageStream> stream) { - if (messageStreams.containsKey(id)) { - throw new IllegalArgumentException("Stream already registered for id: " + id); - } - messageStreams.put(id, stream); - } - - /** Register an input stream with certain config id. 
*/ - public void registerInputMessageStream( - String id, InputDescriptor>, ?> inputDescriptor) { - registerInputMessageStreams(id, Collections.singletonList(inputDescriptor)); - } - - public void registerInputMessageStreams( - String id, List>, ?>> inputDescriptors) { - registerInputMessageStreams(id, inputDescriptors, this::registerMessageStream); - } - - public void setCurrentTransform(PipelineNode.PTransformNode currentTransform) { - this.currentTransform = currentTransform; - } - - @Override - public void clearCurrentTransform() { - this.currentTransform = null; - } - - @Override - public String getTransformFullName() { - return currentTransform.getTransform().getUniqueName(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReadTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReadTranslator.java deleted file mode 100644 index a666e36ad02f..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReadTranslator.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import java.util.Map; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.adapter.BoundedSourceSystem; -import org.apache.beam.runners.samza.adapter.UnboundedSourceSystem; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.util.SamzaCoders; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.BoundedSource; -import org.apache.beam.sdk.io.Source; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.SplittableParDo; -import org.apache.beam.sdk.values.PBegin; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.samza.operators.KV; -import org.apache.samza.serializers.KVSerde; -import org.apache.samza.serializers.NoOpSerde; -import org.apache.samza.serializers.Serde; -import org.apache.samza.system.descriptors.GenericInputDescriptor; -import org.apache.samza.system.descriptors.GenericSystemDescriptor; - -/** - * Translates {@link org.apache.beam.sdk.io.Read} to Samza input {@link - * org.apache.samza.operators.MessageStream}. - */ -public class ReadTranslator implements TransformTranslator>> { - - @Override - public void translate( - PTransform> transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - final PCollection output = ctx.getOutput(transform); - final Coder> coder = SamzaCoders.of(output); - final Source source = - transform instanceof SplittableParDo.PrimitiveBoundedRead - ? 
((SplittableParDo.PrimitiveBoundedRead) transform).getSource() - : ((SplittableParDo.PrimitiveUnboundedRead) transform).getSource(); - final String id = ctx.getIdForPValue(output); - - // Create system descriptor - final GenericSystemDescriptor systemDescriptor; - if (source instanceof BoundedSource) { - systemDescriptor = - new GenericSystemDescriptor(id, BoundedSourceSystem.Factory.class.getName()); - } else { - systemDescriptor = - new GenericSystemDescriptor(id, UnboundedSourceSystem.Factory.class.getName()); - } - - final Map systemConfig = - ImmutableMap.of( - "source", Base64Serializer.serializeUnchecked(source), - "coder", Base64Serializer.serializeUnchecked(coder), - "stepName", node.getFullName()); - systemDescriptor.withSystemConfigs(systemConfig); - - // Create stream descriptor - @SuppressWarnings("unchecked") - final Serde>> kvSerde = - (Serde) KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); - final GenericInputDescriptor>> inputDescriptor = - systemDescriptor.getInputDescriptor(id, kvSerde); - if (source instanceof BoundedSource) { - inputDescriptor.isBounded(); - } - - ctx.registerInputMessageStream(output, inputDescriptor); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/RedistributeByKeyTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/RedistributeByKeyTranslator.java deleted file mode 100644 index e59d74abe38f..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/RedistributeByKeyTranslator.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import com.google.auto.service.AutoService; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.NativeTransforms; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** - * Translates Reshuffle transform into Samza's native partitionBy operator, which will partition - * each incoming message by the key into a Task corresponding to that key. - */ -public class RedistributeByKeyTranslator - implements TransformTranslator>, PCollection>>> { - - private final ReshuffleTranslator reshuffleTranslator = - new ReshuffleTranslator<>("rdstr-"); - - @Override - public void translate( - PTransform>, PCollection>> transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - reshuffleTranslator.translate(transform, node, ctx); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - reshuffleTranslator.translatePortable(transform, pipeline, ctx); - } - - /** Predicate to determine whether a URN is a Samza native transform. 
*/ - @AutoService(NativeTransforms.IsNativeTransform.class) - public static class IsSamzaNativeTransform implements NativeTransforms.IsNativeTransform { - @Override - public boolean test(RunnerApi.PTransform pTransform) { - return false; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java deleted file mode 100644 index c318505d9849..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import com.google.auto.service.AutoService; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.util.SamzaCoders; -import org.apache.beam.runners.samza.util.WindowUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.NativeTransforms; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.serializers.KVSerde; - -/** - * Translates Reshuffle transform into Samza's native partitionBy operator, which will partition - * each incoming message by the key into a Task corresponding to that key. 
- */ -public class ReshuffleTranslator - implements TransformTranslator>, PCollection>>> { - - private final String prefix; - - ReshuffleTranslator(String prefix) { - this.prefix = prefix; - } - - ReshuffleTranslator() { - this("rshfl-"); - } - - @Override - public void translate( - PTransform>, PCollection>> transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - - final PCollection> input = ctx.getInput(transform); - final PCollection> output = ctx.getOutput(transform); - final MessageStream>> inputStream = ctx.getMessageStream(input); - // input will be OpMessage of Windowed>> - final KvCoder inputCoder = (KvCoder) input.getCoder(); - final Coder>> elementCoder = SamzaCoders.of(input); - - final MessageStream>> outputStream = - doTranslate( - inputStream, - inputCoder.getKeyCoder(), - elementCoder, - prefix + ctx.getTransformId(), - ctx.getPipelineOptions().getMaxSourceParallelism() > 1); - - ctx.registerMessageStream(output, outputStream); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - - final String inputId = ctx.getInputId(transform); - final MessageStream>> inputStream = ctx.getMessageStreamById(inputId); - final WindowedValues.WindowedValueCoder> windowedInputCoder = - WindowUtils.instantiateWindowedCoder(inputId, pipeline.getComponents()); - final String outputId = ctx.getOutputId(transform); - - final MessageStream>> outputStream = - doTranslate( - inputStream, - ((KvCoder) windowedInputCoder.getValueCoder()).getKeyCoder(), - windowedInputCoder, - prefix + ctx.getTransformId(), - ctx.getPipelineOptions().getMaxSourceParallelism() > 1); - - ctx.registerMessageStream(outputId, outputStream); - } - - private static MessageStream>> doTranslate( - MessageStream>> inputStream, - Coder keyCoder, - Coder>> valueCoder, - String partitionById, // will be used in the intermediate stream name - boolean needRepartition) { - - return 
needRepartition - ? inputStream - .filter(op -> OpMessage.Type.ELEMENT == op.getType()) - .partitionBy( - opMessage -> opMessage.getElement().getValue().getKey(), - OpMessage::getElement, // windowed value - KVSerde.of(SamzaCoders.toSerde(keyCoder), SamzaCoders.toSerde(valueCoder)), - partitionById) - // convert back to OpMessage - .map(kv -> OpMessage.ofElement(kv.getValue())) - : inputStream.filter(op -> OpMessage.Type.ELEMENT == op.getType()); - } - - /** Predicate to determine whether a URN is a Samza native transform. */ - @AutoService(NativeTransforms.IsNativeTransform.class) - public static class IsSamzaNativeTransform implements NativeTransforms.IsNativeTransform { - @Override - public boolean test(RunnerApi.PTransform pTransform) { - return false; - // Re-enable after https://github.com/apache/beam/issues/21188 is completed - // return PTransformTranslation.RESHUFFLE_URN.equals( - // PTransformTranslation.urnForTransformOrNull(pTransform)); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemFactory.java deleted file mode 100644 index 4035c1610e9f..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemFactory.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.samza.Partition; -import org.apache.samza.config.Config; -import org.apache.samza.metrics.MetricsRegistry; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.SystemAdmin; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemFactory; -import org.apache.samza.system.SystemProducer; -import org.apache.samza.system.SystemStreamMetadata; -import org.apache.samza.system.SystemStreamPartition; - -/** - * This is a trivial system for generating impulse event in Samza when translating IMPULSE transform - * in portable api. 
- */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaImpulseSystemFactory implements SystemFactory { - @Override - public SystemConsumer getConsumer( - String systemName, Config config, MetricsRegistry metricsRegistry) { - return new SamzaImpulseSystemConsumer(); - } - - @Override - public SystemProducer getProducer( - String systemName, Config config, MetricsRegistry metricsRegistry) { - throw new UnsupportedOperationException("SamzaImpulseSystem doesn't support producing"); - } - - @Override - public SystemAdmin getAdmin(String systemName, Config config) { - return new SamzaImpulseSystemAdmin(); - } - - private static final String DUMMY_OFFSET = "0"; - - /** System admin for ImpulseSystem. */ - public static class SamzaImpulseSystemAdmin implements SystemAdmin { - @Override - public Map getOffsetsAfter( - Map offset) { - return offset.keySet().stream() - .collect(Collectors.toMap(Function.identity(), k -> DUMMY_OFFSET)); - } - - @Override - public Map getSystemStreamMetadata(Set streamNames) { - return streamNames.stream() - .collect( - Collectors.toMap( - Function.identity(), - stream -> { - // Impulse system will always be single partition - Map - partitionMetadata = - Collections.singletonMap( - new Partition(0), - new SystemStreamMetadata.SystemStreamPartitionMetadata( - DUMMY_OFFSET, DUMMY_OFFSET, DUMMY_OFFSET)); - return new SystemStreamMetadata(stream, partitionMetadata); - })); - } - - @Override - public Integer offsetComparator(String offset1, String offset2) { - return 0; - } - } - - /** System consumer for ImpulseSystem. 
*/ - public static class SamzaImpulseSystemConsumer implements SystemConsumer { - private AtomicBoolean isEnd = new AtomicBoolean(false); - - @Override - public void start() {} - - @Override - public void stop() {} - - @Override - public void register(SystemStreamPartition ssp, String offset) {} - - private static List constructMessages(SystemStreamPartition ssp) { - final IncomingMessageEnvelope impulseMessage = - new IncomingMessageEnvelope( - ssp, - DUMMY_OFFSET, - /* key */ null, - OpMessage.ofElement(WindowedValues.valueInGlobalWindow(new byte[0]))); - - final IncomingMessageEnvelope watermarkMessage = - IncomingMessageEnvelope.buildWatermarkEnvelope( - ssp, BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - - final IncomingMessageEnvelope endOfStreamMessage = - IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp); - - return Arrays.asList(impulseMessage, watermarkMessage, endOfStreamMessage); - } - - @Override - public Map> poll( - Set ssps, long timeout) throws InterruptedException { - if (isEnd.compareAndSet(false, true)) { - return ssps.stream() - .collect( - Collectors.toMap( - Function.identity(), SamzaImpulseSystemConsumer::constructMessages)); - } else { - return Collections.emptyMap(); - } - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java deleted file mode 100644 index f29588d277ad..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.google.auto.service.AutoService; -import java.util.HashMap; -import java.util.Map; -import java.util.ServiceLoader; -import org.apache.beam.runners.samza.metrics.SamzaMetricOpFactory; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.apache.beam.sdk.util.construction.TransformPayloadTranslatorRegistrar; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; - -/** This class knows all the translators from a primitive BEAM transform to a Samza operator. 
*/ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaPipelineTranslator { - - private static final Map> TRANSLATORS = loadTranslators(); - - private static Map> loadTranslators() { - Map> translators = new HashMap<>(); - for (SamzaTranslatorRegistrar registrar : ServiceLoader.load(SamzaTranslatorRegistrar.class)) { - translators.putAll(registrar.getTransformTranslators()); - } - return ImmutableMap.copyOf(translators); - } - - private SamzaPipelineTranslator() {} - - public static void translate(Pipeline pipeline, TranslationContext ctx) { - final TransformVisitorFn translateFn = - new TransformVisitorFn() { - - @Override - public > void apply( - T transform, - TransformHierarchy.Node node, - Pipeline pipeline, - TransformTranslator translator) { - ctx.setCurrentTransform(node.toAppliedPTransform(pipeline)); - ctx.attachTransformMetricOp( - (PTransform) transform, - node, - SamzaMetricOpFactory.OpType.INPUT); - - translator.translate(transform, node, ctx); - - ctx.attachTransformMetricOp( - (PTransform) transform, - node, - SamzaMetricOpFactory.OpType.OUTPUT); - ctx.clearCurrentTransform(); - } - }; - final SamzaPipelineVisitor visitor = new SamzaPipelineVisitor(translateFn); - pipeline.traverseTopologically(visitor); - } - - public static void createConfig( - Pipeline pipeline, ConfigContext ctx, ConfigBuilder configBuilder) { - - final TransformVisitorFn configFn = - new TransformVisitorFn() { - @Override - public > void apply( - T transform, - TransformHierarchy.Node node, - Pipeline pipeline, - TransformTranslator translator) { - - ctx.setCurrentTransform(node.toAppliedPTransform(pipeline)); - - if (translator instanceof TransformConfigGenerator) { - TransformConfigGenerator configGenerator = - (TransformConfigGenerator) translator; - configBuilder.putAll(configGenerator.createConfig(transform, node, ctx)); - } - - 
ctx.clearCurrentTransform(); - } - }; - final SamzaPipelineVisitor visitor = new SamzaPipelineVisitor(configFn); - pipeline.traverseTopologically(visitor); - } - - public interface TransformVisitorFn { - > void apply( - T transform, - TransformHierarchy.Node node, - Pipeline pipeline, - TransformTranslator translator); - } - - public static class SamzaPipelineVisitor extends Pipeline.PipelineVisitor.Defaults { - private final TransformVisitorFn visitorFn; - - public SamzaPipelineVisitor(TransformVisitorFn visitorFn) { - this.visitorFn = visitorFn; - } - - @Override - public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) { - final PTransform transform = node.getTransform(); - final String urn = getUrnForTransform(transform); - if (canTranslate(urn, transform)) { - applyTransform(transform, node, TRANSLATORS.get(urn)); - return CompositeBehavior.DO_NOT_ENTER_TRANSFORM; - } - return CompositeBehavior.ENTER_TRANSFORM; - } - - @Override - public void visitPrimitiveTransform(TransformHierarchy.Node node) { - final PTransform transform = node.getTransform(); - final String urn = getUrnForTransform(transform); - checkArgument( - canTranslate(urn, transform), - String.format("Unsupported transform class: %s. 
Node: %s", transform, node)); - - applyTransform(transform, node, TRANSLATORS.get(urn)); - } - - private > void applyTransform( - T transform, TransformHierarchy.Node node, TransformTranslator translator) { - - @SuppressWarnings("unchecked") - final TransformTranslator typedTranslator = (TransformTranslator) translator; - visitorFn.apply(transform, node, getPipeline(), typedTranslator); - } - - private static boolean canTranslate(String urn, PTransform transform) { - if (!TRANSLATORS.containsKey(urn)) { - return false; - } else if (urn.equals(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN)) { - // According to BEAM, Combines with side inputs are translated as generic composites - return ((Combine.PerKey) transform).getSideInputs().isEmpty(); - } else { - return true; - } - } - - private static String getUrnForTransform(PTransform transform) { - return transform == null ? null : PTransformTranslation.urnForTransformOrNull(transform); - } - } - - /** Registers Samza translators. */ - @AutoService(SamzaTranslatorRegistrar.class) - public static class SamzaTranslators implements SamzaTranslatorRegistrar { - - @Override - public Map> getTransformTranslators() { - return ImmutableMap.>builder() - .put(PTransformTranslation.READ_TRANSFORM_URN, new ReadTranslator<>()) - .put(PTransformTranslation.RESHUFFLE_URN, new ReshuffleTranslator<>()) - .put(PTransformTranslation.REDISTRIBUTE_BY_KEY_URN, new RedistributeByKeyTranslator<>()) - .put(PTransformTranslation.PAR_DO_TRANSFORM_URN, new ParDoBoundMultiTranslator<>()) - .put(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>()) - .put(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>()) - .put(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN, new WindowAssignTranslator<>()) - .put(PTransformTranslation.FLATTEN_TRANSFORM_URN, new FlattenPCollectionsTranslator<>()) - .put(SamzaPublishView.SAMZA_PUBLISH_VIEW_URN, new SamzaPublishViewTranslator<>()) - 
.put(PTransformTranslation.IMPULSE_TRANSFORM_URN, new ImpulseTranslator()) - .put(ExecutableStage.URN, new ParDoBoundMultiTranslator<>()) - .put(PTransformTranslation.TEST_STREAM_TRANSFORM_URN, new SamzaTestStreamTranslator()) - .put( - PTransformTranslation.SPLITTABLE_PROCESS_KEYED_URN, - new SplittableParDoTranslators.ProcessKeyedElements<>()) - .build(); - } - } - - /** Registers classes specialized to the Samza runner. */ - @AutoService(TransformPayloadTranslatorRegistrar.class) - public static class SamzaTransformsRegistrar implements TransformPayloadTranslatorRegistrar { - @Override - public Map< - ? extends Class, - ? extends PTransformTranslation.TransformPayloadTranslator> - getTransformPayloadTranslators() { - return ImmutableMap.of( - SamzaPublishView.class, new SamzaPublishView.SamzaPublishViewPayloadTranslator()); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java deleted file mode 100644 index 150b1ce90902..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import com.google.auto.service.AutoService; -import java.util.HashMap; -import java.util.Map; -import java.util.ServiceLoader; -import java.util.Set; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Portable specific samza pipeline translator. This is the entry point for translating a portable - * pipeline - */ -@SuppressWarnings({ - "keyfor", - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaPortablePipelineTranslator { - private static final Logger LOG = LoggerFactory.getLogger(SamzaPortablePipelineTranslator.class); - - private static final Map> TRANSLATORS = loadTranslators(); - - private static Map> loadTranslators() { - Map> translators = new HashMap<>(); - for (SamzaPortableTranslatorRegistrar registrar : - ServiceLoader.load(SamzaPortableTranslatorRegistrar.class)) { - translators.putAll(registrar.getTransformTranslators()); - } - LOG.info("{} translators loaded.", translators.size()); - return ImmutableMap.copyOf(translators); - } - - private SamzaPortablePipelineTranslator() {} - - public static void translate(RunnerApi.Pipeline pipeline, PortableTranslationContext ctx) { - QueryablePipeline queryablePipeline = QueryablePipeline.forPipeline(pipeline); - - for (PipelineNode.PTransformNode transform : 
- queryablePipeline.getTopologicallyOrderedTransforms()) { - ctx.setCurrentTransform(transform); - - LOG.info("Translating transform urn: {}", transform.getTransform().getSpec().getUrn()); - TRANSLATORS - .get(transform.getTransform().getSpec().getUrn()) - .translatePortable(transform, queryablePipeline, ctx); - - ctx.clearCurrentTransform(); - } - } - - public static void createConfig( - RunnerApi.Pipeline pipeline, ConfigBuilder configBuilder, SamzaPipelineOptions options) { - QueryablePipeline queryablePipeline = QueryablePipeline.forPipeline(pipeline); - for (PipelineNode.PTransformNode transform : - queryablePipeline.getTopologicallyOrderedTransforms()) { - TransformTranslator translator = - TRANSLATORS.get(transform.getTransform().getSpec().getUrn()); - if (translator instanceof TransformConfigGenerator) { - TransformConfigGenerator configGenerator = (TransformConfigGenerator) translator; - configBuilder.putAll(configGenerator.createPortableConfig(transform, options)); - } - } - } - - public static Set knownUrns() { - return TRANSLATORS.keySet(); - } - - /** Registers Samza translators. 
*/ - @AutoService(SamzaPortableTranslatorRegistrar.class) - public static class SamzaTranslators implements SamzaPortableTranslatorRegistrar { - - @Override - public Map> getTransformTranslators() { - return ImmutableMap.>builder() - // Re-enable after https://github.com/apache/beam/issues/21188 is completed - // .put(PTransformTranslation.RESHUFFLE_URN, new ReshuffleTranslator<>()) - .put(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>()) - .put(PTransformTranslation.FLATTEN_TRANSFORM_URN, new FlattenPCollectionsTranslator<>()) - .put(PTransformTranslation.IMPULSE_TRANSFORM_URN, new ImpulseTranslator()) - .put(PTransformTranslation.TEST_STREAM_TRANSFORM_URN, new SamzaTestStreamTranslator<>()) - .put(ExecutableStage.URN, new ParDoBoundMultiTranslator<>()) - .build(); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortableTranslatorRegistrar.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortableTranslatorRegistrar.java deleted file mode 100644 index 5eede8f65326..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortableTranslatorRegistrar.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Map; - -/** A registrar of TransformTranslator in portable pipeline. */ -public interface SamzaPortableTranslatorRegistrar { - Map> getTransformTranslators(); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishView.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishView.java deleted file mode 100644 index 3c09474eda2c..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishView.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.List; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; - -/** - * Samza {@link PTransform} that creates a primitive output {@link PCollection}, as the results of a - * {@link PCollectionView}. 
- */ -class SamzaPublishView - extends PTransform>, PCollection>> { - static final String SAMZA_PUBLISH_VIEW_URN = "beam:transform:samza:publish-view:v1"; - - private final PCollectionView view; - - SamzaPublishView(PCollectionView view) { - this.view = view; - } - - @Override - public PCollection> expand(PCollection> input) { - return PCollection.>createPrimitiveOutputInternal( - input.getPipeline(), input.getWindowingStrategy(), input.isBounded(), input.getCoder()); - } - - public PCollectionView getView() { - return view; - } - - @Override - public String getName() { - return view.getName(); - } - - static class SamzaPublishViewPayloadTranslator - extends PTransformTranslation.TransformPayloadTranslator.NotSerializable< - SamzaPublishView> { - - SamzaPublishViewPayloadTranslator() {} - - @Override - public String getUrn() { - return SAMZA_PUBLISH_VIEW_URN; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTransformOverride.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTransformOverride.java deleted file mode 100644 index 89848147cef4..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTransformOverride.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import org.apache.beam.runners.core.Concatenate; -import org.apache.beam.sdk.runners.AppliedPTransform; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.util.construction.SingleInputOutputOverrideFactory; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; - -/** Samza override for {@link View} (side input) transforms. 
*/ -class SamzaPublishViewTransformOverride - extends SingleInputOutputOverrideFactory< - PCollection, PCollection, View.CreatePCollectionView> { - @Override - public PTransformReplacement, PCollection> getReplacementTransform( - AppliedPTransform< - PCollection, PCollection, View.CreatePCollectionView> - transform) { - - @SuppressWarnings("unchecked") - PCollection input = - (PCollection) Iterables.getOnlyElement(transform.getInputs().values()); - - return PTransformReplacement.of( - input, new SamzaCreatePCollectionViewTransform<>(transform.getTransform().getView())); - } - - private static class SamzaCreatePCollectionViewTransform - extends PTransform, PCollection> { - private final PCollectionView view; - - public SamzaCreatePCollectionViewTransform(PCollectionView view) { - this.view = view; - } - - @Override - public PCollection expand(PCollection input) { - // This actually creates a branch in the graph that publishes the view but then returns - // the original input. This is copied from the Flink runner. - input - .apply(Combine.globally(new Concatenate()).withoutDefaults()) - .apply(new SamzaPublishView<>(view)); - return input; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTranslator.java deleted file mode 100644 index 08cf1057aabf..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPublishViewTranslator.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.List; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.util.SamzaCoders; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.samza.operators.MessageStream; - -/** Translates {@link SamzaPublishView} to a view {@link MessageStream} as side input. */ -class SamzaPublishViewTranslator - implements TransformTranslator> { - - @Override - public void translate( - SamzaPublishView transform, - TransformHierarchy.Node node, - TranslationContext ctx) { - final PCollection> input = ctx.getInput(transform); - final MessageStream>> inputStream = ctx.getMessageStream(input); - @SuppressWarnings("unchecked") - final Coder>> elementCoder = (Coder) SamzaCoders.of(input); - final String viewId = ctx.getViewId(transform.getView()); - - final MessageStream>> outputStream = - doTranslate( - inputStream, elementCoder, ctx.getTransformId(), viewId, ctx.getPipelineOptions()); - - ctx.registerViewStream(transform.getView(), outputStream); - } - - /** - * This method is used to translate both native Java PublishView transform as well as portable - * side input broadcasting into Samza. 
- */ - static MessageStream>> doTranslate( - MessageStream>> inputStream, - Coder>> coder, - String transformId, - String viewId, - SamzaPipelineOptions options) { - - final MessageStream>> elementStream = - inputStream - .filter(msg -> msg.getType() == OpMessage.Type.ELEMENT) - .map(OpMessage::getElement); - - // TODO: once SAMZA-1580 is resolved, this optimization will go directly inside Samza - final MessageStream>> broadcastStream = - options.getMaxSourceParallelism() == 1 - ? elementStream - : elementStream.broadcast(SamzaCoders.toSerde(coder), "view-" + transformId); - - return broadcastStream.map(element -> OpMessage.ofSideInput(viewId, element)); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamSystemFactory.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamSystemFactory.java deleted file mode 100644 index 72cd711a6acc..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamSystemFactory.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.TimestampedValue; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.samza.Partition; -import org.apache.samza.SamzaException; -import org.apache.samza.config.Config; -import org.apache.samza.config.SystemConfig; -import org.apache.samza.metrics.MetricsRegistry; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.SystemAdmin; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemFactory; -import org.apache.samza.system.SystemProducer; -import org.apache.samza.system.SystemStreamMetadata; -import org.apache.samza.system.SystemStreamPartition; - -/** - * A Samza system factory that supports consuming from {@link TestStream} and translating events - * into messages according to the {@link org.apache.beam.sdk.testing.TestStream.EventType} of the - * events. 
- */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaTestStreamSystemFactory implements SystemFactory { - @Override - public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) { - final String streamPrefix = String.format(SystemConfig.SYSTEM_ID_PREFIX, systemName); - final Config scopedConfig = config.subset(streamPrefix, true); - return new SamzaTestStreamSystemConsumer<>(getTestStream(scopedConfig)); - } - - @Override - public SystemProducer getProducer(String systemName, Config config, MetricsRegistry registry) { - throw new UnsupportedOperationException("SamzaTestStreamSystem doesn't support producing"); - } - - @Override - public SystemAdmin getAdmin(String systemName, Config config) { - return new SamzaTestStreamSystemAdmin(); - } - - /** A helper function to decode testStream from the config. */ - private static TestStream getTestStream(Config config) { - @SuppressWarnings("unchecked") - final SerializableFunction> testStreamDecoder = - Base64Serializer.deserializeUnchecked( - config.get(SamzaTestStreamTranslator.TEST_STREAM_DECODER), SerializableFunction.class); - return testStreamDecoder.apply(config.get(SamzaTestStreamTranslator.ENCODED_TEST_STREAM)); - } - - private static final String DUMMY_OFFSET = "0"; - - /** System admin for SamzaTestStreamSystem. 
*/ - public static class SamzaTestStreamSystemAdmin implements SystemAdmin { - @Override - public Map getOffsetsAfter( - Map offsets) { - return offsets.keySet().stream() - .collect(Collectors.toMap(Function.identity(), k -> DUMMY_OFFSET)); - } - - @Override - public Map getSystemStreamMetadata(Set streamNames) { - return streamNames.stream() - .collect( - Collectors.toMap( - Function.identity(), - stream -> { - // TestStream will always be single partition - Map - partitionMetadata = - Collections.singletonMap( - new Partition(0), - new SystemStreamMetadata.SystemStreamPartitionMetadata( - DUMMY_OFFSET, DUMMY_OFFSET, DUMMY_OFFSET)); - return new SystemStreamMetadata(stream, partitionMetadata); - })); - } - - @Override - public Integer offsetComparator(String offset1, String offset2) { - return 0; - } - } - - /** System consumer for SamzaTestStreamSystem. */ - public static class SamzaTestStreamSystemConsumer implements SystemConsumer { - TestStream testStream; - - public SamzaTestStreamSystemConsumer(TestStream testStream) { - this.testStream = testStream; - } - - @Override - public void start() {} - - @Override - public void stop() {} - - @Override - public void register(SystemStreamPartition systemStreamPartition, String offset) {} - - @Override - public Map> poll( - Set systemStreamPartitions, long timeout) { - SystemStreamPartition ssp = systemStreamPartitions.iterator().next(); - ArrayList messages = new ArrayList<>(); - - for (TestStream.Event event : testStream.getEvents()) { - if (event.getType().equals(TestStream.EventType.ELEMENT)) { - // If event type is element, for each element, create a message with the element and - // timestamp. 
- for (TimestampedValue element : ((TestStream.ElementEvent) event).getElements()) { - WindowedValue windowedValue = - WindowedValues.timestampedValueInGlobalWindow( - element.getValue(), element.getTimestamp()); - final OpMessage opMessage = OpMessage.ofElement(windowedValue); - final IncomingMessageEnvelope envelope = - new IncomingMessageEnvelope(ssp, DUMMY_OFFSET, null, opMessage); - messages.add(envelope); - } - } else if (event.getType().equals(TestStream.EventType.WATERMARK)) { - // If event type is watermark, create a watermark message. - long watermarkMillis = ((TestStream.WatermarkEvent) event).getWatermark().getMillis(); - final IncomingMessageEnvelope envelope = - IncomingMessageEnvelope.buildWatermarkEnvelope(ssp, watermarkMillis); - messages.add(envelope); - if (watermarkMillis == BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) { - // If watermark reached max watermark, also create a end-of-stream message - final IncomingMessageEnvelope endOfStreamMessage = - IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp); - messages.add(endOfStreamMessage); - break; - } - } else if (event.getType().equals(TestStream.EventType.PROCESSING_TIME)) { - throw new UnsupportedOperationException( - "Advancing Processing time is not supported by the Samza Runner."); - } else { - throw new SamzaException("Unknown event type " + event.getType()); - } - } - - return ImmutableMap.of(ssp, messages); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamTranslator.java deleted file mode 100644 index eb4f8658e5fe..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTestStreamTranslator.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.io.IOException; -import java.util.Map; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.core.serialization.Base64Serializer; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.util.CoderUtils; -import org.apache.beam.sdk.util.construction.RehydratedComponents; -import org.apache.beam.sdk.util.construction.TestStreamTranslation; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.samza.operators.KV; -import org.apache.samza.serializers.KVSerde; -import org.apache.samza.serializers.NoOpSerde; -import 
org.apache.samza.serializers.Serde; -import org.apache.samza.system.descriptors.GenericInputDescriptor; -import org.apache.samza.system.descriptors.GenericSystemDescriptor; - -/** - * Translate {@link org.apache.beam.sdk.testing.TestStream} to a samza message stream produced by - * {@link SamzaTestStreamSystemFactory.SamzaTestStreamSystemConsumer}. - */ -@SuppressWarnings({"rawtypes"}) -public class SamzaTestStreamTranslator implements TransformTranslator> { - public static final String ENCODED_TEST_STREAM = "encodedTestStream"; - public static final String TEST_STREAM_DECODER = "testStreamDecoder"; - - @Override - public void translate( - TestStream testStream, TransformHierarchy.Node node, TranslationContext ctx) { - final PCollection output = ctx.getOutput(testStream); - final String outputId = ctx.getIdForPValue(output); - final Coder valueCoder = testStream.getValueCoder(); - final TestStream.TestStreamCoder testStreamCoder = TestStream.TestStreamCoder.of(valueCoder); - - // encode testStream as a string - final String encodedTestStream; - try { - encodedTestStream = CoderUtils.encodeToBase64(testStreamCoder, testStream); - } catch (CoderException e) { - throw new RuntimeException("Could not encode TestStream.", e); - } - - // the decoder for encodedTestStream - SerializableFunction> testStreamDecoder = - string -> { - try { - return CoderUtils.decodeFromBase64(TestStream.TestStreamCoder.of(valueCoder), string); - } catch (CoderException e) { - throw new RuntimeException("Could not decode TestStream.", e); - } - }; - - ctx.registerInputMessageStream( - output, createInputDescriptor(outputId, encodedTestStream, testStreamDecoder)); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - final ByteString bytes = transform.getTransform().getSpec().getPayload(); - final SerializableFunction> testStreamDecoder = - createTestStreamDecoder(pipeline.getComponents(), 
bytes); - - final String outputId = ctx.getOutputId(transform); - final String escapedOutputId = SamzaPipelineTranslatorUtils.escape(outputId); - - ctx.registerInputMessageStream( - outputId, - createInputDescriptor( - escapedOutputId, Base64Serializer.serializeUnchecked(bytes), testStreamDecoder)); - } - - @SuppressWarnings("unchecked") - private static GenericInputDescriptor>> createInputDescriptor( - String id, - String encodedTestStream, - SerializableFunction> testStreamDecoder) { - final Map systemConfig = - ImmutableMap.of( - ENCODED_TEST_STREAM, - encodedTestStream, - TEST_STREAM_DECODER, - Base64Serializer.serializeUnchecked(testStreamDecoder)); - final GenericSystemDescriptor systemDescriptor = - new GenericSystemDescriptor(id, SamzaTestStreamSystemFactory.class.getName()) - .withSystemConfigs(systemConfig); - - // The KvCoder is needed here for Samza not to crop the key. - final Serde>> kvSerde = KVSerde.of(new NoOpSerde(), new NoOpSerde<>()); - return systemDescriptor.getInputDescriptor(id, kvSerde); - } - - @SuppressWarnings("unchecked") - private static SerializableFunction> createTestStreamDecoder( - RunnerApi.Components components, ByteString payload) { - Coder coder; - try { - coder = - (Coder) - RehydratedComponents.forComponents(components) - .getCoder(RunnerApi.TestStreamPayload.parseFrom(payload).getCoderId()); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // the decoder for encodedTestStream - return encodedTestStream -> { - try { - return TestStreamTranslation.testStreamFromProtoPayload( - RunnerApi.TestStreamPayload.parseFrom( - Base64Serializer.deserializeUnchecked(encodedTestStream, ByteString.class)), - coder); - } catch (IOException e) { - throw new RuntimeException("Could not decode TestStream.", e); - } - }; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTransformOverrides.java 
b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTransformOverrides.java deleted file mode 100644 index 3eb169337329..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTransformOverrides.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.List; -import org.apache.beam.sdk.runners.PTransformOverride; -import org.apache.beam.sdk.util.construction.PTransformMatchers; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.apache.beam.sdk.util.construction.SplittableParDo; -import org.apache.beam.sdk.util.construction.SplittableParDoNaiveBounded; -import org.apache.beam.sdk.util.construction.UnsupportedOverrideFactory; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** {@link org.apache.beam.sdk.transforms.PTransform} overrides for Samza runner. 
*/ -@SuppressWarnings({ - "rawtypes" // TODO(https://github.com/apache/beam/issues/20447) -}) -public class SamzaTransformOverrides { - public static List getDefaultOverrides() { - return ImmutableList.builder() - .add( - PTransformOverride.of( - PTransformMatchers.urnEqualTo(PTransformTranslation.CREATE_VIEW_TRANSFORM_URN), - new SamzaPublishViewTransformOverride())) - - // Note that we have a direct replacement for SplittableParDo.ProcessKeyedElements - // for unbounded splittable DoFns and do not need to rely on - // SplittableParDoViaKeyedWorkItems override. Once this direct replacement supports side - // inputs we can remove the SplittableParDoNaiveBounded override. - .add( - PTransformOverride.of( - PTransformMatchers.splittableParDo(), new SplittableParDo.OverrideFactory())) - .add( - PTransformOverride.of( - PTransformMatchers.splittableProcessKeyedBounded(), - new SplittableParDoNaiveBounded.OverrideFactory())) - - // TODO: [https://github.com/apache/beam/issues/19132] Support @RequiresStableInput on Samza - // runner - .add( - PTransformOverride.of( - PTransformMatchers.requiresStableInputParDoMulti(), - UnsupportedOverrideFactory.withMessage( - "Samza runner currently doesn't support @RequiresStableInput annotation."))) - .build(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTranslatorRegistrar.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTranslatorRegistrar.java deleted file mode 100644 index c3eae9a2f9af..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaTranslatorRegistrar.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Map; - -/** A registrar of TransformTranslator. */ -public interface SamzaTranslatorRegistrar { - Map> getTransformTranslators(); -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SplittableParDoTranslators.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SplittableParDoTranslators.java deleted file mode 100644 index 2bd6674700c8..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SplittableParDoTranslators.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import static org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils.escape; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.runners.samza.runtime.DoFnOp; -import org.apache.beam.runners.samza.runtime.KvToKeyedWorkItemOp; -import org.apache.beam.runners.samza.runtime.OpAdapter; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.runtime.SplittableParDoProcessKeyedElementsOp; -import org.apache.beam.runners.samza.translation.ParDoBoundMultiTranslator.RawUnionValueToValue; -import org.apache.beam.runners.samza.util.SamzaCoders; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.runners.TransformHierarchy.Node; -import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.construction.SplittableParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.serializers.KVSerde; - -/** A set of translators for {@link SplittableParDo}. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SplittableParDoTranslators { - - /** - * Translates {@link SplittableParDo.ProcessKeyedElements} to Samza {@link - * SplittableParDoProcessKeyedElementsOp}. 
- */ - static class ProcessKeyedElements - implements TransformTranslator< - SplittableParDo.ProcessKeyedElements< - InputT, OutputT, RestrictionT, WatermarkEstimatorStateT>> { - - @Override - public void translate( - SplittableParDo.ProcessKeyedElements< - InputT, OutputT, RestrictionT, WatermarkEstimatorStateT> - transform, - Node node, - TranslationContext ctx) { - final PCollection>> input = ctx.getInput(transform); - - final ArrayList, PCollection>> outputs = - new ArrayList<>(node.getOutputs().entrySet()); - - final Map, Integer> tagToIndexMap = new HashMap<>(); - final Map> indexToPCollectionMap = new HashMap<>(); - - for (int index = 0; index < outputs.size(); ++index) { - final Map.Entry, PCollection> taggedOutput = outputs.get(index); - tagToIndexMap.put(taggedOutput.getKey(), index); - - if (taggedOutput.getValue() == null) { - throw new IllegalArgumentException( - "Expected side output to be PCollection, but was: " + taggedOutput.getValue()); - } - final PCollection sideOutputCollection = (PCollection) taggedOutput.getValue(); - indexToPCollectionMap.put(index, sideOutputCollection); - } - - @SuppressWarnings("unchecked") - final WindowingStrategy windowingStrategy = - (WindowingStrategy) input.getWindowingStrategy(); - - final MessageStream>>> inputStream = - ctx.getMessageStream(input); - - final KvCoder> kvInputCoder = - (KvCoder>) input.getCoder(); - final Coder>>> elementCoder = - SamzaCoders.of(input); - - final MessageStream>>> filteredInputStream = - inputStream.filter(msg -> msg.getType() == OpMessage.Type.ELEMENT); - - final MessageStream>>> partitionedInputStream; - if (!needRepartition(ctx)) { - partitionedInputStream = filteredInputStream; - } else { - partitionedInputStream = - filteredInputStream - .partitionBy( - msg -> msg.getElement().getValue().getKey(), - msg -> msg.getElement(), - KVSerde.of( - SamzaCoders.toSerde(kvInputCoder.getKeyCoder()), - SamzaCoders.toSerde(elementCoder)), - "sdf-" + escape(ctx.getTransformId())) - .map(kv -> 
OpMessage.ofElement(kv.getValue())); - } - - final MessageStream> taggedOutputStream = - partitionedInputStream - .flatMapAsync(OpAdapter.adapt(new KvToKeyedWorkItemOp<>(), ctx)) - .flatMapAsync( - OpAdapter.adapt( - new SplittableParDoProcessKeyedElementsOp<>( - transform.getMainOutputTag(), - transform, - windowingStrategy, - new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), - ctx.getTransformFullName(), - ctx.getTransformId(), - input.isBounded()), - ctx)); - - for (int outputIndex : tagToIndexMap.values()) { - @SuppressWarnings("unchecked") - final MessageStream> outputStream = - taggedOutputStream - .filter( - message -> - message.getType() != OpMessage.Type.ELEMENT - || message.getElement().getValue().getUnionTag() == outputIndex) - .flatMapAsync(OpAdapter.adapt(new RawUnionValueToValue(), ctx)); - - ctx.registerMessageStream(indexToPCollectionMap.get(outputIndex), outputStream); - } - } - - private static boolean needRepartition(TranslationContext ctx) { - if (ctx.getPipelineOptions().getMaxSourceParallelism() == 1) { - // Only one task will be created, no need for repartition - return false; - } - return true; - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/StateIdParser.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/StateIdParser.java deleted file mode 100644 index 05135a4d97dd..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/StateIdParser.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; -import org.apache.beam.runners.samza.util.StateUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.reflect.DoFnSignature; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; - -/** - * This class identifies the set of non-unique state ids by scanning the BEAM {@link Pipeline} with - * a topological traversal. 
- */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class StateIdParser extends Pipeline.PipelineVisitor.Defaults { - private final Set nonUniqueStateIds = new HashSet<>(); - private final Set usedStateIds = new HashSet<>(); - - public static Set scan(Pipeline pipeline) { - final StateIdParser parser = new StateIdParser(); - pipeline.traverseTopologically(parser); - return parser.getNonUniqueStateIds(); - } - - private StateIdParser() {} - - @Override - public void visitPrimitiveTransform(TransformHierarchy.Node node) { - if (node.getTransform() instanceof ParDo.MultiOutput) { - final DoFn doFn = ((ParDo.MultiOutput) node.getTransform()).getFn(); - if (StateUtils.isStateful(doFn)) { - final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass()); - for (String stateId : signature.stateDeclarations().keySet()) { - if (!usedStateIds.add(stateId)) { - nonUniqueStateIds.add(stateId); - } - } - } - } - } - - public Set getNonUniqueStateIds() { - return Collections.unmodifiableSet(nonUniqueStateIds); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformConfigGenerator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformConfigGenerator.java deleted file mode 100644 index aa956e4b28e0..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformConfigGenerator.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Collections; -import java.util.Map; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; - -/** Generates config for a BEAM PTransform (regular java api or portable api). */ -public interface TransformConfigGenerator> { - /** Generate config for regular java api PTransform. */ - default Map createConfig( - T transform, TransformHierarchy.Node node, ConfigContext ctx) { - return Collections.emptyMap(); - } - - /** Generate config for portable api PTransform. */ - default Map createPortableConfig( - PipelineNode.PTransformNode transform, SamzaPipelineOptions options) { - return Collections.emptyMap(); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformTranslator.java deleted file mode 100644 index 477b5e3a91e2..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TransformTranslator.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; - -/** Interface of Samza translator for BEAM {@link PTransform}. */ -public interface TransformTranslator> { - - /** Translates the Java {@link PTransform} into Samza API. */ - default void translate(T transform, TransformHierarchy.Node node, TranslationContext ctx) { - throw new UnsupportedOperationException( - "Java translation is not supported for " + this.getClass().getSimpleName()); - } - - /** - * Translates the portable {@link org.apache.beam.model.pipeline.v1.RunnerApi.PTransform} into - * Samza API. 
- */ - default void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - throw new UnsupportedOperationException( - "Portable translation is not supported for " + this.getClass().getSimpleName()); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TranslationContext.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TranslationContext.java deleted file mode 100644 index 4da5b8708f18..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/TranslationContext.java +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import java.util.stream.Collectors; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.metrics.SamzaMetricOpFactory; -import org.apache.beam.runners.samza.metrics.SamzaTransformMetricRegistry; -import org.apache.beam.runners.samza.runtime.OpAdapter; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.util.HashIdGenerator; -import org.apache.beam.runners.samza.util.StoreIdGenerator; -import org.apache.beam.sdk.runners.AppliedPTransform; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.construction.PTransformTranslation; -import org.apache.beam.sdk.util.construction.TransformInputs; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.samza.application.descriptors.StreamApplicationDescriptor; -import org.apache.samza.config.Config; -import org.apache.samza.config.MapConfig; -import org.apache.samza.operators.KV; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.operators.OutputStream; -import org.apache.samza.serializers.NoOpSerde; -import 
org.apache.samza.system.EndOfStreamMessage; -import org.apache.samza.system.OutgoingMessageEnvelope; -import org.apache.samza.system.StreamSpec; -import org.apache.samza.system.SystemFactory; -import org.apache.samza.system.SystemProducer; -import org.apache.samza.system.SystemStream; -import org.apache.samza.system.SystemStreamMetadata; -import org.apache.samza.system.WatermarkMessage; -import org.apache.samza.system.descriptors.GenericInputDescriptor; -import org.apache.samza.system.descriptors.GenericSystemDescriptor; -import org.apache.samza.system.descriptors.InputDescriptor; -import org.apache.samza.system.descriptors.OutputDescriptor; -import org.apache.samza.system.inmemory.InMemorySystemFactory; -import org.apache.samza.table.Table; -import org.apache.samza.table.descriptors.TableDescriptor; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Helper that keeps the mapping from BEAM {@link PValue}/{@link PCollectionView} to Samza {@link - * MessageStream}. It also provides other context data such as input and output of a {@link - * PTransform}. 
- */ -@SuppressWarnings({ - "rawtypes", // TODO(https://issues.apache.org/jira/browse/BEAM-10556) - "keyfor", - "nullness" -}) // TODO(https://issues.apache.org/jira/browse/BEAM-10402) -public class TranslationContext { - private static final Logger LOG = LoggerFactory.getLogger(TranslationContext.class); - private final StreamApplicationDescriptor appDescriptor; - private final Map> messsageStreams = new HashMap<>(); - private final Map, MessageStream> viewStreams = new HashMap<>(); - private final Map idMap; - private final Map registeredInputStreams = new HashMap<>(); - private final Map registeredTables = new HashMap<>(); - private final SamzaPipelineOptions options; - private final HashIdGenerator idGenerator = new HashIdGenerator(); - private final StoreIdGenerator storeIdGenerator; - private final SamzaTransformMetricRegistry samzaTransformMetricRegistry; - private AppliedPTransform currentTransform; - - public TranslationContext( - StreamApplicationDescriptor appDescriptor, - Map idMap, - Set nonUniqueStateIds, - SamzaPipelineOptions options) { - this.appDescriptor = appDescriptor; - this.idMap = idMap; - this.options = options; - this.storeIdGenerator = new StoreIdGenerator(nonUniqueStateIds); - this.samzaTransformMetricRegistry = new SamzaTransformMetricRegistry(); - } - - public void registerInputMessageStream( - PValue pvalue, InputDescriptor>, ?> inputDescriptor) { - registerInputMessageStreams(pvalue, Collections.singletonList(inputDescriptor)); - } - - /** - * Function to register a merged messageStream of all input messageStreams to a PCollection. 
- * - * @param pvalue output of a transform - * @param inputDescriptors a list of Samza InputDescriptors - */ - public void registerInputMessageStreams( - PValue pvalue, List>, ?>> inputDescriptors) { - registerInputMessageStreams(pvalue, inputDescriptors, this::registerMessageStream); - } - - protected void registerInputMessageStreams( - KeyT key, - List>, ?>> inputDescriptors, - BiConsumer>> registerFunction) { - final Set>> streamsToMerge = new HashSet<>(); - for (InputDescriptor>, ?> inputDescriptor : inputDescriptors) { - final String streamId = inputDescriptor.getStreamId(); - // each streamId registered in map should already be add in messageStreamMap - if (registeredInputStreams.containsKey(streamId)) { - @SuppressWarnings("unchecked") - MessageStream> messageStream = registeredInputStreams.get(streamId); - LOG.info( - "Stream id {} has already been mapped to {} stream. Mapping {} to the same message stream.", - streamId, - messageStream, - key); - streamsToMerge.add(messageStream); - } else { - final MessageStream> typedStream = - getValueStream(appDescriptor.getInputStream(inputDescriptor)); - registeredInputStreams.put(streamId, typedStream); - streamsToMerge.add(typedStream); - } - } - - registerFunction.accept(key, MessageStream.mergeAll(streamsToMerge)); - } - - public void registerMessageStream(PValue pvalue, MessageStream> stream) { - if (messsageStreams.containsKey(pvalue)) { - throw new IllegalArgumentException("Stream already registered for pvalue: " + pvalue); - } - messsageStreams.put(pvalue, stream); - } - - // Add a dummy stream for use in special cases (TestStream, empty flatten) - public MessageStream> getDummyStream() { - InputDescriptor, ?> dummyInput = - createDummyStreamDescriptor(UUID.randomUUID().toString()); - return appDescriptor.getInputStream(dummyInput); - } - - public MessageStream> getMessageStream(PValue pvalue) { - @SuppressWarnings("unchecked") - final MessageStream> stream = - (MessageStream>) messsageStreams.get(pvalue); - 
if (stream == null) { - throw new IllegalArgumentException("No stream registered for pvalue: " + pvalue); - } - return stream; - } - - public void attachTransformMetricOp( - PTransform transform, - TransformHierarchy.Node node, - SamzaMetricOpFactory.OpType opType) { - final Boolean enableTransformMetrics = getPipelineOptions().getEnableTransformMetrics(); - final String transformURN = PTransformTranslation.urnForTransformOrNull(transform); - - // skip attach transform if user override is false or transform is not registered - if (!enableTransformMetrics || transformURN == null) { - return; - } - - // skip attach transform if transform is reading from external sources - if (isIOTransform(node, opType)) { - return; - } - - for (PValue pValue : getPValueForTransform(opType, transform, node)) { - // skip attach transform if pValue is not registered i.e. if not translated with a samza - // translator - if (!messsageStreams.containsKey(pValue)) { - LOG.debug( - "Skip attach transform metric op for pValue: {} for transform: {}", - pValue, - getTransformFullName()); - continue; - } - - // add another step for default metric computation - getMessageStream(pValue) - .flatMapAsync( - OpAdapter.adapt( - SamzaMetricOpFactory.createMetricOp( - transformURN, - pValue.getName(), - getTransformFullName(), - opType, - samzaTransformMetricRegistry), - this)); - } - } - - // Get the input or output PValue for a transform - private List getPValueForTransform( - SamzaMetricOpFactory.OpType opType, - @NonNull PTransform transform, - TransformHierarchy.@NonNull Node node) { - switch (opType) { - case INPUT: - { - if (node.getInputs().size() > 1) { - return node.getInputs().entrySet().stream() - .map(Map.Entry::getValue) - .collect(Collectors.toList()); - } else { - return ImmutableList.of(getInput(transform)); - } - } - case OUTPUT: - if (node.getOutputs().size() > 1) { - return node.getOutputs().entrySet().stream() - .map(Map.Entry::getValue) - .collect(Collectors.toList()); - } - 
return ImmutableList.of(getOutput(transform)); - default: - throw new IllegalArgumentException("Unknown opType: " + opType); - } - } - - // Transforms that read or write to/from external sources are not supported - private static boolean isIOTransform( - TransformHierarchy.@NonNull Node node, SamzaMetricOpFactory.OpType opType) { - switch (opType) { - case INPUT: - return node.getInputs().size() == 0; - case OUTPUT: - return node.getOutputs().size() == 0; - default: - throw new IllegalArgumentException("Unknown opType: " + opType); - } - } - - public void registerViewStream( - PCollectionView view, MessageStream>> stream) { - if (viewStreams.containsKey(view)) { - throw new IllegalArgumentException("Stream already registered for view: " + view); - } - - viewStreams.put(view, stream); - } - - public MessageStream> getViewStream(PCollectionView view) { - @SuppressWarnings("unchecked") - final MessageStream> stream = - (MessageStream>) viewStreams.get(view); - if (stream == null) { - throw new IllegalArgumentException("No stream registered for view: " + view); - } - return stream; - } - - public String getViewId(PCollectionView view) { - return getIdForPValue(view); - } - - public void setCurrentTransform(AppliedPTransform currentTransform) { - this.currentTransform = currentTransform; - } - - public void clearCurrentTransform() { - this.currentTransform = null; - } - - public AppliedPTransform getCurrentTransform() { - return currentTransform; - } - - @SuppressWarnings("unchecked") - public InT getInput(PTransform transform) { - return (InT) - Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(this.currentTransform)); - } - - @SuppressWarnings("unchecked") - public OutT getOutput(PTransform transform) { - return (OutT) Iterables.getOnlyElement(this.currentTransform.getOutputs().values()); - } - - @SuppressWarnings("unchecked") - public TupleTag getOutputTag(PTransform> transform) { - return (TupleTag) 
Iterables.getOnlyElement(this.currentTransform.getOutputs().keySet()); - } - - public SamzaPipelineOptions getPipelineOptions() { - return this.options; - } - - public OutputStream getOutputStream(OutputDescriptor outputDescriptor) { - return appDescriptor.getOutputStream(outputDescriptor); - } - - @SuppressWarnings("unchecked") - public Table> getTable(TableDescriptor tableDesc) { - return registeredTables.computeIfAbsent( - tableDesc.getTableId(), id -> appDescriptor.getTable(tableDesc)); - } - - private static MessageStream getValueStream(MessageStream> input) { - return input.map(KV::getValue); - } - - public String getIdForPValue(PValue pvalue) { - final String id = idMap.get(pvalue); - if (id == null) { - throw new IllegalArgumentException("No id mapping for value: " + pvalue); - } - return id; - } - - public String getTransformFullName() { - return currentTransform.getFullName(); - } - - public String getTransformId() { - return idGenerator.getId(getTransformFullName()); - } - - public StoreIdGenerator getStoreIdGenerator() { - return storeIdGenerator; - } - - /** The dummy stream created will only be used in Beam tests. 
*/ - private static InputDescriptor, ?> createDummyStreamDescriptor(String id) { - final GenericSystemDescriptor dummySystem = - new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName()); - final GenericInputDescriptor> dummyInput = - dummySystem.getInputDescriptor(id, new NoOpSerde<>()); - dummyInput.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST); - final Config config = new MapConfig(dummyInput.toConfig(), dummySystem.toConfig()); - final SystemFactory factory = new InMemorySystemFactory(); - final StreamSpec dummyStreamSpec = new StreamSpec(id, id, id, 1); - factory.getAdmin(id, config).createStream(dummyStreamSpec); - - final SystemProducer producer = factory.getProducer(id, config, null); - final SystemStream sysStream = new SystemStream(id, id); - final Consumer sendFn = - (msg) -> { - producer.send(id, new OutgoingMessageEnvelope(sysStream, 0, null, msg)); - }; - final WindowedValue windowedValue = - WindowedValues.timestampedValueInGlobalWindow("dummy", new Instant()); - - sendFn.accept(OpMessage.ofElement(windowedValue)); - sendFn.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis())); - sendFn.accept(new EndOfStreamMessage(null)); - return dummyInput; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/WindowAssignTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/WindowAssignTranslator.java deleted file mode 100644 index cad9d4dc23d1..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/WindowAssignTranslator.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.samza.runtime.OpAdapter; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.runners.samza.runtime.WindowAssignOp; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.transforms.windowing.WindowFn; -import org.apache.beam.sdk.util.construction.WindowingStrategyTranslation; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.util.construction.graph.QueryablePipeline; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.InvalidProtocolBufferException; -import org.apache.samza.operators.MessageStream; - -/** - * Translates {@link org.apache.beam.sdk.transforms.windowing.Window.Assign} to Samza {@link - * WindowAssignOp}. 
- */ -class WindowAssignTranslator implements TransformTranslator> { - @Override - public void translate( - Window.Assign transform, TransformHierarchy.Node node, TranslationContext ctx) { - final PCollection output = ctx.getOutput(transform); - - @SuppressWarnings("unchecked") - final WindowFn windowFn = (WindowFn) output.getWindowingStrategy().getWindowFn(); - - final MessageStream> inputStream = ctx.getMessageStream(ctx.getInput(transform)); - - final MessageStream> outputStream = - inputStream.flatMapAsync(OpAdapter.adapt(new WindowAssignOp<>(windowFn), ctx)); - - ctx.registerMessageStream(output, outputStream); - } - - @Override - public void translatePortable( - PipelineNode.PTransformNode transform, - QueryablePipeline pipeline, - PortableTranslationContext ctx) { - final RunnerApi.WindowIntoPayload payload; - try { - payload = - RunnerApi.WindowIntoPayload.parseFrom(transform.getTransform().getSpec().getPayload()); - } catch (InvalidProtocolBufferException e) { - throw new IllegalArgumentException( - String.format("failed to parse WindowIntoPayload: %s", transform.getId()), e); - } - - @SuppressWarnings("unchecked") - final WindowFn windowFn = - (WindowFn) WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()); - - final MessageStream> inputStream = ctx.getOneInputMessageStream(transform); - - final MessageStream> outputStream = - inputStream.flatMapAsync(OpAdapter.adapt(new WindowAssignOp<>(windowFn), ctx)); - - ctx.registerMessageStream(ctx.getOutputId(transform), outputStream); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/package-info.java deleted file mode 100644 index 6c25cbbf6055..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more 
contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. */ -package org.apache.beam.runners.samza.translation; diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/ConfigUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/ConfigUtils.java deleted file mode 100644 index e1ce72679252..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/ConfigUtils.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.Map; -import org.apache.samza.config.ApplicationConfig; -import org.apache.samza.config.Config; -import org.apache.samza.config.JobConfig; -import org.apache.samza.config.MapConfig; - -/** Util class to operate Samza config. * */ -public class ConfigUtils { - - /** Convert a {@link Map} of config Strings into {@link Config}. */ - public static Config asSamzaConfig(Map config) { - return new MapConfig(config); - } - - /** Convert a {@link Map} of config Strings into {@link JobConfig}. */ - public static JobConfig asJobConfig(Map config) { - return new JobConfig(asSamzaConfig(config)); - } - - /** Convert a {@link Map} of config Strings into {@link JobConfig}. */ - public static ApplicationConfig asApplicationConfig(Map config) { - return new ApplicationConfig(asSamzaConfig(config)); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/DoFnUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/DoFnUtils.java deleted file mode 100644 index 6fd6a6370a1e..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/DoFnUtils.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.commons.collections.CollectionUtils; - -/** Utils for {@link org.apache.beam.runners.samza.runtime.DoFnOp}. */ -public class DoFnUtils { - - public static String toStepName(ExecutableStage executableStage) { - /* - * Look for the first/input ParDo/DoFn in this executable stage by - * matching ParDo/DoFn's input PCollection with executable stage's - * input PCollection - */ - Set inputs = - executableStage.getTransforms().stream() - .filter( - transform -> - transform - .getTransform() - .getInputsMap() - .containsValue(executableStage.getInputPCollection().getId())) - .collect(Collectors.toSet()); - - Set outputIds = - executableStage.getOutputPCollections().stream() - .map(PipelineNode.PCollectionNode::getId) - .collect(Collectors.toSet()); - - /* - * Look for the last/output ParDo/DoFn in this executable stage by - * matching ParDo/DoFn's output PCollection(s) with executable stage's - * out PCollection(s) - */ - Set outputs = - executableStage.getTransforms().stream() - .filter( - transform -> - CollectionUtils.containsAny( - transform.getTransform().getOutputsMap().values(), outputIds)) - .collect(Collectors.toSet()); - - 
return String.format("[%s-%s]", toStepName(inputs), toStepName(outputs)); - } - - private static String toStepName(Set nodes) { - // TODO: format name when there are multiple input/output PTransform(s) in the ExecutableStage - return nodes.isEmpty() - ? "" - : Iterables.get( - Splitter.on('/').split(nodes.iterator().next().getTransform().getUniqueName()), 0); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/FutureUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/FutureUtils.java deleted file mode 100644 index a6ca03512558..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/FutureUtils.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** A util class to handle java 8 {@link CompletableFuture} and {@link CompletionStage}. 
*/ -@SuppressWarnings({"rawtypes"}) -public final class FutureUtils { - /** - * Flattens the input future collection and returns a single future comprising the results of all - * the futures. - * - * @param inputFutures input future collection - * @param result type of the input future - * @return a single {@link CompletionStage} that contains the results of all the input futures. - */ - public static CompletionStage> flattenFutures( - Collection> inputFutures) { - CompletableFuture[] futures = inputFutures.toArray(new CompletableFuture[0]); - - return CompletableFuture.allOf(futures) - .thenApply( - ignored -> { - final List result = - Stream.of(futures).map(CompletableFuture::join).collect(Collectors.toList()); - return result; - }); - } - - public static CompletionStage> combineFutures( - CompletionStage> future1, CompletionStage> future2) { - - if (future1 == null) { - return future2; - } else if (future2 == null) { - return future1; - } else { - return future1.thenCombine( - future2, - (c1, c2) -> { - c1.addAll(c2); - return c1; - }); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/HashIdGenerator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/HashIdGenerator.java deleted file mode 100644 index ecf2bc6de596..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/HashIdGenerator.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.HashSet; -import java.util.Set; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class generates hash-based unique ids from String. The id length is the hash length and the - * suffix length combined. Ids generated are guaranteed to be unique, such that same names will be - * hashed to different ids. - */ -public class HashIdGenerator { - private static final Logger LOG = LoggerFactory.getLogger(HashIdGenerator.class); - - private static final int DEFAULT_MAX_HASH_LENGTH = 5; - private final int maxHashLength; - private final Set usedIds = new HashSet<>(); - - public HashIdGenerator(int maxHashLength) { - this.maxHashLength = maxHashLength; - } - - public HashIdGenerator() { - this(DEFAULT_MAX_HASH_LENGTH); - } - - public String getId(String name) { - // Use the id directly if it is unique and the length is less than max - if (name.length() <= maxHashLength && usedIds.add(name)) { - return name; - } - - // Pick the last bytes of hashcode and use hex format - final String hexString = Integer.toHexString(name.hashCode()); - final String origId = - hexString.length() <= maxHashLength - ? hexString - : hexString.substring(Math.max(0, hexString.length() - maxHashLength)); - String id = origId; - int suffixNum = 2; - while (!usedIds.add(id)) { - // A duplicate! Retry. 
- id = origId + "-" + suffixNum++; - } - LOG.info("Name {} is mapped to id {}", name, id); - return id; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/PipelineJsonRenderer.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/PipelineJsonRenderer.java deleted file mode 100644 index 1063b053e7a8..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/PipelineJsonRenderer.java +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.util; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.google.errorprone.annotations.DoNotCall; -import com.google.errorprone.annotations.FormatMethod; -import com.google.errorprone.annotations.FormatString; -import java.util.AbstractMap; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.ServiceLoader; -import java.util.function.Supplier; -import java.util.stream.Collectors; -import javax.annotation.Nullable; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.samza.SamzaRunner; -import org.apache.beam.runners.samza.translation.ConfigContext; -import org.apache.beam.runners.samza.translation.SamzaPipelineTranslator; -import org.apache.beam.runners.samza.translation.TransformTranslator; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.gson.JsonArray; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.gson.JsonObject; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.gson.JsonParser; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterators; -import org.apache.samza.config.Config; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A JSON renderer for BEAM {@link Pipeline} DAG. This can help us with visualization of the Beam - * DAG. 
- */ -public class PipelineJsonRenderer implements Pipeline.PipelineVisitor { - private static final Logger LOG = LoggerFactory.getLogger(PipelineJsonRenderer.class); - private static final String TRANSFORM_IO_MAP_DELIMITER = ","; - - /** - * Interface to get I/O information for a Beam job. This will help add I/O information to the Beam - * DAG. - */ - public interface SamzaIOInfo { - - /** Get I/O topic name and cluster. */ - Optional getIOInfo(TransformHierarchy.Node node); - } - - /** A registrar for {@link SamzaIOInfo}. */ - public interface SamzaIORegistrar { - - SamzaIOInfo getSamzaIO(); - } - - private static final String OUTERMOST_NODE = "OuterMostNode"; - @Nullable private static final SamzaIOInfo SAMZA_IO_INFO = loadSamzaIOInfo(); - - /** - * This method creates a JSON representation of the Beam pipeline. - * - * @param pipeline The beam pipeline - * @param ctx Config context of the pipeline - * @return JSON string representation of the pipeline - */ - public static String toJsonString(Pipeline pipeline, ConfigContext ctx) { - final PipelineJsonRenderer visitor = new PipelineJsonRenderer(ctx); - pipeline.traverseTopologically(visitor); - return visitor.jsonBuilder.toString(); - } - - /** - * This method creates a JSON representation for Beam Portable Pipeline. 
- * - * @param pipeline The beam portable pipeline - * @return JSON string representation of the pipeline - */ - @DoNotCall("JSON DAG for portable pipeline is not supported yet.") - public static String toJsonString(RunnerApi.Pipeline pipeline) { - throw new UnsupportedOperationException("JSON DAG for portable pipeline is not supported yet."); - } - - private final StringBuilder jsonBuilder = new StringBuilder(); - private final StringBuilder graphLinks = new StringBuilder(); - - private final StringBuilder transformIoInfo = new StringBuilder(); - private final Map valueToProducerNodeName = new HashMap<>(); - private final ConfigContext ctx; - private int indent; - - private PipelineJsonRenderer(ConfigContext ctx) { - this.ctx = ctx; - } - - @Nullable - private static SamzaIOInfo loadSamzaIOInfo() { - final Iterator beamIORegistrarIterator = - ServiceLoader.load(SamzaIORegistrar.class).iterator(); - return beamIORegistrarIterator.hasNext() - ? Iterators.getOnlyElement(beamIORegistrarIterator).getSamzaIO() - : null; - } - - @Override - public void enterPipeline(Pipeline p) { - writeLine("{ \n \"RootNode\": ["); - graphLinks.append(",\"graphLinks\": ["); - - // Do a pre-scan and build transformIoInfo for input and output PValues of each transform - // TODO: Refactor PipelineJsonRenderer to use SamzaPipelineVisitor instead of PipelineVisitor to - // build Beam_JSON_GRAPH - final Map> transformIOMap = buildTransformIOMap(p, ctx); - buildTransformIoJson(transformIOMap); - - enterBlock(); - } - - @Override - public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) { - String fullName = node.getFullName(); - writeLine("{ \"fullName\":\"%s\",", assignNodeName(fullName)); - if (node.getEnclosingNode() != null) { - String enclosingNodeName = node.getEnclosingNode().getFullName(); - writeLine(" \"enclosingNode\":\"%s\",", assignNodeName(enclosingNodeName)); - } - - Optional ioInfo = getIOInfo(node); - if (ioInfo.isPresent() && !ioInfo.get().isEmpty()) { 
- writeLine(" \"ioInfo\":\"%s\",", escapeString(ioInfo.get())); - } - - writeLine(" \"ChildNodes\":["); - enterBlock(); - return CompositeBehavior.ENTER_TRANSFORM; - } - - @Override - public void leaveCompositeTransform(TransformHierarchy.Node node) { - exitBlock(); - writeLine("]},"); - } - - @Override - public void visitPrimitiveTransform(TransformHierarchy.Node node) { - String fullName = node.getFullName(); - writeLine("{ \"fullName\":\"%s\",", escapeString(fullName)); - String enclosingNodeName = node.getEnclosingNode().getFullName(); - writeLine(" \"enclosingNode\":\"%s\"},", assignNodeName(enclosingNodeName)); - - node.getOutputs().values().forEach(x -> valueToProducerNodeName.put(x, fullName)); - node.getInputs() - .forEach( - (key, value) -> { - final String producerName = valueToProducerNodeName.get(value); - graphLinks.append( - String.format("{\"from\":\"%s\"," + "\"to\":\"%s\"},", producerName, fullName)); - }); - } - - @Override - public void visitValue(PValue value, TransformHierarchy.Node producer) {} - - @Override - public void leavePipeline(Pipeline pipeline) { - exitBlock(); - writeLine("]"); - // delete the last comma - int lastIndex = graphLinks.length() - 1; - if (graphLinks.charAt(lastIndex) == ',') { - graphLinks.deleteCharAt(lastIndex); - } - graphLinks.append("]"); - jsonBuilder.append(graphLinks); - // Attach transformIoInfo - transformName to input and output PCollection(PValues) - jsonBuilder.append(transformIoInfo); - jsonBuilder.append("}"); - } - - private void buildTransformIoJson(Map> transformIOMap) { - transformIoInfo.append(",\"transformIOInfo\": ["); - transformIOMap.forEach( - (transform, ioInfo) -> { - transformIoInfo.append( - String.format( - "{\"transformName\":\"%s\"," + "\"inputs\":\"%s\"," + "\"outputs\":\"%s\"},", - transform, ioInfo.getKey(), ioInfo.getValue())); - }); - // delete the last extra comma - int lastIndex = transformIoInfo.length() - 1; - if (transformIoInfo.charAt(lastIndex) == ',') { - 
transformIoInfo.deleteCharAt(lastIndex); - } - transformIoInfo.append("]"); - } - - private void enterBlock() { - indent += 4; - } - - private void exitBlock() { - indent -= 4; - } - - @FormatMethod - private void writeLine(@FormatString String format, Object... args) { - // Since we append a comma after every entry to the graph, we will need to remove that one extra - // comma towards the end of the JSON. - int secondLastCharIndex = jsonBuilder.length() - 2; - if (jsonBuilder.length() > 1 - && jsonBuilder.charAt(secondLastCharIndex) == ',' - && (format.startsWith("}") || format.startsWith("]"))) { - jsonBuilder.deleteCharAt(secondLastCharIndex); - } - if (indent != 0) { - jsonBuilder.append(String.format("%-" + indent + "s", "")); - } - jsonBuilder.append(String.format(format, args)); - jsonBuilder.append("\n"); - } - - private static String escapeString(String x) { - return x.replace("\"", "\\\""); - } - - private String assignNodeName(String nodeName) { - return escapeString(nodeName.isEmpty() ? OUTERMOST_NODE : nodeName); - } - - private Optional getIOInfo(TransformHierarchy.Node node) { - if (SAMZA_IO_INFO == null) { - return Optional.empty(); - } - return SAMZA_IO_INFO.getIOInfo(node); - } - - /** - * Builds a map from PTransform to its input and output PValues. The map is serialized as part of - * Beam_JSON_GRAPH - * - *

Please note this map needs to be built using SamzaPipelineVisitor instead of generic - * PipelineVisitor used here, reason being SamzaPipelineVisitor traverses the pipeline differently - * i.e. if a composite transform can be translated directly it won't further expand it. - * PipelineVisitor used here is not runner dependent visitor, its just used here for rendering - * purposes - */ - @VisibleForTesting - static Map> buildTransformIOMap( - Pipeline pipeline, ConfigContext ctx) { - final Map> pTransformToInputOutputMap = new HashMap<>(); - final SamzaPipelineTranslator.TransformVisitorFn configFn = - new SamzaPipelineTranslator.TransformVisitorFn() { - @Override - public > void apply( - T transform, - TransformHierarchy.Node node, - Pipeline pipeline, - TransformTranslator translator) { - ctx.setCurrentTransform(node.toAppliedPTransform(pipeline)); - List inputs = getIOPValueList(node.getInputs()).get(); - List outputs = getIOPValueList(node.getOutputs()).get(); - pTransformToInputOutputMap.put( - node.getFullName(), - new AbstractMap.SimpleEntry<>( - String.join(TRANSFORM_IO_MAP_DELIMITER, inputs), - String.join(TRANSFORM_IO_MAP_DELIMITER, outputs))); - ctx.clearCurrentTransform(); - } - }; - - final SamzaPipelineTranslator.SamzaPipelineVisitor visitor = - new SamzaPipelineTranslator.SamzaPipelineVisitor(configFn); - pipeline.traverseTopologically(visitor); - return pTransformToInputOutputMap; - } - - private static Supplier> getIOPValueList(Map, PCollection> map) { - return () -> map.values().stream().map(pColl -> pColl.getName()).collect(Collectors.toList()); - } - - // Reads the config to build transformIOMap, i.e. 
map of inputs & output PValues for each - // PTransform - public static Map, List>> getTransformIOMap( - Config config) { - checkNotNull(config, "Config cannot be null"); - final Map, List>> result = new HashMap<>(); - final String pipelineJsonGraph = config.get(SamzaRunner.BEAM_JSON_GRAPH); - if (pipelineJsonGraph == null) { - LOG.warn( - "Cannot build transformIOMap since Config: {} is found null ", - SamzaRunner.BEAM_JSON_GRAPH); - return result; - } - JsonObject jsonObject = JsonParser.parseString(pipelineJsonGraph).getAsJsonObject(); - JsonArray transformIOInfo = jsonObject.getAsJsonArray("transformIOInfo"); - transformIOInfo.forEach( - transform -> { - final String transformName = - transform.getAsJsonObject().get("transformName").getAsString(); - final String inputs = transform.getAsJsonObject().get("inputs").getAsString(); - final String outputs = transform.getAsJsonObject().get("outputs").getAsString(); - result.put(transformName, new AbstractMap.SimpleEntry<>(ioFunc(inputs), ioFunc(outputs))); - }); - return result; - } - - private static List ioFunc(String ioList) { - return Arrays.stream(ioList.split(PipelineJsonRenderer.TRANSFORM_IO_MAP_DELIMITER)) - .filter(item -> !item.isEmpty()) - .collect(Collectors.toList()); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/PortableConfigUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/PortableConfigUtils.java deleted file mode 100644 index ee0bd7b72bb0..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/PortableConfigUtils.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.Map; -import org.apache.beam.runners.samza.SamzaPipelineOptions; - -/** A utility class to encapsulate. */ -public class PortableConfigUtils { - public static final String BEAM_PORTABLE_MODE = "beam.portable.mode"; - - private PortableConfigUtils() {} - - /** - * A helper method to distinguish if a pipeline is run using portable mode or classic mode. - * - * @param options pipeline options - * @return true if the pipeline is run in portable mode - */ - public static boolean isPortable(SamzaPipelineOptions options) { - Map override = options.getConfigOverride(); - if (override == null) { - return false; - } - - return Boolean.parseBoolean(override.getOrDefault(BEAM_PORTABLE_MODE, "false")); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaCoders.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaCoders.java deleted file mode 100644 index f546303b9b9b..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaCoders.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.samza.serializers.Serde; - -/** Utils for Coders creation/conversion in Samza. 
*/ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaCoders { - - private SamzaCoders() {} - - public static Coder> of(PCollection pCollection) { - final Coder coder = pCollection.getCoder(); - final Coder windowCoder = - pCollection.getWindowingStrategy().getWindowFn().windowCoder(); - return WindowedValues.FullWindowedValueCoder.of(coder, windowCoder); - } - - public static Serde toSerde(final Coder coder) { - return new Serde() { - @Override - public T fromBytes(byte[] bytes) { - if (bytes != null) { - final ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - try { - return (T) coder.decode(bais); - } catch (IOException e) { - throw new RuntimeException(e); - } - } else { - return null; - } - } - - @Override - public byte[] toBytes(T t) { - if (t != null) { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - coder.encode(t, baos); - } catch (IOException e) { - throw new RuntimeException(e); - } - return baos.toByteArray(); - } else { - return null; - } - } - }; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineExceptionListener.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineExceptionListener.java deleted file mode 100644 index 7715b183b5c9..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineExceptionListener.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import org.apache.beam.runners.samza.SamzaPipelineExceptionContext; -import org.apache.beam.runners.samza.SamzaPipelineOptions; - -/** - * An ExceptionListener following Observer pattern. Any runtime exception caught by {@code - * OpAdapter} will be notified to any concrete SamzaPipelineExceptionListener at Runtime - */ -public interface SamzaPipelineExceptionListener { - - void onException(SamzaPipelineExceptionContext exceptionContext); - - interface Registrar { - SamzaPipelineExceptionListener getExceptionListener(SamzaPipelineOptions samzaPipelineOptions); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineTranslatorUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineTranslatorUtils.java deleted file mode 100644 index 999506e1e021..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/SamzaPipelineTranslatorUtils.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.io.IOException; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.fnexecution.wire.WireCoders; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowedValues; - -/** Utilities for pipeline translation. */ -@SuppressWarnings({ - "rawtypes" // TODO(https://github.com/apache/beam/issues/20447) -}) -public final class SamzaPipelineTranslatorUtils { - private SamzaPipelineTranslatorUtils() {} - - public static WindowedValues.WindowedValueCoder instantiateCoder( - String collectionId, RunnerApi.Components components) { - PipelineNode.PCollectionNode collectionNode = - PipelineNode.pCollection(collectionId, components.getPcollectionsOrThrow(collectionId)); - try { - return (WindowedValues.WindowedValueCoder) - WireCoders.instantiateRunnerWireCoder(collectionNode, components); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Escape the non-alphabet chars in the name so we can create a physical stream out of it. - * - *

This escape will replace any non-alphanumeric characters other than "-" and "_" with "_" - * including whitespace. - */ - public static String escape(String name) { - return name.replaceFirst(".*:([a-zA-Z#0-9]+).*", "$1").replaceAll("[^A-Za-z0-9_-]", "_"); - } - - public static PCollection.IsBounded isBounded(RunnerApi.PCollection pCollection) { - return pCollection.getIsBounded() == RunnerApi.IsBounded.Enum.BOUNDED - ? PCollection.IsBounded.BOUNDED - : PCollection.IsBounded.UNBOUNDED; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/StateUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/StateUtils.java deleted file mode 100644 index 1db9db74074e..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/StateUtils.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; - -/** Utils for determining stateful operators. 
*/ -public class StateUtils { - - public static boolean isStateful(DoFn doFn) { - return DoFnSignatures.isStateful(doFn); - } - - public static boolean isStateful(RunnerApi.ExecutableStagePayload stagePayload) { - return stagePayload.getUserStatesCount() > 0 || stagePayload.getTimersCount() > 0; - } - - public static boolean isStateful(ExecutableStage executableStage) { - return executableStage.getUserStates().size() > 0 || executableStage.getTimers().size() > 0; - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/StoreIdGenerator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/StoreIdGenerator.java deleted file mode 100644 index aefba0186c8d..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/StoreIdGenerator.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.Set; - -/** - * This class encapsulates the logic to generate unique store id. For unique state ids across the - * Beam pipeline, store id is the same as the state id. For non-unique state ids, join the state id - * with an escaped transform name to generate a unique store id. 
- */ -public class StoreIdGenerator { - - private final Set nonUniqueStateIds; - - public StoreIdGenerator(Set nonUniqueStateId) { - this.nonUniqueStateIds = nonUniqueStateId; - } - - public String getId(String stateId, String transformFullName) { - String storeId = stateId; - if (nonUniqueStateIds.contains(stateId)) { - final String escapedName = SamzaPipelineTranslatorUtils.escape(transformFullName); - storeId = toUniqueStoreId(stateId, escapedName); - } - return storeId; - } - - /** Join state id and escaped PTransform name to uniquify store id. */ - private static String toUniqueStoreId(String stateId, String escapedPTransformName) { - return String.join("-", stateId, escapedPTransformName); - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/WindowUtils.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/WindowUtils.java deleted file mode 100644 index 7f39a9739c59..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/WindowUtils.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.util; - -import java.io.IOException; -import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.runners.fnexecution.wire.WireCoders; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.construction.RehydratedComponents; -import org.apache.beam.sdk.util.construction.WindowingStrategyTranslation; -import org.apache.beam.sdk.util.construction.graph.PipelineNode; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.sdk.values.WindowingStrategy; - -/** Utils for window operations. */ -public class WindowUtils { - - /** Get {@link WindowingStrategy} of given collection id from {@link RunnerApi.Components}. */ - public static WindowingStrategy getWindowStrategy( - String collectionId, RunnerApi.Components components) { - RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components); - - RunnerApi.WindowingStrategy windowingStrategyProto = - components.getWindowingStrategiesOrThrow( - components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId()); - - WindowingStrategy windowingStrategy; - try { - windowingStrategy = - WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents); - } catch (Exception e) { - throw new IllegalStateException( - String.format( - "Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto), - e); - } - - @SuppressWarnings("unchecked") - WindowingStrategy ret = - (WindowingStrategy) windowingStrategy; - return ret; - } - - /** - * Instantiate {@link WindowedValues.WindowedValueCoder} for given collection id from {@link - * RunnerApi.Components}. 
- */ - public static WindowedValues.WindowedValueCoder instantiateWindowedCoder( - String collectionId, RunnerApi.Components components) { - PipelineNode.PCollectionNode collectionNode = - PipelineNode.pCollection(collectionId, components.getPcollectionsOrThrow(collectionId)); - try { - return (WindowedValues.WindowedValueCoder) - WireCoders.instantiateRunnerWireCoder(collectionNode, components); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/package-info.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/util/package-info.java deleted file mode 100644 index 23290fd4ffbb..000000000000 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/util/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Internal implementation of the Beam runner for Apache Samza. 
*/ -package org.apache.beam.runners.samza.util; diff --git a/runners/samza/src/main/resources/log4j.properties b/runners/samza/src/main/resources/log4j.properties deleted file mode 100644 index 3ad91be1b55a..000000000000 --- a/runners/samza/src/main/resources/log4j.properties +++ /dev/null @@ -1,23 +0,0 @@ -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -# Update root logger to WARN and add log4j.category.org.apache.beam=INFO if executing in Intellij -log4j.rootLogger=INFO,console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n diff --git a/runners/samza/src/main/resources/samza-conf.properties b/runners/samza/src/main/resources/samza-conf.properties deleted file mode 100644 index 80ec36826797..000000000000 --- a/runners/samza/src/main/resources/samza-conf.properties +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# job config -processor.id=1 - -# default kafka system config -job.default.system=default -systems.default.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory -systems.default.consumer.zookeeper.connect=localhost:2181 -systems.default.producer.bootstrap.servers=localhost:9092 -systems.default.default.stream.replication.factor=1 - -# local deployment -app.runner.class=org.apache.samza.runtime.LocalApplicationRunner -job.coordinator.factory=org.apache.samza.standalone.PassthroughJobCoordinatorFactory -job.coordination.utils.factory=org.apache.samza.standalone.PassthroughCoordinationUtilsFactory -task.name.grouper.factory=org.apache.samza.container.grouper.task.SingleContainerGrouperFactory -task.commit.ms=-1 - -# jmx metrics reporter -metrics.reporters=jmx -metrics.reporter.jmx.class=org.apache.samza.metrics.reporter.JmxReporterFactory \ No newline at end of file diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidatorTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidatorTest.java deleted file mode 100644 index 2ef56c7ce63c..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/SamzaPipelineOptionsValidatorTest.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza; - -import static org.apache.beam.runners.samza.SamzaPipelineOptionsValidator.validateBundlingRelatedOptions; -import static org.apache.samza.config.JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.util.Collections; -import java.util.Map; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.Test; - -/** Test for {@link SamzaPipelineOptionsValidator}. 
*/ -public class SamzaPipelineOptionsValidatorTest { - - @Test(expected = IllegalArgumentException.class) - public void testBundleEnabledInMultiThreadedModeThrowsException() { - SamzaPipelineOptions mockOptions = mock(SamzaPipelineOptions.class); - Map config = ImmutableMap.of(JOB_CONTAINER_THREAD_POOL_SIZE, "10"); - - when(mockOptions.getMaxBundleSize()).thenReturn(2L); - when(mockOptions.getConfigOverride()).thenReturn(config); - validateBundlingRelatedOptions(mockOptions); - } - - @Test - public void testBundleEnabledInSingleThreadedMode() { - SamzaPipelineOptions mockOptions = mock(SamzaPipelineOptions.class); - when(mockOptions.getMaxBundleSize()).thenReturn(2L); - - try { - Map config = ImmutableMap.of(JOB_CONTAINER_THREAD_POOL_SIZE, "1"); - when(mockOptions.getConfigOverride()).thenReturn(config); - validateBundlingRelatedOptions(mockOptions); - - // In the absence of configuration make sure it is treated as single threaded mode. - when(mockOptions.getConfigOverride()).thenReturn(Collections.emptyMap()); - validateBundlingRelatedOptions(mockOptions); - } catch (Exception e) { - throw new AssertionError("Bundle size > 1 should be supported in single threaded mode"); - } - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystemTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystemTest.java deleted file mode 100644 index a6bc9940a745..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/BoundedSourceSystemTest.java +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.adapter; - -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.createElementMessage; -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.createEndOfStreamMessage; -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.createWatermarkMessage; -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.expectWrappedException; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.beam.sdk.io.BoundedSource; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets; -import org.apache.samza.Partition; -import org.apache.samza.metrics.MetricsRegistryMap; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemStreamPartition; -import org.joda.time.Instant; -import org.junit.Test; - -/** Tests for {@link BoundedSourceSystem}. 
*/ -public class BoundedSourceSystemTest { - private static final SystemStreamPartition DEFAULT_SSP = - new SystemStreamPartition("default-system", "default-system", new Partition(0)); - - // A reasonable time to wait to get all messages from the bounded source assuming no blocking. - private static final long DEFAULT_TIMEOUT_MILLIS = 1000; - private static final String NULL_STRING = null; - - @Test - public void testConsumerStartStop() throws IOException, InterruptedException { - final TestBoundedSource source = TestBoundedSource.createBuilder().build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); - consumer.start(); - assertEquals( - Arrays.asList( - createWatermarkMessage(DEFAULT_SSP, BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(DEFAULT_SSP)), - consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testConsumeOneMessage() throws IOException, InterruptedException { - final TestBoundedSource source = - TestBoundedSource.createBuilder().addElements("test").build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, "0", "test", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(DEFAULT_SSP, BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(DEFAULT_SSP)), - consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testAdvanceTimestamp() throws InterruptedException { - final Instant timestamp = Instant.now(); - - final TestBoundedSource source = - TestBoundedSource.createBuilder() - .addElements("before") - .setTimestamp(timestamp) - .addElements("after") - .build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); 
- consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, "0", "before", BoundedWindow.TIMESTAMP_MIN_VALUE), - createElementMessage(DEFAULT_SSP, "1", "after", timestamp), - createWatermarkMessage(DEFAULT_SSP, BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(DEFAULT_SSP)), - consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testConsumeMultipleMessages() throws IOException, InterruptedException { - final TestBoundedSource source = - TestBoundedSource.createBuilder() - .addElements("test", "a", "few", "messages") - .build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, "0", "test", BoundedWindow.TIMESTAMP_MIN_VALUE), - createElementMessage(DEFAULT_SSP, "1", "a", BoundedWindow.TIMESTAMP_MIN_VALUE), - createElementMessage(DEFAULT_SSP, "2", "few", BoundedWindow.TIMESTAMP_MIN_VALUE), - createElementMessage(DEFAULT_SSP, "3", "messages", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(DEFAULT_SSP, BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(DEFAULT_SSP)), - consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testReaderThrowsAtStart() throws Exception { - final IOException exception = new IOException("Expected exception"); - - final TestBoundedSource source = - TestBoundedSource.createBuilder().addException(exception).build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); - consumer.start(); - expectWrappedException( - exception, () -> consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testReaderThrowsAtAdvance() throws Exception { - final IOException exception = new 
IOException("Expected exception"); - - final TestBoundedSource source = - TestBoundedSource.createBuilder() - .addElements("test", "a", "few", "good", "messages", "then", "...") - .addException(exception) - .build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); - consumer.start(); - expectWrappedException( - exception, () -> consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testTimeout() throws Exception { - final CountDownLatch advanceLatch = new CountDownLatch(1); - - final TestBoundedSource source = - TestBoundedSource.createBuilder() - .addElements("before") - .addLatch(advanceLatch) - .addElements("after") - .build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source); - - consumer.register(DEFAULT_SSP, "0"); - consumer.start(); - assertEquals( - Collections.singletonList( - createElementMessage(DEFAULT_SSP, "0", "before", BoundedWindow.TIMESTAMP_MIN_VALUE)), - consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - - advanceLatch.countDown(); - - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, "1", "after", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(DEFAULT_SSP, BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(DEFAULT_SSP)), - consumeUntilTimeoutOrEos(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testSplit() throws IOException, InterruptedException { - final TestBoundedSource.SplittableBuilder builder = - TestBoundedSource.createSplits(3); - builder.forSplit(0).addElements("split-0"); - builder.forSplit(1).addElements("split-1"); - builder.forSplit(2).addElements("split-2"); - final TestBoundedSource source = builder.build(); - - final BoundedSourceSystem.Consumer consumer = createConsumer(source, 3); - - consumer.register(ssp(0), NULL_STRING); - consumer.register(ssp(1), 
NULL_STRING); - consumer.register(ssp(2), NULL_STRING); - consumer.start(); - - final Set offsets = new HashSet<>(); - - // check split0 - List envelopes = - consumeUntilTimeoutOrEos(consumer, ssp(0), DEFAULT_TIMEOUT_MILLIS); - assertEquals( - Arrays.asList( - createElementMessage( - ssp(0), envelopes.get(0).getOffset(), "split-0", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(ssp(0), BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(ssp(0))), - envelopes); - offsets.add(envelopes.get(0).getOffset()); - - // check split1 - envelopes = consumeUntilTimeoutOrEos(consumer, ssp(1), DEFAULT_TIMEOUT_MILLIS); - assertEquals( - Arrays.asList( - createElementMessage( - ssp(1), envelopes.get(0).getOffset(), "split-1", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(ssp(1), BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(ssp(1))), - envelopes); - offsets.add(envelopes.get(0).getOffset()); - - // check split2 - envelopes = consumeUntilTimeoutOrEos(consumer, ssp(2), DEFAULT_TIMEOUT_MILLIS); - assertEquals( - Arrays.asList( - createElementMessage( - ssp(2), envelopes.get(0).getOffset(), "split-2", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(ssp(2), BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(ssp(2))), - envelopes); - offsets.add(envelopes.get(0).getOffset()); - - // check offsets - assertEquals(Sets.newHashSet("0", "1", "2"), offsets); - consumer.stop(); - } - - private static List consumeUntilTimeoutOrEos( - SystemConsumer consumer, SystemStreamPartition ssp, long timeoutMillis) - throws InterruptedException { - assertTrue("Expected timeoutMillis (" + timeoutMillis + ") >= 0", timeoutMillis >= 0); - - final List accumulator = new ArrayList<>(); - final long start = System.currentTimeMillis(); - long now = start; - while (timeoutMillis + start >= now) { - accumulator.addAll(pollOnce(consumer, ssp, now - start - timeoutMillis)); - if (!accumulator.isEmpty() && 
accumulator.get(accumulator.size() - 1).isEndOfStream()) { - break; - } - now = System.currentTimeMillis(); - } - return accumulator; - } - - private static List pollOnce( - SystemConsumer consumer, SystemStreamPartition ssp, long timeoutMillis) - throws InterruptedException { - final Set sspSet = Collections.singleton(ssp); - final Map> pollResult = - consumer.poll(sspSet, timeoutMillis); - assertEquals(sspSet, pollResult.keySet()); - assertNotNull(pollResult.get(ssp)); - return pollResult.get(ssp); - } - - private static BoundedSourceSystem.Consumer createConsumer(BoundedSource source) { - return createConsumer(source, 1); - } - - private static BoundedSourceSystem.Consumer createConsumer( - BoundedSource source, int splitNum) { - SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.as(SamzaPipelineOptions.class); - pipelineOptions.setMaxSourceParallelism(splitNum); - return new BoundedSourceSystem.Consumer<>( - source, pipelineOptions, new SamzaMetricsContainer(new MetricsRegistryMap()), "test-step"); - } - - private static SystemStreamPartition ssp(int partition) { - return new SystemStreamPartition("default-system", "default-system", new Partition(partition)); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestBoundedSource.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestBoundedSource.java deleted file mode 100644 index 928d2887eec2..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestBoundedSource.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.adapter; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.stream.Collectors; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.ElementEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.Event; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.ExceptionEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.LatchEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.SourceBuilder; -import org.apache.beam.sdk.io.BoundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.joda.time.Instant; - -/** A bounded source that can be used for test purposes. 
*/ -public class TestBoundedSource extends BoundedSource { - // each list of events is a split - private final List>> events; - - public static Builder createBuilder() { - return new Builder<>(); - } - - public static SplittableBuilder createSplits(int numSplits) { - return new SplittableBuilder<>(numSplits); - } - - private TestBoundedSource(List>> events) { - this.events = Collections.unmodifiableList(new ArrayList<>(events)); - } - - @Override - public List> split( - long desiredBundleSizeBytes, PipelineOptions options) throws Exception { - return events.stream() - .map(ev -> new TestBoundedSource<>(Collections.singletonList(ev))) - .collect(Collectors.toList()); - } - - @Override - public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { - return events.size(); - } - - @Override - public BoundedReader createReader(PipelineOptions options) throws IOException { - assert events.size() == 1; - - return new Reader(events.get(0)); - } - - @Override - public void validate() {} - - /** A builder used to populate the events emitted by {@link TestBoundedSource}. */ - public static class Builder extends SourceBuilder> { - @Override - public TestBoundedSource build() { - return new TestBoundedSource<>(Collections.singletonList(getEvents())); - } - } - - /** - * A SplittableBuilder supports multiple splits and each split {@link TestUnboundedSource} can be - * built separately from the above Builder. 
- */ - public static class SplittableBuilder extends SourceBuilder> { - private final List> builders = new ArrayList<>(); - - private SplittableBuilder(int splits) { - while (splits != 0) { - builders.add(new Builder()); - --splits; - } - } - - @Override - public TestBoundedSource build() { - final List>> events = new ArrayList<>(); - builders.forEach(builder -> events.add(builder.getEvents())); - return new TestBoundedSource<>(events); - } - - public Builder forSplit(int split) { - return builders.get(split); - } - } - - private class Reader extends BoundedReader { - private final List> events; - private boolean started; - private boolean finished; - private int index = -1; - - private Reader(List> events) { - this.events = events; - } - - @Override - public boolean start() throws IOException { - if (started) { - throw new IllegalStateException("Start called when reader was already started"); - } - started = true; - return advance(); - } - - @Override - public boolean advance() throws IOException { - if (!started) { - throw new IllegalStateException("Advance called when reader was not started"); - } - - if (finished) { - return false; - } - - for (++index; index < events.size(); ++index) { - final Event event = events.get(index); - if (event instanceof ExceptionEvent) { - throw ((ExceptionEvent) event).exception; - } else if (event instanceof LatchEvent) { - try { - ((LatchEvent) event).latch.await(); - } catch (InterruptedException e) { - // Propagate interrupt - Thread.currentThread().interrupt(); - } - } else { - return true; - } - } - - finished = true; - return false; - } - - @Override - public T getCurrent() throws NoSuchElementException { - if (!started || finished) { - throw new NoSuchElementException(); - } - - final Event event = events.get(index); - assert event instanceof ElementEvent; - return ((ElementEvent) event).element; - } - - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - if (!started || finished) { - throw 
new NoSuchElementException(); - } - - final Event event = events.get(index); - assert event instanceof ElementEvent; - return ((ElementEvent) event).timestamp; - } - - @Override - public void close() throws IOException {} - - @Override - public BoundedSource getCurrentSource() { - return TestBoundedSource.this; - } - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestCheckpointMark.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestCheckpointMark.java deleted file mode 100644 index d526d859dea4..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestCheckpointMark.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.adapter; - -import java.io.IOException; -import java.io.Serializable; -import org.apache.beam.sdk.io.UnboundedSource; - -/** A integer CheckpointMark for testing. 
*/ -public class TestCheckpointMark implements UnboundedSource.CheckpointMark, Serializable { - final int checkpoint; - - private TestCheckpointMark(int checkpoint) { - this.checkpoint = checkpoint; - } - - @Override - public void finalizeCheckpoint() throws IOException { - // DO NOTHING - } - - static TestCheckpointMark of(int checkpoint) { - return new TestCheckpointMark(checkpoint); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestSourceHelpers.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestSourceHelpers.java deleted file mode 100644 index 60c0c27b8d1f..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestSourceHelpers.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.adapter; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.CountDownLatch; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.io.Source; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.SystemStreamPartition; -import org.joda.time.Instant; - -/** Helper classes and functions to build source for testing. */ -public class TestSourceHelpers { - - private TestSourceHelpers() {} - - interface Event {} - - static class ElementEvent implements Event { - final T element; - final Instant timestamp; - - private ElementEvent(T element, Instant timestamp) { - this.element = element; - this.timestamp = timestamp; - } - } - - static class WatermarkEvent implements Event { - final Instant watermark; - - private WatermarkEvent(Instant watermark) { - this.watermark = watermark; - } - } - - static class ExceptionEvent implements Event { - final IOException exception; - - private ExceptionEvent(IOException exception) { - this.exception = exception; - } - } - - static class LatchEvent implements Event { - final CountDownLatch latch; - - private LatchEvent(CountDownLatch latch) { - this.latch = latch; - } - } - - static class NoElementEvent implements Event {} - - /** A builder used to populate the events emitted by {@link TestBoundedSource}. */ - abstract static class SourceBuilder> { - private final List> events = new ArrayList<>(); - private Instant currentTimestamp = BoundedWindow.TIMESTAMP_MIN_VALUE; - - @SafeVarargs - public final SourceBuilder addElements(T... 
elements) { - for (T element : elements) { - events.add(new ElementEvent<>(element, currentTimestamp)); - } - return this; - } - - public SourceBuilder addException(IOException exception) { - events.add(new ExceptionEvent<>(exception)); - return this; - } - - public SourceBuilder addLatch(CountDownLatch latch) { - events.add(new LatchEvent<>(latch)); - return this; - } - - public SourceBuilder setTimestamp(Instant timestamp) { - assertTrue( - "Expected " + timestamp + " to be greater than or equal to " + currentTimestamp, - timestamp.isEqual(currentTimestamp) || timestamp.isAfter(currentTimestamp)); - currentTimestamp = timestamp; - return this; - } - - public SourceBuilder advanceWatermarkTo(Instant watermark) { - events.add(new WatermarkEvent<>(watermark)); - return this; - } - - public SourceBuilder noElements() { - events.add(new NoElementEvent()); - return this; - } - - protected List> getEvents() { - return this.events; - } - - public abstract W build(); - } - - static IncomingMessageEnvelope createElementMessage( - SystemStreamPartition ssp, String offset, String element, Instant timestamp) { - return new IncomingMessageEnvelope( - ssp, - offset, - null, - OpMessage.ofElement(WindowedValues.timestampedValueInGlobalWindow(element, timestamp))); - } - - static IncomingMessageEnvelope createWatermarkMessage( - SystemStreamPartition ssp, Instant watermark) { - return IncomingMessageEnvelope.buildWatermarkEnvelope(ssp, watermark.getMillis()); - } - - static IncomingMessageEnvelope createEndOfStreamMessage(SystemStreamPartition ssp) { - return IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp); - } - - static void expectWrappedException(Exception expectedException, Callable callable) - throws Exception { - try { - callable.call(); - fail("Expected exception (" + expectedException + "), but no exception was thrown"); - } catch (Exception e) { - Throwable currentException = e; - while (currentException != null) { - if (currentException.equals(expectedException)) 
{ - return; - } - currentException = currentException.getCause(); - } - assertEquals(expectedException, e); - } - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestUnboundedSource.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestUnboundedSource.java deleted file mode 100644 index 0439354d67ea..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/TestUnboundedSource.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.adapter; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.stream.Collectors; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.ElementEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.Event; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.ExceptionEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.LatchEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.NoElementEvent; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.SourceBuilder; -import org.apache.beam.runners.samza.adapter.TestSourceHelpers.WatermarkEvent; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.SerializableCoder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * A unbounded source that can be used for test purposes. 
- * - * @param element type - */ -public class TestUnboundedSource extends UnboundedSource { - // each list of events is a split - private final List>> events; - - public static Builder createBuilder() { - return new Builder<>(); - } - - public static SplittableBuilder createSplits(int numSplits) { - return new SplittableBuilder<>(numSplits); - } - - private TestUnboundedSource(List>> events) { - this.events = Collections.unmodifiableList(new ArrayList<>(events)); - } - - @Override - public List> split( - int desiredNumSplits, PipelineOptions options) throws Exception { - return events.stream() - .map(ev -> new TestUnboundedSource<>(Collections.singletonList(ev))) - .collect(Collectors.toList()); - } - - @Override - public UnboundedReader createReader( - PipelineOptions options, @Nullable TestCheckpointMark checkpointMark) throws IOException { - assert events.size() == 1; - - return new Reader(events.get(0), checkpointMark); - } - - @Override - public Coder getCheckpointMarkCoder() { - return SerializableCoder.of(TestCheckpointMark.class); - } - - @Override - public void validate() {} - - /** A builder used to populate the events emitted by {@link TestUnboundedSource}. */ - public static class Builder extends SourceBuilder> { - - @Override - public TestUnboundedSource build() { - return new TestUnboundedSource<>(Collections.singletonList(getEvents())); - } - } - - /** - * A SplittableBuilder supports multiple splits and each split {@link TestUnboundedSource} can be - * built separately from the above Builder. 
- */ - public static class SplittableBuilder extends SourceBuilder> { - private final List> builders = new ArrayList<>(); - - private SplittableBuilder(int splits) { - while (splits != 0) { - builders.add(new Builder()); - --splits; - } - } - - @Override - public TestUnboundedSource build() { - List>> events = new ArrayList<>(); - builders.forEach(builder -> events.add(builder.getEvents())); - return new TestUnboundedSource<>(events); - } - - public Builder forSplit(int split) { - return builders.get(split); - } - } - - private class Reader extends UnboundedReader { - private final List> events; - private Instant curTime = BoundedWindow.TIMESTAMP_MIN_VALUE; - private Instant watermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - private boolean started; - private int index = -1; - private int offset; - - private Reader(List> events, TestCheckpointMark checkpointMark) { - this.events = events; - this.offset = checkpointMark == null ? -1 : checkpointMark.checkpoint; - } - - @Override - public boolean start() throws IOException { - if (started) { - throw new IllegalStateException("Start called when reader was already started"); - } - started = true; - return advance(); - } - - @Override - public boolean advance() throws IOException { - if (!started) { - throw new IllegalStateException("Advance called when reader was not started"); - } - - for (++index; index < events.size(); ++index) { - final Event event = events.get(index); - if (event instanceof ExceptionEvent) { - throw ((ExceptionEvent) event).exception; - } else if (event instanceof LatchEvent) { - try { - ((LatchEvent) event).latch.await(); - } catch (InterruptedException e) { - // Propagate interrupt - Thread.currentThread().interrupt(); - } - } else if (event instanceof WatermarkEvent) { - watermark = ((WatermarkEvent) event).watermark; - } else if (event instanceof NoElementEvent) { - return false; - } else { - curTime = ((ElementEvent) event).timestamp; - ++offset; - return true; - } - } - - return false; - } - - 
@Override - public T getCurrent() throws NoSuchElementException { - if (!started) { - throw new NoSuchElementException(); - } - - final Event event = events.get(index); - assert event instanceof ElementEvent; - return ((ElementEvent) event).element; - } - - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - return curTime; - } - - @Override - public Instant getWatermark() { - return watermark; - } - - @Override - public CheckpointMark getCheckpointMark() { - return TestCheckpointMark.of(offset); - } - - @Override - public void close() throws IOException {} - - @Override - public UnboundedSource getCurrentSource() { - return TestUnboundedSource.this; - } - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystemTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystemTest.java deleted file mode 100644 index b171a5475c2b..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/adapter/UnboundedSourceSystemTest.java +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.adapter; - -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.createElementMessage; -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.createEndOfStreamMessage; -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.createWatermarkMessage; -import static org.apache.beam.runners.samza.adapter.TestSourceHelpers.expectWrappedException; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Base64; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.adapter.TestUnboundedSource.SplittableBuilder; -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.samza.Partition; -import org.apache.samza.metrics.MetricsRegistryMap; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.MessageType; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemStreamPartition; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Ignore; -import org.junit.Test; - -/** Tests for {@link UnboundedSourceSystem}. */ -public class UnboundedSourceSystemTest { - - // A reasonable time to wait to get all messages from the source assuming no blocking. 
- private static final long DEFAULT_TIMEOUT_MILLIS = 1000; - private static final long DEFAULT_WATERMARK_TIMEOUT_MILLIS = 1000; - private static final String NULL_STRING = null; - - private static final SystemStreamPartition DEFAULT_SSP = - new SystemStreamPartition("default-system", "default-system", new Partition(0)); - - private static final Coder CHECKPOINT_MARK_CODER = - TestUnboundedSource.createBuilder().build().getCheckpointMarkCoder(); - - @Test - public void testConsumerStartStop() throws IOException, InterruptedException { - final TestUnboundedSource source = TestUnboundedSource.createBuilder().build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, offset(0)); - consumer.start(); - assertEquals( - Collections.EMPTY_LIST, - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testConsumeOneMessage() throws IOException, InterruptedException { - final TestUnboundedSource source = - TestUnboundedSource.createBuilder().addElements("test").build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage( - DEFAULT_SSP, offset(0), "test", BoundedWindow.TIMESTAMP_MIN_VALUE)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testMaxWatermarkTriggersEndOfStreamMessage() - throws IOException, InterruptedException { - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .addElements("test") - .advanceWatermarkTo(BoundedWindow.TIMESTAMP_MAX_VALUE) - .build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - List actualList = - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, 
DEFAULT_TIMEOUT_MILLIS); - actualList.addAll( - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, offset(0), "test", BoundedWindow.TIMESTAMP_MIN_VALUE), - createWatermarkMessage(DEFAULT_SSP, BoundedWindow.TIMESTAMP_MAX_VALUE), - createEndOfStreamMessage(DEFAULT_SSP)), - actualList); - consumer.stop(); - } - - @Test - public void testAdvanceTimestamp() throws IOException, InterruptedException { - final Instant timestamp = Instant.now(); - - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .addElements("before") - .setTimestamp(timestamp) - .addElements("after") - .build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage( - DEFAULT_SSP, offset(0), "before", BoundedWindow.TIMESTAMP_MIN_VALUE), - createElementMessage(DEFAULT_SSP, offset(1), "after", timestamp)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testConsumeMultipleMessages() throws IOException, InterruptedException { - final Instant timestamp = Instant.now(); - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .setTimestamp(timestamp) - .addElements("test", "a", "few", "messages") - .build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, offset(0), "test", timestamp), - createElementMessage(DEFAULT_SSP, offset(1), "a", timestamp), - createElementMessage(DEFAULT_SSP, offset(2), "few", timestamp), - createElementMessage(DEFAULT_SSP, offset(3), "messages", timestamp)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - 
consumer.stop(); - } - - @Test - public void testAdvanceWatermark() throws IOException, InterruptedException { - final Instant now = Instant.now(); - final Instant nowPlusOne = now.plus(Duration.millis(1L)); - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .setTimestamp(now) - .addElements("first") - .setTimestamp(nowPlusOne) - .addElements("second") - .advanceWatermarkTo(now) - .build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, offset(0), "first", now), - createElementMessage(DEFAULT_SSP, offset(1), "second", nowPlusOne), - createWatermarkMessage(DEFAULT_SSP, now)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_WATERMARK_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - @Ignore("https://github.com/apache/beam/issues/20376") - public void testMultipleAdvanceWatermark() throws IOException, InterruptedException { - final Instant now = Instant.now(); - final Instant nowPlusOne = now.plus(Duration.millis(1L)); - final Instant nowPlusTwo = now.plus(Duration.millis(2L)); - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .setTimestamp(now) - .addElements("first") - .advanceWatermarkTo(now) - .noElements() // will output the first watermark - .setTimestamp(nowPlusOne) - .addElements("second") - .setTimestamp(nowPlusTwo) - .addElements("third") - .advanceWatermarkTo(nowPlusOne) - .build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - // consume to the first watermark - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, offset(0), "first", now), - createWatermarkMessage(DEFAULT_SSP, now)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_WATERMARK_TIMEOUT_MILLIS)); - - // consume to the second 
watermark - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, offset(1), "second", nowPlusOne), - createElementMessage(DEFAULT_SSP, offset(2), "third", nowPlusTwo), - createWatermarkMessage(DEFAULT_SSP, nowPlusOne)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_WATERMARK_TIMEOUT_MILLIS)); - - consumer.stop(); - } - - @Test - public void testReaderThrowsAtStart() throws Exception { - final IOException exception = new IOException("Expected exception"); - - final TestUnboundedSource source = - TestUnboundedSource.createBuilder().addException(exception).build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - expectWrappedException( - exception, - () -> consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testReaderThrowsAtAdvance() throws Exception { - final IOException exception = new IOException("Expected exception"); - - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .addElements("test", "a", "few", "good", "messages", "then", "...") - .addException(exception) - .build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, offset(0)); - consumer.start(); - expectWrappedException( - exception, - () -> consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testTimeout() throws Exception { - final CountDownLatch advanceLatch = new CountDownLatch(1); - final Instant now = Instant.now(); - final Instant nowPlusOne = now.plus(Duration.millis(1)); - - final TestUnboundedSource source = - TestUnboundedSource.createBuilder() - .setTimestamp(now) - .addElements("before") - .addLatch(advanceLatch) - .setTimestamp(nowPlusOne) - .addElements("after") - .advanceWatermarkTo(nowPlusOne) - .build(); - - final 
UnboundedSourceSystem.Consumer consumer = - createConsumer(source); - - consumer.register(DEFAULT_SSP, NULL_STRING); - consumer.start(); - assertEquals( - Collections.singletonList(createElementMessage(DEFAULT_SSP, offset(0), "before", now)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - - advanceLatch.countDown(); - - assertEquals( - Arrays.asList( - createElementMessage(DEFAULT_SSP, offset(1), "after", nowPlusOne), - createWatermarkMessage(DEFAULT_SSP, nowPlusOne)), - consumeUntilTimeoutOrWatermark(consumer, DEFAULT_SSP, DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - @Test - public void testRestartFromCheckpoint() throws IOException, InterruptedException { - final SplittableBuilder builder = TestUnboundedSource.createSplits(3); - builder.forSplit(0).addElements("split-0"); - builder.forSplit(1).addElements("split-1"); - builder.forSplit(2).addElements("split-2"); - final TestUnboundedSource source = builder.build(); - - final UnboundedSourceSystem.Consumer consumer = - createConsumer(source, 3); - - consumer.register(ssp(0), offset(10)); - consumer.register(ssp(1), offset(5)); - consumer.register(ssp(2), offset(8)); - consumer.start(); - assertEquals( - Arrays.asList( - createElementMessage(ssp(0), offset(11), "split-0", BoundedWindow.TIMESTAMP_MIN_VALUE)), - consumeUntilTimeoutOrWatermark(consumer, ssp(0), DEFAULT_TIMEOUT_MILLIS)); - assertEquals( - Arrays.asList( - createElementMessage(ssp(1), offset(6), "split-1", BoundedWindow.TIMESTAMP_MIN_VALUE)), - consumeUntilTimeoutOrWatermark(consumer, ssp(1), DEFAULT_TIMEOUT_MILLIS)); - assertEquals( - Arrays.asList( - createElementMessage(ssp(2), offset(9), "split-2", BoundedWindow.TIMESTAMP_MIN_VALUE)), - consumeUntilTimeoutOrWatermark(consumer, ssp(2), DEFAULT_TIMEOUT_MILLIS)); - consumer.stop(); - } - - private static UnboundedSourceSystem.Consumer createConsumer( - TestUnboundedSource source) { - return createConsumer(source, 1); - } - - private static 
UnboundedSourceSystem.Consumer createConsumer( - TestUnboundedSource source, int splitNum) { - SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.as(SamzaPipelineOptions.class); - pipelineOptions.setWatermarkInterval(0L); // emit immediately - pipelineOptions.setMaxSourceParallelism(splitNum); - return new UnboundedSourceSystem.Consumer<>( - source, pipelineOptions, new SamzaMetricsContainer(new MetricsRegistryMap()), "test-step"); - } - - private static List consumeUntilTimeoutOrWatermark( - SystemConsumer consumer, SystemStreamPartition ssp, long timeoutMillis) - throws InterruptedException { - assertTrue("Expected timeoutMillis (" + timeoutMillis + ") >= 0", timeoutMillis >= 0); - - final List accumulator = new ArrayList<>(); - final long start = System.currentTimeMillis(); - long now = start; - while (timeoutMillis + start >= now) { - accumulator.addAll(pollOnce(consumer, ssp, now - start - timeoutMillis)); - if (!accumulator.isEmpty() - && MessageType.of(accumulator.get(accumulator.size() - 1).getMessage()) - == MessageType.WATERMARK) { - break; - } - now = System.currentTimeMillis(); - } - return accumulator; - } - - private static List pollOnce( - SystemConsumer consumer, SystemStreamPartition ssp, long timeoutMillis) - throws InterruptedException { - final Set sspSet = Collections.singleton(ssp); - final Map> pollResult = - consumer.poll(sspSet, timeoutMillis); - assertEquals(sspSet, pollResult.keySet()); - assertNotNull(pollResult.get(ssp)); - return pollResult.get(ssp); - } - - private static String offset(int offset) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - CHECKPOINT_MARK_CODER.encode(TestCheckpointMark.of(offset), baos); - return Base64.getEncoder().encodeToString(baos.toByteArray()); - } - - private static SystemStreamPartition ssp(int partition) { - return new SystemStreamPartition("default-system", "default-system", new Partition(partition)); - } -} diff --git 
a/runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaRunnerWithTransformMetrics.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaRunnerWithTransformMetrics.java deleted file mode 100644 index 49143a5b10bb..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaRunnerWithTransformMetrics.java +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.metrics; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assume.assumeTrue; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.when; - -import java.util.Map; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.TestSamzaRunner; -import org.apache.beam.runners.samza.runtime.OpEmitter; -import org.apache.beam.runners.samza.util.InMemoryMetricsReporter; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.Values; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.samza.context.Context; -import org.apache.samza.metrics.Counter; -import org.apache.samza.metrics.Gauge; -import org.apache.samza.metrics.Metric; -import org.apache.samza.metrics.Timer; -import org.apache.samza.system.WatermarkMessage; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Test; - -public class TestSamzaRunnerWithTransformMetrics { - @Test - public void testSamzaRunnerWithDefaultMetrics() 
{ - // TODO(https://github.com/apache/beam/issues/32208) - assumeTrue(System.getProperty("java.version").startsWith("1.")); - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - InMemoryMetricsReporter inMemoryMetricsReporter = new InMemoryMetricsReporter(); - options.setMetricsReporters(ImmutableList.of(inMemoryMetricsReporter)); - options.setRunner(TestSamzaRunner.class); - options.setEnableTransformMetrics(true); - TestSamzaRunner testSamzaRunner = TestSamzaRunner.fromOptions(options); - Pipeline pipeline = Pipeline.create(options); - // Create a pipeline - PCollection> output = - pipeline - .apply( - "Mock data", - Create.of( - KV.of("bad-key", KV.of("a", 97)), - KV.of("hello", KV.of("a", 97)), - KV.of("hello", KV.of("b", 42)), - KV.of("hello", KV.of("c", 12)))) - .apply("Filter valid keys", Filter.by(x -> x.getKey().equals("hello"))) - .apply(Values.create()) - .apply("Fixed-window", Window.into(FixedWindows.of(Duration.standardSeconds(10)))) - .apply(Count.perKey()); - - // check pipeline is working fine - PAssert.that(output).containsInAnyOrder(KV.of("a", 1L), KV.of("b", 1L), KV.of("c", 1L)); - testSamzaRunner.run(pipeline); - - Map pTransformContainerMetrics = - inMemoryMetricsReporter - .getMetricsRegistry("samza-container-1") - .getGroup("SamzaBeamTransformMetrics"); - Map pTransformTaskMetrics = - inMemoryMetricsReporter - .getMetricsRegistry("TaskName-Partition 0") - .getGroup("SamzaBeamTransformMetrics"); - - // SamzaTransformMetrics group must be initialized - assertFalse(pTransformTaskMetrics.isEmpty()); - assertFalse(pTransformContainerMetrics.isEmpty()); - - // Throughput Metrics are Per container by default - assertEquals( - 4, - ((Counter) - pTransformContainerMetrics.get("Mock_data_Read_CreateSource_-num-output-messages")) - .getCount()); - assertEquals( - 4, - ((Counter) - pTransformContainerMetrics.get( - "Filter_valid_keys_ParDo_Anonymous__ParMultiDo_Anonymous_-num-input-messages")) - .getCount()); 
- // One message is dropped from filter - assertEquals( - 3, - ((Counter) - pTransformContainerMetrics.get( - "Filter_valid_keys_ParDo_Anonymous__ParMultiDo_Anonymous_-num-output-messages")) - .getCount()); - assertEquals( - 3, - ((Counter) - pTransformContainerMetrics.get( - "Values_Values_Map_ParMultiDo_Anonymous_-num-input-messages")) - .getCount()); - assertEquals( - 3, - ((Counter) - pTransformContainerMetrics.get( - "Values_Values_Map_ParMultiDo_Anonymous_-num-output-messages")) - .getCount()); - assertEquals( - 3, - ((Counter) pTransformContainerMetrics.get("Fixed_window_Window_Assign-num-input-messages")) - .getCount()); - assertEquals( - 3, - ((Counter) pTransformContainerMetrics.get("Fixed_window_Window_Assign-num-output-messages")) - .getCount()); - assertEquals( - 3, - ((Counter) pTransformContainerMetrics.get("Combine_perKey_Count_-num-input-messages")) - .getCount()); - assertEquals( - 3, - ((Counter) pTransformContainerMetrics.get("Combine_perKey_Count_-num-output-messages")) - .getCount()); - - // Throughput Metrics are per container by default - assertNotNull( - pTransformContainerMetrics.get( - "Filter_valid_keys_ParDo_Anonymous__ParMultiDo_Anonymous_-handle-message-ns")); - assertNotNull( - pTransformContainerMetrics.get( - "Values_Values_Map_ParMultiDo_Anonymous_-handle-message-ns")); - assertNotNull(pTransformContainerMetrics.get("Combine_perKey_Count_-handle-message-ns")); - - // Watermark Metrics are Per task by default - assertTrue( - ((Gauge) - pTransformTaskMetrics.get("Mock_data_Read_CreateSource_-output-watermark-ms")) - .getValue() - >= 0); - assertNotNull( - pTransformTaskMetrics.get( - "Filter_valid_keys_ParDo_Anonymous__ParMultiDo_Anonymous_-output-watermark-ms")); - assertNotNull( - pTransformTaskMetrics.get("Values_Values_Map_ParMultiDo_Anonymous_-output-watermark-ms")); - assertNotNull(pTransformTaskMetrics.get("Combine_perKey_Count_-output-watermark-ms")); - } - - @Test - public void testSamzaInputAndOutputMetricOp() { - final 
WindowedValue windowedValue = - WindowedValues.timestampedValueInGlobalWindow("value-1", new Instant()); - final WindowedValue windowedValue2 = - WindowedValues.timestampedValueInGlobalWindow("value-2", new Instant()); - final WatermarkMessage watermarkMessage = - new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - - OpEmitter opEmitter = mock(OpEmitter.class); - doNothing().when(opEmitter).emitElement(any()); - doNothing().when(opEmitter).emitWatermark(any()); - - Counter inputCounter = new Counter("filter-input-counter"); - Counter outputCounter = new Counter("filter-output-counter"); - Gauge watermarkProgress = new Gauge<>("filter-output-watermark", 0L); - Gauge cacheSize = new Gauge<>("filter-arrival-time-cache-size", 0L); - Timer latency = new Timer("filter-latency"); - - SamzaTransformMetrics samzaTransformMetrics = mock(SamzaTransformMetrics.class); - doNothing().when(samzaTransformMetrics).register(any(), any()); - when(samzaTransformMetrics.getTransformInputThroughput("filter")).thenReturn(inputCounter); - when(samzaTransformMetrics.getTransformOutputThroughput("filter")).thenReturn(outputCounter); - when(samzaTransformMetrics.getTransformWatermarkProgress("filter")) - .thenReturn(watermarkProgress); - when(samzaTransformMetrics.getTransformLatencyMetric("filter")).thenReturn(latency); - when(samzaTransformMetrics.getTransformCacheSize("filter")).thenReturn(cacheSize); - - SamzaTransformMetricRegistry samzaTransformMetricRegistry = - spy(new SamzaTransformMetricRegistry(samzaTransformMetrics)); - samzaTransformMetricRegistry.register("filter", "dummy-pvalue.in", mock(Context.class)); - samzaTransformMetricRegistry.register("filter", "dummy-pvalue.out", mock(Context.class)); - - SamzaMetricOp inputMetricOp = - new SamzaMetricOp<>( - "dummy-pvalue.in", - "filter", - SamzaMetricOpFactory.OpType.INPUT, - samzaTransformMetricRegistry); - - inputMetricOp.processElement(windowedValue, opEmitter); - inputMetricOp.processElement(windowedValue2, 
opEmitter); - inputMetricOp.processWatermark(new Instant(watermarkMessage.getTimestamp()), opEmitter); - - // Input throughput must be updated - assertEquals(2, inputCounter.getCount()); - // Avg arrival time for the PValue must be updated - assertTrue( - samzaTransformMetricRegistry - .getAverageArrivalTimeMap("filter") - .get("dummy-pvalue.in") - .containsKey(watermarkMessage.getTimestamp())); - - SamzaMetricOp outputMetricOp = - new SamzaMetricOp<>( - "dummy-pvalue.out", - "filter", - SamzaMetricOpFactory.OpType.OUTPUT, - samzaTransformMetricRegistry); - outputMetricOp.init(ImmutableList.of("dummy-pvalue.in"), ImmutableList.of("dummy-pvalue.out")); - - outputMetricOp.processElement(windowedValue, opEmitter); - outputMetricOp.processElement(windowedValue2, opEmitter); - outputMetricOp.processWatermark(new Instant(watermarkMessage.getTimestamp()), opEmitter); - - // Output throughput must be updated - assertEquals(2, outputCounter.getCount()); - // Output watermark must be updated - assertEquals(watermarkMessage.getTimestamp(), watermarkProgress.getValue().longValue()); - // Latency must be positive - assertTrue(latency.getSnapshot().getAverage() > 0); - // Cache size must be 0 - assertEquals(0L, cacheSize.getValue().longValue()); - } - - @Test - public void testSamzaInputAndOutputGBKMetricOp() { - final WindowedValue windowedValue = - WindowedValues.timestampedValueInGlobalWindow("value-1", new Instant()); - final WindowedValue windowedValue2 = - WindowedValues.timestampedValueInGlobalWindow("value-2", new Instant()); - final WatermarkMessage watermarkMessage = - new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - - OpEmitter opEmitter = mock(OpEmitter.class); - doNothing().when(opEmitter).emitElement(any()); - doNothing().when(opEmitter).emitWatermark(any()); - - Counter inputCounter = new Counter("Count-perKey-input-counter"); - Counter outputCounter = new Counter("Count-perKey-output-counter"); - Gauge watermarkProgress = new 
Gauge<>("Count-perKey-output-watermark", 0L); - Timer latency = new Timer("Count-perKey-latency"); - Gauge cacheSize = new Gauge<>("Count-perKey-arrival-time-cache-size", 0L); - - SamzaTransformMetrics samzaTransformMetrics = mock(SamzaTransformMetrics.class); - doNothing().when(samzaTransformMetrics).register(any(), any()); - when(samzaTransformMetrics.getTransformInputThroughput("Count-perKey")) - .thenReturn(inputCounter); - when(samzaTransformMetrics.getTransformOutputThroughput("Count-perKey")) - .thenReturn(outputCounter); - when(samzaTransformMetrics.getTransformWatermarkProgress("Count-perKey")) - .thenReturn(watermarkProgress); - when(samzaTransformMetrics.getTransformLatencyMetric("Count-perKey")).thenReturn(latency); - when(samzaTransformMetrics.getTransformCacheSize("Count-perKey")).thenReturn(cacheSize); - - SamzaTransformMetricRegistry samzaTransformMetricRegistry = - spy(new SamzaTransformMetricRegistry(samzaTransformMetrics)); - samzaTransformMetricRegistry.register("Count-perKey", "window-assign.in", mock(Context.class)); - samzaTransformMetricRegistry.register("Count-perKey", "window-assign.out", mock(Context.class)); - - SamzaGBKMetricOp inputMetricOp = - new SamzaGBKMetricOp<>( - "window-assign.in", - "Count-perKey", - SamzaMetricOpFactory.OpType.INPUT, - samzaTransformMetricRegistry); - - inputMetricOp.processElement(windowedValue, opEmitter); - inputMetricOp.processElement(windowedValue2, opEmitter); - inputMetricOp.processWatermark(new Instant(watermarkMessage.getTimestamp()), opEmitter); - - // Input throughput must be updated - assertEquals(2, inputCounter.getCount()); - // Avg arrival time for must be present for one Global Window - assertEquals( - 1, samzaTransformMetricRegistry.getAverageArrivalTimeMapForGBK("Count-perKey").size()); - - SamzaGBKMetricOp outputMetricOp = - new SamzaGBKMetricOp<>( - "window-assign.out", - "Count-perKey", - SamzaMetricOpFactory.OpType.OUTPUT, - samzaTransformMetricRegistry); - - 
outputMetricOp.processElement(windowedValue, opEmitter); - outputMetricOp.processElement(windowedValue2, opEmitter); - outputMetricOp.processWatermark(new Instant(watermarkMessage.getTimestamp()), opEmitter); - - // Output throughput must be updated - assertEquals(2, outputCounter.getCount()); - // Output watermark must be updated - assertEquals(watermarkMessage.getTimestamp(), watermarkProgress.getValue().longValue()); - // Latency must be positive - assertTrue(latency.getSnapshot().getAverage() > 0); - // Cache size must be 0 - assertEquals(0L, cacheSize.getValue().longValue()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaTransformMetricsRegistry.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaTransformMetricsRegistry.java deleted file mode 100644 index 75f6ddd5fefa..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/metrics/TestSamzaTransformMetricsRegistry.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.metrics; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.when; - -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.samza.context.Context; -import org.apache.samza.metrics.Gauge; -import org.apache.samza.metrics.Timer; -import org.apache.samza.system.WatermarkMessage; -import org.joda.time.Instant; -import org.junit.Test; - -public class TestSamzaTransformMetricsRegistry { - - @Test - public void testSamzaTransformMetricsRegistryForNonShuffleOperators() { - final WatermarkMessage watermarkMessage = - new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - final long avgInputArrivalTime = System.currentTimeMillis(); - - Timer latency = new Timer("filter-latency"); - Gauge cacheSize = new Gauge("filter-cache-size", 0L); - SamzaTransformMetrics samzaTransformMetrics = mock(SamzaTransformMetrics.class); - doNothing().when(samzaTransformMetrics).register(any(), any()); - when(samzaTransformMetrics.getTransformLatencyMetric("filter")).thenReturn(latency); - when(samzaTransformMetrics.getTransformCacheSize("filter")).thenReturn(cacheSize); - - SamzaTransformMetricRegistry samzaTransformMetricRegistry = - spy(new SamzaTransformMetricRegistry(samzaTransformMetrics)); - - samzaTransformMetricRegistry.register("filter", "dummy-pvalue.in", mock(Context.class)); - samzaTransformMetricRegistry.register("filter", "dummy-pvalue.out", mock(Context.class)); - - // Update the avg arrival time - samzaTransformMetricRegistry.updateArrivalTimeMap( - "filter", "dummy-pvalue.in", 
watermarkMessage.getTimestamp(), avgInputArrivalTime); - samzaTransformMetricRegistry.updateArrivalTimeMap( - "filter", "dummy-pvalue.out", watermarkMessage.getTimestamp(), avgInputArrivalTime + 100); - - // Check the avg arrival time is updated - assertEquals( - avgInputArrivalTime, - samzaTransformMetricRegistry - .getAverageArrivalTimeMap("filter") - .get("dummy-pvalue.in") - .get(watermarkMessage.getTimestamp()) - .longValue()); - assertEquals( - avgInputArrivalTime + 100, - samzaTransformMetricRegistry - .getAverageArrivalTimeMap("filter") - .get("dummy-pvalue.out") - .get(watermarkMessage.getTimestamp()) - .longValue()); - - // Emit the latency metric - samzaTransformMetricRegistry.emitLatencyMetric( - "filter", - ImmutableList.of("dummy-pvalue.in"), - ImmutableList.of("dummy-pvalue.out"), - watermarkMessage.getTimestamp(), - "task 0"); - - // Check the latency metric is updated - assertTrue(100 == latency.getSnapshot().getAverage()); - - // Check the avg arrival time is cleared - assertFalse( - samzaTransformMetricRegistry - .getAverageArrivalTimeMap("filter") - .get("dummy-pvalue.in") - .containsKey(watermarkMessage.getTimestamp())); - assertFalse( - samzaTransformMetricRegistry - .getAverageArrivalTimeMap("filter") - .get("dummy-pvalue.out") - .containsKey(watermarkMessage.getTimestamp())); - // Cache size must be 0 - assertEquals(0L, cacheSize.getValue().longValue()); - } - - @Test - public void testSamzaTransformMetricsRegistryForDataShuffleOperators() { - Timer latency = new Timer("Count-perKey-latency"); - Gauge cacheSize = new Gauge("Count-perKey-cache-size", 0L); - - SamzaTransformMetrics samzaTransformMetrics = mock(SamzaTransformMetrics.class); - doNothing().when(samzaTransformMetrics).register(any(), any()); - when(samzaTransformMetrics.getTransformLatencyMetric("Count.perKey")).thenReturn(latency); - when(samzaTransformMetrics.getTransformCacheSize("Count.perKey")).thenReturn(cacheSize); - - SamzaTransformMetricRegistry 
samzaTransformMetricRegistry = - spy(new SamzaTransformMetricRegistry(samzaTransformMetrics)); - - samzaTransformMetricRegistry.register("Count.perKey", "window-assign.in", mock(Context.class)); - samzaTransformMetricRegistry.register("Count.perKey", "window-assign.out", mock(Context.class)); - - final BoundedWindow first = - new BoundedWindow() { - @Override - public Instant maxTimestamp() { - return new Instant(2048L); - } - }; - final BoundedWindow second = - new BoundedWindow() { - @Override - public Instant maxTimestamp() { - return new Instant(689743L); - } - }; - - // Update the avg arrival time - samzaTransformMetricRegistry.updateArrivalTimeMap("Count.perKey", first, 1048L); - samzaTransformMetricRegistry.updateArrivalTimeMap("Count.perKey", second, 4897L); - - // Check the avg arrival time is updated - assertEquals( - 1048L, - samzaTransformMetricRegistry - .getAverageArrivalTimeMapForGBK("Count.perKey") - .get(first) - .longValue()); - assertEquals( - 4897L, - samzaTransformMetricRegistry - .getAverageArrivalTimeMapForGBK("Count.perKey") - .get(second) - .longValue()); - - // Emit the latency metric - samzaTransformMetricRegistry.emitLatencyMetric("Count.perKey", first, 2048L, "task 0"); - samzaTransformMetricRegistry.emitLatencyMetric("Count.perKey", second, 5897L, "task 0"); - - // Check the latency metric is updated - assertTrue(1000 == latency.getSnapshot().getAverage()); - - // Check the avg arrival time is cleared - assertFalse( - samzaTransformMetricRegistry - .getAverageArrivalTimeMapForGBK("Count.perKey") - .containsKey(first)); - assertFalse( - samzaTransformMetricRegistry - .getAverageArrivalTimeMapForGBK("Count.perKey") - .containsKey(second)); - - // Failure testing - samzaTransformMetricRegistry.updateArrivalTimeMap("random-transform", first, 1048L); - samzaTransformMetricRegistry.updateArrivalTimeMap("random-transform", first, 1048L); - // No data updated - assertFalse( - samzaTransformMetricRegistry - 
.getAverageArrivalTimeMapForGBK("Count.perKey") - .containsKey(first)); - assertNull(samzaTransformMetricRegistry.getAverageArrivalTimeMapForGBK("random-transform")); - // Emit the bad latency metric - samzaTransformMetricRegistry.emitLatencyMetric("random-transform", first, 2048L, "task 0"); - samzaTransformMetricRegistry.emitLatencyMetric("random-transform", first, 0, "task 0"); - // Check the latency metric is same - assertTrue(1000 == latency.getSnapshot().getAverage()); - // Cache size must be 0 - assertEquals(0L, cacheSize.getValue().longValue()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunnerTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunnerTest.java deleted file mode 100644 index 4040040d3e04..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/AsyncDoFnRunnerTest.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.mock; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.state.CombiningState; -import org.apache.beam.sdk.state.StateSpec; -import org.apache.beam.sdk.state.StateSpecs; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.sdk.values.WindowedValues; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; - -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - // TODO(https://github.com/apache/beam/issues/21230): Remove when new version of - // errorprone is released (2.11.0) - "unused" -}) -/** - * Tests for {@link AsyncDoFnRunner}. - * - *

Note due to the bug in SAMZA-2761, end-of-stream can cause shutdown while there are still - * messages in process in asynchronous mode. As a temporary solution, we add more bundles to process - * in the test inputs. - */ -public class AsyncDoFnRunnerTest implements Serializable { - - @Rule - public final transient TestPipeline pipeline = - TestPipeline.fromOptions( - PipelineOptionsFactory.fromArgs( - "--runner=TestSamzaRunner", - "--maxBundleSize=5", - "--numThreadsForProcessElement=5") - .create()); - - @Test - @Ignore("https://github.com/apache/beam/issues/23745") - public void testSimplePipeline() { - List input = new ArrayList<>(); - for (int i = 1; i < 20; i++) { - input.add(i); - } - PCollection square = - pipeline - .apply(Create.of(input)) - .apply(Filter.by(x -> x <= 5)) - .apply(MapElements.into(TypeDescriptors.integers()).via(x -> x * x)); - - PAssert.that(square).containsInAnyOrder(Arrays.asList(1, 4, 9, 16, 25)); - - pipeline.run(); - } - - @Test - @Ignore("https://github.com/apache/beam/issues/23745") - public void testPipelineWithState() { - final List> input = - new ArrayList<>( - Arrays.asList( - KV.of("apple", "red"), - KV.of("banana", "yellow"), - KV.of("apple", "yellow"), - KV.of("grape", "purple"), - KV.of("banana", "yellow"))); - final Map expectedCount = ImmutableMap.of("apple", 2, "banana", 2, "grape", 1); - - // TODO: remove after SAMZA-2761 fix - for (int i = 0; i < 20; i++) { - input.add(KV.of("*", "*")); - } - - final DoFn, KV> fn = - new DoFn, KV>() { - - @StateId("cc") - private final StateSpec> countState = - StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers()); - - @ProcessElement - public void processElement( - ProcessContext c, @StateId("cc") CombiningState countState) { - - if (c.element().getKey().equals("*")) { - return; - } - - countState.add(1); - String key = c.element().getKey(); - int n = countState.read(); - if (n >= expectedCount.get(key)) { - c.output(KV.of(key, n)); - } - } - }; - - PCollection> 
counts = pipeline.apply(Create.of(input)).apply(ParDo.of(fn)); - - PAssert.that(counts) - .containsInAnyOrder( - expectedCount.entrySet().stream() - .map(entry -> KV.of(entry.getKey(), entry.getValue())) - .collect(Collectors.toList())); - - pipeline.run(); - } - - @Test - @Ignore("https://github.com/apache/beam/issues/23745") - public void testPipelineWithAggregation() { - final List> input = - new ArrayList<>( - Arrays.asList( - KV.of("apple", 2L), - KV.of("banana", 5L), - KV.of("apple", 8L), - KV.of("grape", 10L), - KV.of("banana", 5L))); - - // TODO: remove after SAMZA-2761 fix - for (int i = 0; i < 50; i++) { - input.add(KV.of("*", 0L)); - } - - PCollection> sums = - pipeline - .apply(Create.of(input)) - .apply(Filter.by(x -> !x.getKey().equals("*"))) - .apply(Sum.longsPerKey()); - - PAssert.that(sums) - .containsInAnyOrder( - Arrays.asList(KV.of("apple", 10L), KV.of("banana", 10L), KV.of("grape", 10L))); - - pipeline.run(); - } - - @Test - public void testKeyedOutputFutures() { - // We test the scenario that two elements of the same key needs to be processed in order. 
- final DoFnRunner, Void> doFnRunner = mock(DoFnRunner.class); - final AtomicInteger prev = new AtomicInteger(0); - final CountDownLatch latch = new CountDownLatch(1); - doAnswer( - invocation -> { - latch.await(); - WindowedValue> wv = invocation.getArgument(0); - Integer val = wv.getValue().getValue(); - - // Verify the previous element has been fully processed by checking the prev value - assertEquals(val - 1, prev.get()); - - prev.set(val); - return null; - }) - .when(doFnRunner) - .processElement(any()); - - SamzaPipelineOptions options = PipelineOptionsFactory.as(SamzaPipelineOptions.class); - options.setNumThreadsForProcessElement(4); - - final OpEmitter opEmitter = new OpAdapter.OpEmitterImpl<>(); - final FutureCollector futureCollector = new FutureCollectorImpl<>(); - futureCollector.prepare(); - - final AsyncDoFnRunner, Void> asyncDoFnRunner = - AsyncDoFnRunner.create(doFnRunner, opEmitter, futureCollector, true, options); - - final String appleKey = "apple"; - - final WindowedValue> input1 = - WindowedValues.valueInGlobalWindow(KV.of(appleKey, 1)); - - final WindowedValue> input2 = - WindowedValues.valueInGlobalWindow(KV.of(appleKey, 2)); - - asyncDoFnRunner.processElement(input1); - asyncDoFnRunner.processElement(input2); - // Resume input1 process afterwards - latch.countDown(); - - // Waiting for the futures to be resolved - try { - futureCollector.finish().toCompletableFuture().get(); - } catch (Exception e) { - // ignore interruption here. 
- } - - // The final val should be the last element value - assertEquals(2, prev.get()); - // The appleKey in keyedOutputFutures map should be removed - assertFalse(asyncDoFnRunner.hasOutputFuturesForKey(appleKey)); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/ClassicBundleManagerTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/ClassicBundleManagerTest.java deleted file mode 100644 index 09b51349aa49..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/ClassicBundleManagerTest.java +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.util.Collection; -import java.util.Collections; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.concurrent.CountDownLatch; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentCaptor; - -/** Unit tests for {@linkplain ClassicBundleManager}. 
*/ -public final class ClassicBundleManagerTest { - private static final long MAX_BUNDLE_SIZE = 3; - private static final long MAX_BUNDLE_TIME_MS = 2000; - private static final String BUNDLE_CHECK_TIMER_ID = "bundle-check-test-timer"; - - private FutureCollector mockFutureCollector; - private ClassicBundleManager bundleManager; - private BundleManager.BundleProgressListener bundleProgressListener; - private Scheduler> mockScheduler; - - @Before - public void setUp() { - mockFutureCollector = mock(FutureCollector.class); - bundleProgressListener = mock(ClassicBundleManager.BundleProgressListener.class); - mockScheduler = mock(Scheduler.class); - bundleManager = - new ClassicBundleManager<>( - bundleProgressListener, - mockFutureCollector, - MAX_BUNDLE_SIZE, - MAX_BUNDLE_TIME_MS, - mockScheduler, - BUNDLE_CHECK_TIMER_ID); - } - - @Test - public void testWhenFirstTryStartBundleThenStartsBundle() { - bundleManager.tryStartBundle(); - - verify(bundleProgressListener, times(1)).onBundleStarted(); - assertEquals( - "Expected the number of element in the current bundle to be 1", - 1L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 1", 1L, bundleManager.getPendingBundleCount()); - assertTrue("tryStartBundle() did not start the bundle", bundleManager.isBundleStarted()); - } - - @Test(expected = IllegalArgumentException.class) - public void testWhenCurrentBundleDoneFutureIsNotNullThenStartBundleFails() { - bundleManager.setCurrentBundleDoneFuture(CompletableFuture.completedFuture(null)); - bundleManager.tryStartBundle(); - } - - @Test - public void testWhenSignalFailureThenResetCurrentBundle() { - doThrow(new RuntimeException("User start bundle threw an exception")) - .when(bundleProgressListener) - .onBundleStarted(); - - try { - bundleManager.tryStartBundle(); - } catch (RuntimeException e) { - bundleManager.signalFailure(e); - } - - // verify if the signal failure only resets appropriate attributes of bundle - 
verify(mockFutureCollector, times(1)).prepare(); - verify(mockFutureCollector, times(1)).discard(); - assertEquals( - "Expected the number of element in the current bundle to 0", - 0L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected pending bundle count to be 0", 0L, bundleManager.getPendingBundleCount()); - assertFalse("Error didn't reset the bundle as expected.", bundleManager.isBundleStarted()); - } - - @Test - public void testWhenMultipleTryStartThenOnlyStartBundleOnce() { - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - - // second invocation should not start the bundle - verify(bundleProgressListener, times(1)).onBundleStarted(); - assertEquals( - "Expected the number of element in the current bundle to be 2", - 2L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 1", 1L, bundleManager.getPendingBundleCount()); - assertTrue("tryStartBundle() did not start the bundle", bundleManager.isBundleStarted()); - } - - /* - * Setup the bundle manager with default max bundle size as 3 and max bundle close timeout to 2 seconds. - * The test verifies the following - * 1. Bundle gets closed on tryFinishBundle() - * a. pending bundle count == 0 - * b. element in current bundle == 0 - * c. isBundleStarted == false - * 2. 
onBundleFinished callback is invoked on the progress listener - */ - @Test - public void testWhenTryFinishBundleThenBundleIsReset() { - OpEmitter mockEmitter = mock(OpEmitter.class); - when(mockFutureCollector.finish()) - .thenReturn( - CompletableFuture.completedFuture(Collections.singleton(mock(WindowedValue.class)))); - - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - bundleManager.tryFinishBundle(mockEmitter); - - verify(mockEmitter, times(1)).emitFuture(any()); - verify(bundleProgressListener, times(1)).onBundleFinished(mockEmitter); - assertEquals( - "Expected the number of element in the current bundle to be 0", - 0L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 0", 0L, bundleManager.getPendingBundleCount()); - assertFalse("tryFinishBundle() did not close the bundle", bundleManager.isBundleStarted()); - } - - @Test - public void testTryFinishBundleClosesBundleOnMaxWatermark() { - OpEmitter mockEmitter = mock(OpEmitter.class); - when(mockFutureCollector.finish()) - .thenReturn( - CompletableFuture.completedFuture(Collections.singleton(mock(WindowedValue.class)))); - bundleManager.setBundleWatermarkHold(BoundedWindow.TIMESTAMP_MAX_VALUE); - - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - bundleManager.tryFinishBundle(mockEmitter); - - verify(mockEmitter, times(1)).emitFuture(any()); - verify(bundleProgressListener, times(1)).onBundleFinished(mockEmitter); - assertEquals( - "Expected the number of element in the current bundle to be 0", - 0L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 0", 0L, bundleManager.getPendingBundleCount()); - assertFalse("tryFinishBundle() did not close the bundle", bundleManager.isBundleStarted()); - } - - /* - * Set up the bundle manager with defaults and ensure the bundle manager doesn't close the current active bundle. 
- */ - @Test - public void testTryFinishBundleShouldNotCloseBundle() { - OpEmitter mockEmitter = mock(OpEmitter.class); - when(mockFutureCollector.finish()) - .thenReturn( - CompletableFuture.completedFuture(Collections.singleton(mock(WindowedValue.class)))); - - bundleManager.tryStartBundle(); - bundleManager.tryFinishBundle(mockEmitter); - - verify(mockFutureCollector, times(1)).finish(); - verify(mockEmitter, times(1)).emitFuture(any()); - verify(bundleProgressListener, times(0)).onBundleFinished(mockEmitter); - assertEquals( - "Expected the number of element in the current bundle to be 1", - 1L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 1", 1L, bundleManager.getPendingBundleCount()); - assertTrue("tryFinishBundle() did not close the bundle", bundleManager.isBundleStarted()); - } - - @Test - public void testTryFinishBundleWhenNoBundleInProgress() { - OpEmitter mockEmitter = mock(OpEmitter.class); - when(mockFutureCollector.finish()) - .thenReturn(CompletableFuture.completedFuture(Collections.emptyList())); - - bundleManager.tryFinishBundle(mockEmitter); - - verify(mockEmitter, times(1)).emitFuture(any()); - assertNull( - "tryFinishBundle() should not set the future when no bundle in progress", - bundleManager.getCurrentBundleDoneFuture()); - } - - @Test - public void testProcessWatermarkWhenNoBundleInProgress() { - Instant now = Instant.now(); - OpEmitter mockEmitter = mock(OpEmitter.class); - bundleManager.processWatermark(now, mockEmitter); - verify(bundleProgressListener, times(1)).onWatermark(now, mockEmitter); - } - - /* - * The test validates processing watermark during an active bundle in progress and also validates - * if the watermark hold is propagated down stream after the output futures are resolved. 
- */ - @Test - public void testProcessWatermarkWithPendingBundles() { - CountDownLatch latch = new CountDownLatch(1); - Instant watermark = Instant.now(); - OpEmitter mockEmitter = mock(OpEmitter.class); - - // We need to capture the finish bundle future to know if we can check for output watermark - // and verify other callbacks get invoked. - Class>>> outputFutureClass = - (Class>>>) (Class) CompletionStage.class; - ArgumentCaptor>>> captor = - ArgumentCaptor.forClass(outputFutureClass); - - when(mockFutureCollector.finish()) - .thenReturn( - CompletableFuture.supplyAsync( - () -> { - try { - latch.await(); - } catch (InterruptedException e) { - throw new AssertionError("Test interrupted when waiting for latch"); - } - - return Collections.singleton(mock(WindowedValue.class)); - })); - - testWatermarkHoldWhenPendingBundleInProgress(mockEmitter, captor, watermark); - testWatermarkHoldPropagatesAfterFutureResolution(mockEmitter, captor, latch, watermark); - } - - @Test - public void testMaxWatermarkPropagationForPendingBundle() { - Instant watermark = BoundedWindow.TIMESTAMP_MAX_VALUE; - OpEmitter mockEmitter = mock(OpEmitter.class); - bundleManager.setPendingBundleCount(1); - bundleManager.processWatermark(watermark, mockEmitter); - verify(bundleProgressListener, times(1)).onWatermark(watermark, mockEmitter); - } - - @Test - public void testMaxWatermarkWithBundleInProgress() { - Instant watermark = BoundedWindow.TIMESTAMP_MAX_VALUE; - OpEmitter mockEmitter = mock(OpEmitter.class); - - when(mockFutureCollector.finish()) - .thenReturn( - CompletableFuture.completedFuture(Collections.singleton(mock(WindowedValue.class)))); - - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - - // should force close bundle - bundleManager.processWatermark(watermark, mockEmitter); - verify(bundleProgressListener, times(1)).onWatermark(watermark, mockEmitter); - } - - @Test - public void testProcessTimerWithBundleTimeElapsed() { - ClassicBundleManager bundleManager = 
- new ClassicBundleManager<>( - bundleProgressListener, - mockFutureCollector, - MAX_BUNDLE_SIZE, - 0, - mockScheduler, - BUNDLE_CHECK_TIMER_ID); - OpEmitter mockEmitter = mock(OpEmitter.class); - KeyedTimerData mockTimer = mock(KeyedTimerData.class); - TimerInternals.TimerData mockTimerData = mock(TimerInternals.TimerData.class); - - when(mockFutureCollector.finish()) - .thenReturn( - CompletableFuture.completedFuture(Collections.singleton(mock(WindowedValue.class)))); - when(mockTimerData.getTimerId()).thenReturn(BUNDLE_CHECK_TIMER_ID); - when(mockTimer.getTimerData()).thenReturn(mockTimerData); - - bundleManager.tryStartBundle(); - bundleManager.processTimer(mockTimer, mockEmitter); - - verify(mockEmitter, times(1)).emitFuture(any()); - verify(bundleProgressListener, times(1)).onBundleFinished(mockEmitter); - assertEquals( - "Expected the number of element in the current bundle to be 0", - 0L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 0", 0L, bundleManager.getPendingBundleCount()); - assertFalse("tryFinishBundle() did not close the bundle", bundleManager.isBundleStarted()); - } - - @Test - public void testProcessTimerWithTimeLessThanMaxBundleTime() { - OpEmitter mockEmitter = mock(OpEmitter.class); - KeyedTimerData mockTimer = mock(KeyedTimerData.class); - TimerInternals.TimerData mockTimerData = mock(TimerInternals.TimerData.class); - - when(mockTimerData.getTimerId()).thenReturn(BUNDLE_CHECK_TIMER_ID); - when(mockTimer.getTimerData()).thenReturn(mockTimerData); - - when(mockFutureCollector.finish()) - .thenReturn(CompletableFuture.completedFuture(Collections.emptyList())); - - bundleManager.tryStartBundle(); - bundleManager.processTimer(mockTimer, mockEmitter); - - verify(mockFutureCollector, times(1)).finish(); - verify(mockEmitter, times(1)).emitFuture(any()); - verify(bundleProgressListener, times(0)).onBundleFinished(mockEmitter); - assertEquals( - "Expected the number of element in the 
current bundle to be 1", - 1L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 1", 1L, bundleManager.getPendingBundleCount()); - assertTrue("tryFinishBundle() closed the bundle", bundleManager.isBundleStarted()); - } - - @Test - public void testProcessTimerIgnoresNonBundleTimers() { - OpEmitter mockEmitter = mock(OpEmitter.class); - KeyedTimerData mockTimer = mock(KeyedTimerData.class); - TimerInternals.TimerData mockTimerData = mock(TimerInternals.TimerData.class); - - when(mockTimerData.getTimerId()).thenReturn("NotBundleTimer"); - when(mockTimer.getTimerData()).thenReturn(mockTimerData); - - bundleManager.tryStartBundle(); - bundleManager.processTimer(mockTimer, mockEmitter); - - verify(mockFutureCollector, times(0)).finish(); - verify(mockEmitter, times(0)).emitFuture(any()); - verify(bundleProgressListener, times(0)).onBundleFinished(mockEmitter); - assertEquals( - "Expected the number of element in the current bundle to be 1", - 1L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected the pending bundle count to be 1", 1L, bundleManager.getPendingBundleCount()); - assertTrue("tryFinishBundle() closed the bundle", bundleManager.isBundleStarted()); - } - - @Test - public void testSignalFailureResetsTheBundleAndCollector() { - bundleManager.tryStartBundle(); - - bundleManager.signalFailure(mock(Throwable.class)); - verify(mockFutureCollector, times(1)).prepare(); - verify(mockFutureCollector, times(1)).discard(); - assertEquals( - "Expected the number of element in the current bundle to 0", - 0L, - bundleManager.getCurrentBundleElementCount()); - assertEquals( - "Expected pending bundle count to be 0", 0L, bundleManager.getPendingBundleCount()); - assertFalse("Error didn't reset the bundle as expected.", bundleManager.isBundleStarted()); - } - - /* - * We validate the following - * 1. Process watermark is held since there is a pending bundle. - * 2. 
Watermark propagates down stream once the output future is resolved. - * 3. The watermark propagated is the one that was held before closing the bundle - * 4. onBundleFinished and onWatermark callbacks are triggered - * 5. Pending bundle count is decremented once the future is resolved - */ - private void testWatermarkHoldPropagatesAfterFutureResolution( - OpEmitter mockEmitter, - ArgumentCaptor>>> captor, - CountDownLatch latch, - Instant sealedWatermark) { - Instant higherWatermark = Instant.now(); - - // Process watermark should result in watermark hold again since pending bundle count > 1 - bundleManager.processWatermark(higherWatermark, mockEmitter); - verify(bundleProgressListener, times(0)).onWatermark(higherWatermark, mockEmitter); - - // Resolving the process output futures should result in watermark propagation - latch.countDown(); - CompletionStage validationFuture = - captor - .getValue() - .thenAccept( - results -> { - verify(bundleProgressListener, times(1)).onBundleFinished(mockEmitter); - verify(bundleProgressListener, times(1)) - .onWatermark(sealedWatermark, mockEmitter); - assertEquals( - "Expected the pending bundle count to be 0", - 0L, - bundleManager.getPendingBundleCount()); - }); - - validationFuture.toCompletableFuture().join(); - } - - /* - * We validate the following - * 1. Watermark is held since there is a bundle in progress - * 2. Callbacks are not invoked when tryFinishBundle() is invoked since the future is unresolved - * 3. 
Watermark hold is sealed and output future is emitted - */ - private void testWatermarkHoldWhenPendingBundleInProgress( - OpEmitter mockEmitter, - ArgumentCaptor>>> captor, - Instant watermark) { - // Starts the bundle and reach the max bundle size so that tryFinishBundle() seals the current - // bundle - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - bundleManager.tryStartBundle(); - - bundleManager.processWatermark(watermark, mockEmitter); - verify(bundleProgressListener, times(0)).onWatermark(watermark, mockEmitter); - - // Bundle is still unresolved although sealed since count down the latch is not yet decremented. - bundleManager.tryFinishBundle(mockEmitter); - verify(mockFutureCollector, times(1)).finish(); - verify(mockEmitter, times(1)).emitFuture(captor.capture()); - assertFalse("tryFinishBundle() closed the bundle", bundleManager.isBundleStarted()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/FutureCollectorImplTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/FutureCollectorImplTest.java deleted file mode 100644 index b0b9b5450f23..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/FutureCollectorImplTest.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.stream.Collectors; -import org.apache.beam.sdk.values.WindowedValue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -/** Unit tests for {@linkplain FutureCollectorImpl}. */ -public final class FutureCollectorImplTest { - private static final List RESULTS = ImmutableList.of("hello", "world"); - private FutureCollector futureCollector = new FutureCollectorImpl<>(); - - @Before - public void setup() { - futureCollector = new FutureCollectorImpl<>(); - } - - @Test(expected = IllegalStateException.class) - public void testAddWithoutPrepareCallThrowsException() { - futureCollector.add(mock(CompletionStage.class)); - } - - @Test - public void testFinishWithoutPrepareReturnsEmptyCollection() { - CompletionStage>> resultFuture = futureCollector.finish(); - CompletionStage validationFuture = - resultFuture.thenAccept( - result -> { - Assert.assertTrue("Expected the result to be empty", result.isEmpty()); - }); - validationFuture.toCompletableFuture().join(); - } - - @Test - public void testFinishReturnsExpectedResults() { - WindowedValue mockWindowedValue = mock(WindowedValue.class); - - when(mockWindowedValue.getValue()).thenReturn("hello").thenReturn("world"); - - futureCollector.prepare(); - futureCollector.add(CompletableFuture.completedFuture(mockWindowedValue)); - futureCollector.add(CompletableFuture.completedFuture(mockWindowedValue)); - - CompletionStage>> resultFuture = futureCollector.finish(); - CompletionStage validationFuture = - 
resultFuture.thenAccept( - results -> { - List actualResults = - results.stream().map(WindowedValue::getValue).collect(Collectors.toList()); - Assert.assertEquals( - "Expected the result to be {hello, world}", RESULTS, actualResults); - }); - validationFuture.toCompletableFuture().join(); - } - - @Test - public void testMultiplePrepareCallsWithoutFinishThrowsException() { - futureCollector.prepare(); - - try { - futureCollector.prepare(); - Assert.fail("Second invocation of prepare should throw IllegalStateException"); - } catch (IllegalStateException ex) { - } - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java deleted file mode 100644 index 73454cc95421..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.junit.Assume.assumeTrue; - -import java.io.Serializable; -import java.util.Arrays; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TimestampedValue; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; - -/** Tests for GroupByKeyOp. 
*/ -public class GroupByKeyOpTest implements Serializable { - - @BeforeClass - public static void beforeClass() { - // TODO(https://github.com/apache/beam/issues/32208) - assumeTrue(System.getProperty("java.version").startsWith("1.")); - } - - @Rule - public final transient TestPipeline pipeline = - TestPipeline.fromOptions( - PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner").create()); - - @Rule - public final transient TestPipeline dropLateDataPipeline = - TestPipeline.fromOptions( - PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner", "--dropLateData=true") - .create()); - - @Test - public void testDefaultGbk() { - TestStream.Builder testStream = - TestStream.create(VarIntCoder.of()) - .addElements(TimestampedValue.of(1, new Instant(1000))) - .addElements(TimestampedValue.of(2, new Instant(2000))) - .advanceWatermarkTo(new Instant(3000)) - .addElements(TimestampedValue.of(10, new Instant(1000))) - .advanceWatermarkTo(new Instant(10000)); - - PCollection aggregated = - pipeline - .apply(testStream.advanceWatermarkToInfinity()) - .apply( - Window.into(FixedWindows.of(Duration.standardSeconds(3))) - .accumulatingFiredPanes()) - .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); - - PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(3, 10)); - - pipeline.run().waitUntilFinish(); - } - - @Test - public void testDropLateDataNonKeyed() { - TestStream.Builder testStream = - TestStream.create(VarIntCoder.of()) - .addElements(TimestampedValue.of(1, new Instant(1000))) - .addElements(TimestampedValue.of(2, new Instant(2000))) - .advanceWatermarkTo(new Instant(3000)) - .addElements(TimestampedValue.of(10, new Instant(1000))) - .advanceWatermarkTo(new Instant(10000)); - - PCollection aggregated = - dropLateDataPipeline - .apply(testStream.advanceWatermarkToInfinity()) - .apply( - Window.into(FixedWindows.of(Duration.standardSeconds(3))) - .accumulatingFiredPanes()) - .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); - - 
PAssert.that(aggregated).containsInAnyOrder(3); - - dropLateDataPipeline.run().waitUntilFinish(); - } - - @Test - public void testDropLateDataKeyed() { - TestStream.Builder> testStream = - TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())) - .addElements(TimestampedValue.of(KV.of("a", 1), new Instant(1000))) - .addElements(TimestampedValue.of(KV.of("b", 2), new Instant(2000))) - .addElements(TimestampedValue.of(KV.of("a", 3), new Instant(2500))) - .advanceWatermarkTo(new Instant(3000)) - .addElements(TimestampedValue.of(KV.of("a", 10), new Instant(1000))) - .advanceWatermarkTo(new Instant(10000)); - - PCollection> aggregated = - dropLateDataPipeline - .apply(testStream.advanceWatermarkToInfinity()) - .apply( - Window.>into(FixedWindows.of(Duration.standardSeconds(3))) - .accumulatingFiredPanes()) - .apply(Sum.integersPerKey()); - - PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(KV.of("a", 4), KV.of("b", 2))); - - dropLateDataPipeline.run().waitUntilFinish(); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/KeyedTimerDataTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/KeyedTimerDataTest.java deleted file mode 100644 index 1a2f82b1a0d7..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/KeyedTimerDataTest.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import java.io.ByteArrayOutputStream; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.testing.CoderProperties; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.values.CausedByDrain; -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; -import org.joda.time.Instant; -import org.junit.Test; - -/** Tests for {@link KeyedTimerData}. 
*/ -public class KeyedTimerDataTest { - private static final Coder STRING_CODER = StringUtf8Coder.of(); - private static final Instant TIMESTAMP = - new DateTime(2020, 8, 11, 13, 42, 9, DateTimeZone.UTC).toInstant(); - // TODO: LISAMZA-19205 Test OUTPUT_TIMESTAMP after outputTimestamp is encoded - // private static final Instant OUTPUT_TIMESTAMP = TIMESTAMP.plus(Duration.standardSeconds(30)); - - @Test - public void testCoder() throws Exception { - final TimerInternals.TimerData td = - TimerInternals.TimerData.of( - "timer", - StateNamespaces.global(), - TIMESTAMP, - TIMESTAMP, - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - - final String key = "timer-key"; - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - STRING_CODER.encode(key, baos); - final byte[] keyBytes = baos.toByteArray(); - final KeyedTimerData ktd = new KeyedTimerData<>(keyBytes, key, td); - - final KeyedTimerData.KeyedTimerDataCoder ktdCoder = - new KeyedTimerData.KeyedTimerDataCoder<>(STRING_CODER, GlobalWindow.Coder.INSTANCE); - - // TODO: LISAMZA-19205: use CoderProperties.coderDecodeEncodeEqual - CoderProperties.coderDecodeEncodeEqualInContext(ktdCoder, Coder.Context.OUTER, ktd); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/PortableBundleManagerTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/PortableBundleManagerTest.java deleted file mode 100644 index 522e146d21e6..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/PortableBundleManagerTest.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.runtime; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import org.apache.beam.runners.core.TimerInternals; -import org.apache.samza.operators.Scheduler; -import org.joda.time.Instant; -import org.junit.Before; -import org.junit.Test; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; - -public class PortableBundleManagerTest { - - @Mock BundleManager.BundleProgressListener bundleProgressListener; - @Mock Scheduler> bundleTimerScheduler; - @Mock OpEmitter emitter; - - PortableBundleManager portableBundleManager; - - private static final String TIMER_ID = "timerId"; - - private static final long MAX_BUNDLE_TIME_MS = 100000; - - @Before - public void setup() { - MockitoAnnotations.initMocks(this); - } - - @Test - public void test() { - portableBundleManager = - new PortableBundleManager<>( - bundleProgressListener, 1, MAX_BUNDLE_TIME_MS, bundleTimerScheduler, TIMER_ID); - portableBundleManager.tryStartBundle(); - - verify(bundleProgressListener, times(1)).onBundleStarted(); - } - - @Test - public void testWhen() { - portableBundleManager = - new PortableBundleManager<>( - bundleProgressListener, 4, MAX_BUNDLE_TIME_MS, bundleTimerScheduler, TIMER_ID); - portableBundleManager.tryStartBundle(); - 
portableBundleManager.tryStartBundle(); - - verify(bundleProgressListener, times(1)).onBundleStarted(); - } - - @Test - public void testWhenElementCountNotReachedTHenBundleDoesntFinish() { - portableBundleManager = - new PortableBundleManager<>( - bundleProgressListener, 4, MAX_BUNDLE_TIME_MS, bundleTimerScheduler, TIMER_ID); - portableBundleManager.tryStartBundle(); - portableBundleManager.tryStartBundle(); - portableBundleManager.tryFinishBundle(emitter); - - verify(bundleProgressListener, times(1)).onBundleStarted(); - verify(bundleProgressListener, times(0)).onBundleFinished(any()); - } - - @Test - public void testWhenElementCountReachedThenFinishBundle() { - portableBundleManager = - new PortableBundleManager<>( - bundleProgressListener, 4, MAX_BUNDLE_TIME_MS, bundleTimerScheduler, TIMER_ID); - portableBundleManager.tryStartBundle(); - portableBundleManager.tryStartBundle(); - portableBundleManager.tryStartBundle(); - portableBundleManager.tryStartBundle(); - portableBundleManager.tryFinishBundle(emitter); - - verify(bundleProgressListener, times(1)).onBundleStarted(); - verify(bundleProgressListener, times(1)).onBundleFinished(any()); - } - - @Test - public void testWhenBundleTimeReachedThenFinishBundle() throws Exception { - portableBundleManager = - new PortableBundleManager<>(bundleProgressListener, 4, 1, bundleTimerScheduler, TIMER_ID); - portableBundleManager.tryStartBundle(); - Thread.sleep(2); - portableBundleManager.tryFinishBundle(emitter); - - verify(bundleProgressListener, times(1)).onBundleStarted(); - verify(bundleProgressListener, times(1)).onBundleFinished(any()); - } - - @Test - public void testWhenSignalFailureThenResetBundle() throws Exception { - portableBundleManager = - new PortableBundleManager<>(bundleProgressListener, 4, 1, bundleTimerScheduler, TIMER_ID); - portableBundleManager.tryStartBundle(); - portableBundleManager.signalFailure(new Exception()); - portableBundleManager.tryStartBundle(); - - verify(bundleProgressListener, 
times(2)).onBundleStarted(); - } - - @Test - public void testProcessWatermarkWhenBundleNotStarted() { - Instant watermark = new Instant(); - portableBundleManager = - new PortableBundleManager<>(bundleProgressListener, 4, 1, bundleTimerScheduler, TIMER_ID); - portableBundleManager.processWatermark(watermark, emitter); - verify(bundleProgressListener, times(1)).onWatermark(eq(watermark), eq(emitter)); - } - - @Test - public void testQueueWatermarkWhenBundleStarted() { - Instant watermark = new Instant(); - portableBundleManager = - new PortableBundleManager<>(bundleProgressListener, 1, 1, bundleTimerScheduler, TIMER_ID); - - portableBundleManager.tryStartBundle(); - portableBundleManager.processWatermark(watermark, emitter); - verify(bundleProgressListener, times(0)).onWatermark(eq(watermark), eq(emitter)); - - portableBundleManager.tryFinishBundle(emitter); - verify(bundleProgressListener, times(1)).onWatermark(eq(watermark), eq(emitter)); - } - - @Test - public void testProcessTimerTriesFinishBundle() { - portableBundleManager = - new PortableBundleManager<>(bundleProgressListener, 1, 1, bundleTimerScheduler, TIMER_ID); - - portableBundleManager.tryStartBundle(); - KeyedTimerData keyedTimerData = mock(KeyedTimerData.class); - TimerInternals.TimerData timerData = mock(TimerInternals.TimerData.class); - when(keyedTimerData.getTimerData()).thenReturn(timerData); - when(timerData.getTimerId()).thenReturn(TIMER_ID); - - portableBundleManager.processTimer(keyedTimerData, emitter); - verify(bundleProgressListener, times(1)).onBundleFinished(any()); - verify(bundleTimerScheduler).schedule(any(KeyedTimerData.class), anyLong()); - } - - @Test - public void testDifferentTimerIdIsIgnored() { - portableBundleManager = - new PortableBundleManager<>(bundleProgressListener, 1, 1, bundleTimerScheduler, TIMER_ID); - - portableBundleManager.tryStartBundle(); - KeyedTimerData keyedTimerData = mock(KeyedTimerData.class); - TimerInternals.TimerData timerData = 
mock(TimerInternals.TimerData.class); - when(keyedTimerData.getTimerData()).thenReturn(timerData); - when(timerData.getTimerId()).thenReturn("NOT_TIMER_ID"); - - portableBundleManager.processTimer(keyedTimerData, emitter); - verify(bundleProgressListener, times(0)).onBundleFinished(any()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandlerTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandlerTest.java deleted file mode 100644 index 7fde83c02e5f..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaMetricsBundleProgressHandlerTest.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns.DISTRIBUTION_INT64_TYPE; -import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns.LATEST_INT64_TYPE; -import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.TypeUrns.SUM_INT64_TYPE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.nio.charset.Charset; -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.model.fnexecution.v1.BeamFnApi; -import org.apache.beam.model.pipeline.v1.MetricsApi; -import org.apache.beam.runners.core.metrics.CounterCell; -import org.apache.beam.runners.core.metrics.DistributionCell; -import org.apache.beam.runners.core.metrics.GaugeCell; -import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; -import org.apache.beam.runners.samza.metrics.SamzaMetricsContainer; -import org.apache.beam.sdk.metrics.MetricName; -import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; -import org.apache.samza.metrics.MetricsRegistryMap; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Before; -import org.junit.Test; - -public class SamzaMetricsBundleProgressHandlerTest { - - public static final String EXPECTED_NAMESPACE = "namespace"; - public static final String EXPECTED_COUNTER_NAME = "counterName"; - private MetricsRegistryMap metricsRegistryMap; - private SamzaMetricsContainer samzaMetricsContainer; - - private SamzaMetricsBundleProgressHandler samzaMetricsBundleProgressHandler; - private String stepName = "stepName"; - Map transformIdToUniqueName = new HashMap<>(); - - @Before - public void setup() { - metricsRegistryMap = new MetricsRegistryMap(); - samzaMetricsContainer = new SamzaMetricsContainer(metricsRegistryMap); - samzaMetricsBundleProgressHandler = - new SamzaMetricsBundleProgressHandler( - stepName, samzaMetricsContainer, 
transformIdToUniqueName); - } - - @Test - public void testCounter() { - // Hex for 123 - byte[] payload = "\173".getBytes(Charset.defaultCharset()); - - MetricsApi.MonitoringInfo monitoringInfo = - MetricsApi.MonitoringInfo.newBuilder() - .setType(SUM_INT64_TYPE) - .setPayload(ByteString.copyFrom(payload)) - .putLabels(MonitoringInfoConstants.Labels.NAMESPACE, EXPECTED_NAMESPACE) - .putLabels(MonitoringInfoConstants.Labels.NAME, EXPECTED_COUNTER_NAME) - .build(); - BeamFnApi.ProcessBundleResponse response = - BeamFnApi.ProcessBundleResponse.newBuilder().addMonitoringInfos(monitoringInfo).build(); - - // Execute - samzaMetricsBundleProgressHandler.onCompleted(response); - - // Verify - MetricName metricName = MetricName.named(EXPECTED_NAMESPACE, EXPECTED_COUNTER_NAME); - CounterCell counter = - (CounterCell) samzaMetricsContainer.getContainer(stepName).getCounter(metricName); - - assertEquals(counter.getCumulative(), (Long) 123L); - } - - @Test - public void testGauge() { - // TimeStamp = 0, Value = 123 - byte[] payload = "\000\173".getBytes(Charset.defaultCharset()); - - MetricsApi.MonitoringInfo monitoringInfo = - MetricsApi.MonitoringInfo.newBuilder() - .setType(LATEST_INT64_TYPE) - .setPayload(ByteString.copyFrom(payload)) - .putLabels(MonitoringInfoConstants.Labels.NAMESPACE, EXPECTED_NAMESPACE) - .putLabels(MonitoringInfoConstants.Labels.NAME, EXPECTED_COUNTER_NAME) - .build(); - BeamFnApi.ProcessBundleResponse response = - BeamFnApi.ProcessBundleResponse.newBuilder().addMonitoringInfos(monitoringInfo).build(); - - // Execute - samzaMetricsBundleProgressHandler.onCompleted(response); - - // Verify - MetricName metricName = MetricName.named(EXPECTED_NAMESPACE, EXPECTED_COUNTER_NAME); - GaugeCell gauge = (GaugeCell) samzaMetricsContainer.getContainer(stepName).getGauge(metricName); - - assertEquals(123L, gauge.getCumulative().value()); - assertTrue( - gauge.getCumulative().timestamp().isBefore(Instant.now().plus(Duration.millis(500)))); - assertTrue( - 
gauge.getCumulative().timestamp().isAfter(Instant.now().minus(Duration.millis(500)))); - } - - @Test - public void testDistribution() { - // Count = 123, sum = 124, min = 125, max = 126 - byte[] payload = "\173\174\175\176".getBytes(Charset.defaultCharset()); - - MetricsApi.MonitoringInfo monitoringInfo = - MetricsApi.MonitoringInfo.newBuilder() - .setType(DISTRIBUTION_INT64_TYPE) - .setPayload(ByteString.copyFrom(payload)) - .putLabels(MonitoringInfoConstants.Labels.NAMESPACE, EXPECTED_NAMESPACE) - .putLabels(MonitoringInfoConstants.Labels.NAME, EXPECTED_COUNTER_NAME) - .build(); - BeamFnApi.ProcessBundleResponse response = - BeamFnApi.ProcessBundleResponse.newBuilder().addMonitoringInfos(monitoringInfo).build(); - - // Execute - samzaMetricsBundleProgressHandler.onCompleted(response); - - // Verify - MetricName metricName = MetricName.named(EXPECTED_NAMESPACE, EXPECTED_COUNTER_NAME); - DistributionCell gauge = - (DistributionCell) samzaMetricsContainer.getContainer(stepName).getDistribution(metricName); - - assertEquals(123L, gauge.getCumulative().count()); - assertEquals(124L, gauge.getCumulative().sum()); - assertEquals(125L, gauge.getCumulative().min()); - assertEquals(126L, gauge.getCumulative().max()); - } - - @Test - public void testEmptyPayload() { - - byte[] emptyPayload = "".getBytes(Charset.defaultCharset()); - - MetricsApi.MonitoringInfo emptyMonitoringInfo = - MetricsApi.MonitoringInfo.newBuilder() - .setType(SUM_INT64_TYPE) - .setPayload(ByteString.copyFrom(emptyPayload)) - .putLabels(MonitoringInfoConstants.Labels.NAMESPACE, EXPECTED_NAMESPACE) - .putLabels(MonitoringInfoConstants.Labels.NAME, EXPECTED_COUNTER_NAME) - .build(); - // Hex for 123 - byte[] payload = "\173".getBytes(Charset.defaultCharset()); - - MetricsApi.MonitoringInfo monitoringInfo = - MetricsApi.MonitoringInfo.newBuilder() - .setType(SUM_INT64_TYPE) - .setPayload(ByteString.copyFrom(payload)) - .putLabels(MonitoringInfoConstants.Labels.NAMESPACE, EXPECTED_NAMESPACE) - 
.putLabels(MonitoringInfoConstants.Labels.NAME, EXPECTED_COUNTER_NAME) - .build(); - BeamFnApi.ProcessBundleResponse response = - BeamFnApi.ProcessBundleResponse.newBuilder() - .addMonitoringInfos(emptyMonitoringInfo) - .addMonitoringInfos(monitoringInfo) - .addMonitoringInfos(emptyMonitoringInfo) - .build(); - - // Execute - samzaMetricsBundleProgressHandler.onCompleted(response); - - // Verify - MetricName metricName = MetricName.named(EXPECTED_NAMESPACE, EXPECTED_COUNTER_NAME); - CounterCell counter = - (CounterCell) samzaMetricsContainer.getContainer(stepName).getCounter(metricName); - - assertEquals(counter.getCumulative(), (Long) 123L); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternalsTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternalsTest.java deleted file mode 100644 index 9409efbcf394..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaStoreStateInternalsTest.java +++ /dev/null @@ -1,432 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assume.assumeTrue; - -import java.io.File; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.TestSamzaRunner; -import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue; -import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValueSerdeFactory; -import org.apache.beam.runners.samza.state.SamzaMapState; -import org.apache.beam.runners.samza.state.SamzaSetState; -import org.apache.beam.runners.samza.translation.ConfigBuilder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.state.CombiningState; -import org.apache.beam.sdk.state.MapState; -import org.apache.beam.sdk.state.ReadableState; -import org.apache.beam.sdk.state.SetState; -import org.apache.beam.sdk.state.StateSpec; -import org.apache.beam.sdk.state.StateSpecs; -import org.apache.beam.sdk.state.ValueState; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterators; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets; -import org.apache.samza.context.ContainerContext; -import org.apache.samza.context.JobContext; -import org.apache.samza.metrics.MetricsRegistry; -import org.apache.samza.serializers.Serde; -import org.apache.samza.storage.StorageEngineFactory; -import org.apache.samza.storage.kv.Entry; -import org.apache.samza.storage.kv.KeyValueIterator; -import org.apache.samza.storage.kv.KeyValueStore; -import org.apache.samza.storage.kv.KeyValueStoreMetrics; -import org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStorageEngineFactory; -import org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStore; -import org.apache.samza.system.SystemStreamPartition; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; - -/** Tests for SamzaStoreStateInternals. 
*/ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - // TODO(https://github.com/apache/beam/issues/21230): Remove when new version of - // errorprone is released (2.11.0) - "unused" -}) -public class SamzaStoreStateInternalsTest implements Serializable { - @Rule - public final transient TestPipeline pipeline = - TestPipeline.fromOptions( - PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner").create()); - - @BeforeClass - public static void beforeClass() { - // TODO(https://github.com/apache/beam/issues/32208) - assumeTrue(System.getProperty("java.version").startsWith("1.")); - } - - @Test - public void testMapStateIterator() { - final String stateId = "foo"; - final String countStateId = "count"; - - DoFn>, KV> fn = - new DoFn>, KV>() { - - @StateId(stateId) - private final StateSpec> mapState = - StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of()); - - @StateId(countStateId) - private final StateSpec> countState = - StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers()); - - @ProcessElement - public void processElement( - ProcessContext c, - @StateId(stateId) MapState mapState, - @StateId(countStateId) CombiningState count) { - SamzaMapState state = (SamzaMapState) mapState; - KV value = c.element().getValue(); - state.put(value.getKey(), value.getValue()); - count.add(1); - if (count.read() >= 4) { - final List> content = new ArrayList<>(); - final Iterator> iterator = state.readIterator().read(); - while (iterator.hasNext()) { - Map.Entry entry = iterator.next(); - content.add(KV.of(entry.getKey(), entry.getValue())); - c.output(KV.of(entry.getKey(), entry.getValue())); - } - - assertEquals( - content, ImmutableList.of(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12))); - } - } - }; - - PCollection> output = - pipeline - .apply( - Create.of( - KV.of("hello", KV.of("a", 97)), - KV.of("hello", KV.of("b", 42)), - KV.of("hello", KV.of("b", 42)), - KV.of("hello", KV.of("c", 12)))) - 
.apply(ParDo.of(fn)); - - PAssert.that(output).containsInAnyOrder(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12)); - - pipeline.run(); - } - - @Test - public void testSetStateIterator() { - final String stateId = "foo"; - final String countStateId = "count"; - - DoFn, Set> fn = - new DoFn, Set>() { - - @StateId(stateId) - private final StateSpec> setState = StateSpecs.set(VarIntCoder.of()); - - @StateId(countStateId) - private final StateSpec> countState = - StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers()); - - @ProcessElement - public void processElement( - ProcessContext c, - @StateId(stateId) SetState setState, - @StateId(countStateId) CombiningState count) { - SamzaSetState state = (SamzaSetState) setState; - ReadableState isEmpty = state.isEmpty(); - state.add(c.element().getValue()); - assertFalse(isEmpty.read()); - count.add(1); - if (count.read() >= 4) { - final Set content = new HashSet<>(); - final Iterator iterator = state.readIterator().read(); - while (iterator.hasNext()) { - Integer value = iterator.next(); - content.add(value); - } - c.output(content); - - assertEquals(content, Sets.newHashSet(97, 42, 12)); - } - } - }; - - PCollection> output = - pipeline - .apply( - Create.of( - KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12))) - .apply(ParDo.of(fn)); - - PAssert.that(output).containsInAnyOrder(Sets.newHashSet(97, 42, 12)); - - pipeline.run(); - } - - @Test - public void testValueStateSameIdAcrossParDo() { - final String stateId = "foo"; - - DoFn, KV> fn = - new DoFn, KV>() { - - @StateId(stateId) - private final StateSpec> intState = - StateSpecs.value(VarIntCoder.of()); - - @ProcessElement - public void processElement( - @StateId(stateId) ValueState state, OutputReceiver> r) { - Integer currentValue = MoreObjects.firstNonNull(state.read(), 0); - r.output(KV.of("sizzle", currentValue)); - state.write(currentValue + 1); - } - }; - - DoFn, Integer> fn2 = - new DoFn, Integer>() { - - 
@StateId(stateId) - private final StateSpec> intState = - StateSpecs.value(VarIntCoder.of()); - - @ProcessElement - public void processElement( - @StateId(stateId) ValueState state, OutputReceiver r) { - Integer currentValue = MoreObjects.firstNonNull(state.read(), 13); - r.output(currentValue); - state.write(currentValue + 13); - } - }; - - PCollection> intermediate = - pipeline - .apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84))) - .apply("First stateful ParDo", ParDo.of(fn)); - - PCollection output = intermediate.apply("Second stateful ParDo", ParDo.of(fn2)); - - PAssert.that(intermediate) - .containsInAnyOrder(KV.of("sizzle", 0), KV.of("sizzle", 1), KV.of("sizzle", 2)); - PAssert.that(output).containsInAnyOrder(13, 26, 39); - pipeline.run(); - } - - @Test - public void testValueStateSameIdAcrossParDoWithSameName() { - final String stateId = "foo"; - - DoFn, KV> fn = - new DoFn, KV>() { - - @StateId(stateId) - private final StateSpec> intState = - StateSpecs.value(VarIntCoder.of()); - - @ProcessElement - public void processElement( - @StateId(stateId) ValueState state, OutputReceiver> r) { - Integer currentValue = MoreObjects.firstNonNull(state.read(), 0); - r.output(KV.of("hello", currentValue)); - state.write(currentValue + 1); - } - }; - - DoFn, Integer> fn2 = - new DoFn, Integer>() { - - @StateId(stateId) - private final StateSpec> intState = - StateSpecs.value(VarIntCoder.of()); - - @ProcessElement - public void processElement( - @StateId(stateId) ValueState state, OutputReceiver r) { - Integer currentValue = MoreObjects.firstNonNull(state.read(), 13); - r.output(currentValue); - state.write(currentValue + 13); - } - }; - - PCollection> intermediate = - pipeline - .apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84))) - .apply("Stateful ParDo with Same Name", ParDo.of(fn)); - - PCollection output = - intermediate.apply("Stateful ParDo with Same Name", ParDo.of(fn2)); - - PAssert.that(intermediate) - 
.containsInAnyOrder(KV.of("hello", 0), KV.of("hello", 1), KV.of("hello", 2)); - - PAssert.that(output).containsInAnyOrder(13, 26, 39); - pipeline.run(); - } - - /** A storage engine to create test stores. */ - public static class TestStorageEngine extends InMemoryKeyValueStorageEngineFactory { - - @Override - public KeyValueStore getKVStore( - String storeName, - File storeDir, - MetricsRegistry registry, - SystemStreamPartition changeLogSystemStreamPartition, - JobContext jobContext, - ContainerContext containerContext, - StorageEngineFactory.StoreMode readWrite) { - KeyValueStoreMetrics metrics = new KeyValueStoreMetrics(storeName, registry); - return new TestStore(metrics); - } - } - - /** A test store based on InMemoryKeyValueStore. */ - public static class TestStore extends InMemoryKeyValueStore { - static List iterators = Collections.synchronizedList(new ArrayList<>()); - - public TestStore(KeyValueStoreMetrics metrics) { - super(metrics); - } - - @Override - public KeyValueIterator range(byte[] from, byte[] to) { - TestKeyValueIteraor iter = new TestKeyValueIteraor(super.range(from, to)); - iterators.add(iter); - return iter; - } - - static class TestKeyValueIteraor implements KeyValueIterator { - private final KeyValueIterator iter; - boolean closed = false; - - TestKeyValueIteraor(KeyValueIterator iter) { - this.iter = iter; - } - - @Override - public void close() { - iter.close(); - closed = true; - } - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public Entry next() { - return iter.next(); - } - } - } - - @Test - public void testIteratorClosed() { - final String stateId = "foo"; - - DoFn, Set> fn = - new DoFn, Set>() { - - @StateId(stateId) - private final StateSpec> setState = StateSpecs.set(VarIntCoder.of()); - - @ProcessElement - public void processElement( - ProcessContext c, @StateId(stateId) SetState setState) { - SamzaSetState state = (SamzaSetState) setState; - state.add(c.element().getValue()); - - // the 
iterator for size needs to be closed - int size = Iterators.size(state.readIterator().read()); - - if (size > 1) { - final Iterator iterator = state.readIterator().read(); - assertTrue(iterator.hasNext()); - // this iterator should be closed too - iterator.next(); - } - } - }; - - pipeline - .apply( - Create.of( - KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12))) - .apply(ParDo.of(fn)); - - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setRunner(TestSamzaRunner.class); - Map configs = new HashMap<>(ConfigBuilder.localRunConfig()); - configs.put("stores.foo.factory", TestStorageEngine.class.getName()); - pipeline.getOptions().as(SamzaPipelineOptions.class).setConfigOverride(configs); - pipeline.run(); - - // The test code creates 7 underlying iterators, and 1 more is created during state.clear() - // Verify all of them are closed - assertEquals(8, TestStore.iterators.size()); - TestStore.iterators.forEach(iter -> assertTrue(iter.closed)); - } - - @Test - public void testStateValueSerde() throws IOException { - StateValueSerdeFactory stateValueSerdeFactory = new StateValueSerdeFactory(); - Serde> serde = (Serde) stateValueSerdeFactory.getSerde("Test", null); - int value = 123; - Coder coder = VarIntCoder.of(); - - byte[] valueBytes = serde.toBytes(StateValue.of(value, coder)); - StateValue stateValue1 = serde.fromBytes(valueBytes); - StateValue stateValue2 = StateValue.of(valueBytes); - assertEquals(stateValue1.getValue(coder).intValue(), value); - assertEquals(stateValue2.getValue(coder).intValue(), value); - - Integer nullValue = null; - byte[] nullBytes = serde.toBytes(StateValue.of(nullValue, coder)); - StateValue nullStateValue = serde.fromBytes(nullBytes); - assertNull(nullBytes); - assertNull(nullStateValue.getValue(coder)); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java 
b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java deleted file mode 100644 index ecedd7ae79f1..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SamzaTimerInternalsFactoryTest.java +++ /dev/null @@ -1,752 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray; -import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArraySerdeFactory; -import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue; -import org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValueSerdeFactory; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.values.CausedByDrain; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.samza.config.MapConfig; -import org.apache.samza.context.TaskContext; -import org.apache.samza.metrics.MetricsRegistryMap; -import org.apache.samza.operators.Scheduler; -import org.apache.samza.serializers.Serde; -import org.apache.samza.storage.kv.KeyValueStore; -import org.apache.samza.storage.kv.KeyValueStoreMetrics; -import org.apache.samza.storage.kv.RocksDbKeyValueStore; -import org.apache.samza.storage.kv.SerializedKeyValueStore; -import org.apache.samza.storage.kv.SerializedKeyValueStoreMetrics; -import org.joda.time.Instant; 
-import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.FlushOptions; -import org.rocksdb.Options; -import org.rocksdb.WriteOptions; - -/** - * Tests for {@link SamzaTimerInternalsFactory}. Covers both event-time timers and processing-timer - * timers. - */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SamzaTimerInternalsFactoryTest { - @Rule public transient TemporaryFolder temporaryFolder = new TemporaryFolder(); - - private KeyValueStore> createStore() { - final Options options = new Options(); - options.setCreateIfMissing(true); - - RocksDbKeyValueStore rocksStore = - new RocksDbKeyValueStore( - temporaryFolder.getRoot(), - options, - new MapConfig(), - false, - "beamStore", - new WriteOptions(), - new FlushOptions(), - new KeyValueStoreMetrics("beamStore", new MetricsRegistryMap())); - - return new SerializedKeyValueStore<>( - rocksStore, - new ByteArraySerdeFactory.ByteArraySerde(), - new StateValueSerdeFactory.StateValueSerde(), - new SerializedKeyValueStoreMetrics("beamStore", new MetricsRegistryMap())); - } - - private static SamzaStoreStateInternals.Factory createNonKeyedStateInternalsFactory( - SamzaPipelineOptions pipelineOptions, KeyValueStore> store) { - final TaskContext context = mock(TaskContext.class); - when(context.getStore(anyString())).thenReturn((KeyValueStore) store); - - return SamzaStoreStateInternals.createNonKeyedStateInternalsFactory( - "42", context, pipelineOptions); - } - - private static SamzaTimerInternalsFactory createTimerInternalsFactory( - Scheduler> timerRegistry, - String timerStateId, - SamzaPipelineOptions pipelineOptions, - KeyValueStore> store) { - - final SamzaStoreStateInternals.Factory nonKeyedStateInternalsFactory = - createNonKeyedStateInternalsFactory(pipelineOptions, store); - - return SamzaTimerInternalsFactory.createTimerInternalFactory( 
- StringUtf8Coder.of(), - timerRegistry, - timerStateId, - nonKeyedStateInternalsFactory, - (WindowingStrategy) WindowingStrategy.globalDefault(), - PCollection.IsBounded.BOUNDED, - pipelineOptions); - } - - private static class TestTimerRegistry implements Scheduler> { - private final List> timers = new ArrayList<>(); - - @Override - public void schedule(KeyedTimerData key, long timestamp) { - timers.add(key); - } - - @Override - public void delete(KeyedTimerData key) { - timers.remove(key); - } - } - - @Test - public void testEventTimeTimers() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - final TimerInternals.TimerData timer1 = - TimerInternals.TimerData.of( - "timer1", - nameSpace, - new Instant(10), - new Instant(10), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer1); - - final TimerInternals.TimerData timer2 = - TimerInternals.TimerData.of( - "timer2", - nameSpace, - new Instant(100), - new Instant(100), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer2); - - timerInternalsFactory.setInputWatermark(new Instant(5)); - Collection> readyTimers = timerInternalsFactory.removeReadyTimers(); - assertTrue(readyTimers.isEmpty()); - - timerInternalsFactory.setInputWatermark(new Instant(20)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - assertEquals(1, readyTimers.size()); - assertEquals(timer1, readyTimers.iterator().next().getTimerData()); - - timerInternalsFactory.setInputWatermark(new Instant(150)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - assertEquals(1, 
readyTimers.size()); - assertEquals(timer2, readyTimers.iterator().next().getTimerData()); - - store.close(); - } - - @Test - public void testRestoreEventBufferSize() throws Exception { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - - KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final String key = "testKey"; - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey(key); - final TimerInternals.TimerData timer1 = - TimerInternals.TimerData.of( - "timer1", - nameSpace, - new Instant(10), - new Instant(10), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer1); - - store.close(); - - // restore by creating a new instance - store = createStore(); - - final SamzaTimerInternalsFactory restoredFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - assertEquals(1, restoredFactory.getEventTimeBuffer().size()); - - restoredFactory.setInputWatermark(new Instant(150)); - Collection> readyTimers = restoredFactory.removeReadyTimers(); - assertEquals(1, readyTimers.size()); - - // Timer 1 should be evicted from buffer - assertTrue(restoredFactory.getEventTimeBuffer().isEmpty()); - final TimerInternals restoredTimerInternals = restoredFactory.timerInternalsForKey(key); - final TimerInternals.TimerData timer2 = - TimerInternals.TimerData.of( - "timer2", - nameSpace, - new Instant(200), - new Instant(200), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - restoredTimerInternals.setTimer(timer2); - - // Timer 2 should be added to the Event buffer - assertEquals(1, restoredFactory.getEventTimeBuffer().size()); - // Timer 2 should not be ready - readyTimers = restoredFactory.removeReadyTimers(); - assertEquals(0, readyTimers.size()); - - 
restoredFactory.setInputWatermark(new Instant(250)); - - // Timer 2 should be ready - readyTimers = restoredFactory.removeReadyTimers(); - assertEquals(1, readyTimers.size()); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - StringUtf8Coder.of().encode(key, baos); - byte[] keyBytes = baos.toByteArray(); - assertEquals( - new ArrayList<>(readyTimers), Arrays.asList(new KeyedTimerData<>(keyBytes, key, timer2))); - - store.close(); - } - - @Test - public void testRestore() throws Exception { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - - KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final String key = "testKey"; - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey(key); - final TimerInternals.TimerData timer1 = - TimerInternals.TimerData.of( - "timer1", - nameSpace, - new Instant(10), - new Instant(10), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer1); - - final TimerInternals.TimerData timer2 = - TimerInternals.TimerData.of( - "timer2", - nameSpace, - new Instant(100), - new Instant(100), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer2); - - store.close(); - - // restore by creating a new instance - store = createStore(); - final SamzaTimerInternalsFactory restoredFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - restoredFactory.setInputWatermark(new Instant(150)); - Collection> readyTimers = restoredFactory.removeReadyTimers(); - assertEquals(2, readyTimers.size()); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - StringUtf8Coder.of().encode(key, baos); - byte[] keyBytes = baos.toByteArray(); - assertEquals( - new ArrayList<>(readyTimers), - Arrays.asList( 
- new KeyedTimerData<>(keyBytes, key, timer1), - new KeyedTimerData<>(keyBytes, key, timer2))); - - store.close(); - } - - @Test - public void testProcessingTimeTimers() throws IOException { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - - KeyValueStore> store = createStore(); - TestTimerRegistry timerRegistry = new TestTimerRegistry(); - - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(timerRegistry, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - final TimerInternals.TimerData timer1 = - TimerInternals.TimerData.of( - "timer1", - nameSpace, - new Instant(10), - new Instant(10), - TimeDomain.PROCESSING_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer1); - - final TimerInternals.TimerData timer2 = - TimerInternals.TimerData.of( - "timer2", - nameSpace, - new Instant(100), - new Instant(100), - TimeDomain.PROCESSING_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer2); - - final TimerInternals.TimerData timer3 = - TimerInternals.TimerData.of( - "timer3", - "timerFamilyId3", - nameSpace, - new Instant(100), - new Instant(100), - TimeDomain.PROCESSING_TIME); - timerInternals.setTimer(timer3); - assertEquals(3, timerRegistry.timers.size()); - - store.close(); - - // restore by creating a new instance - store = createStore(); - TestTimerRegistry restoredRegistry = new TestTimerRegistry(); - final SamzaTimerInternalsFactory restoredFactory = - createTimerInternalsFactory(restoredRegistry, "timer", pipelineOptions, store); - - assertEquals(3, restoredRegistry.timers.size()); - - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - StringUtf8Coder.of().encode("testKey", baos); - final byte[] keyBytes = baos.toByteArray(); - restoredFactory.removeProcessingTimer(new 
KeyedTimerData<>(keyBytes, "testKey", timer1)); - restoredFactory.removeProcessingTimer(new KeyedTimerData<>(keyBytes, "testKey", timer2)); - restoredFactory.removeProcessingTimer(new KeyedTimerData<>(keyBytes, "testKey", timer3)); - store.close(); - } - - @Test - public void testOverride() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - - KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - final TimerInternals.TimerData timer1 = - TimerInternals.TimerData.of( - "timerId", - nameSpace, - new Instant(10), - new Instant(10), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer1); - - // this timer should override the first timer - final TimerInternals.TimerData timer2 = - TimerInternals.TimerData.of( - "timerId", - nameSpace, - new Instant(100), - new Instant(100), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer2); - - final TimerInternals.TimerData timer3 = - TimerInternals.TimerData.of( - "timerId2", - nameSpace, - new Instant(200), - new Instant(200), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer3); - - // this timer shouldn't override since it has a different id - timerInternalsFactory.setInputWatermark(new Instant(50)); - Collection> readyTimers = timerInternalsFactory.removeReadyTimers(); - assertEquals(0, readyTimers.size()); - - timerInternalsFactory.setInputWatermark(new Instant(150)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - assertEquals(1, readyTimers.size()); - - timerInternalsFactory.setInputWatermark(new Instant(250)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - assertEquals(1, 
readyTimers.size()); - - store.close(); - } - - /** - * Test the number of expired event timers for each watermark does not exceed the predefined - * limit. - */ - @Test - public void testMaxExpiredEventTimersProcessAtOnce() { - // If maxExpiredTimersToProcessOnce <= the number of expired timers, then load - // "maxExpiredTimersToProcessOnce" timers. - testMaxExpiredEventTimersProcessAtOnce(10, 10, 5, 5); - testMaxExpiredEventTimersProcessAtOnce(10, 10, 10, 10); - - // If maxExpiredTimersToProcessOnce > the number of expired timers, then load all the ready - // timers. - testMaxExpiredEventTimersProcessAtOnce(10, 10, 20, 10); - } - - private void testMaxExpiredEventTimersProcessAtOnce( - int totalNumberOfTimersInStore, - int totalNumberOfExpiredTimers, - int maxExpiredTimersToProcessOnce, - int expectedExpiredTimersToProcess) { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - pipelineOptions.setMaxReadyTimersToProcessOnce(maxExpiredTimersToProcessOnce); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - - TimerInternals.TimerData timer; - for (int i = 0; i < totalNumberOfTimersInStore; i++) { - timer = - TimerInternals.TimerData.of( - "timer" + i, - nameSpace, - new Instant(i), - new Instant(i), - TimeDomain.EVENT_TIME, - CausedByDrain.NORMAL); - timerInternals.setTimer(timer); - } - - // Set the timestamp of the input watermark to be the value of totalNumberOfExpiredTimers - // so that totalNumberOfExpiredTimers timers are expected be expired with respect to this - // watermark. 
- final Instant inputWatermark = new Instant(totalNumberOfExpiredTimers); - timerInternalsFactory.setInputWatermark(inputWatermark); - final Collection> readyTimers = - timerInternalsFactory.removeReadyTimers(); - assertEquals(expectedExpiredTimersToProcess, readyTimers.size()); - store.close(); - } - - @Test - public void testBufferSizeNotExceedingPipelineOptionValue() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - pipelineOptions.setEventTimerBufferSize(2); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - - // prepare 5 timers. - // timers in memory are then timestamped from 0 - 1; - // timers in store are then timestamped from 0 - 4. - for (int i = 0; i < 5; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - - // only two timers are supposed to be in event time buffer - assertEquals(2, timerInternalsFactory.getEventTimeBuffer().size()); - - store.close(); - } - - @Test - public void testAllTimersAreFiredWithReload() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - pipelineOptions.setEventTimerBufferSize(2); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - - // prepare 3 timers. - // timers in memory now are timestamped from 0 - 1; - // timers in store now are timestamped from 0 - 2. 
- for (int i = 0; i < 3; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - - // total number of event time timers to fire equals to the number of timers in store - Collection> readyTimers; - timerInternalsFactory.setInputWatermark(new Instant(3)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - // buffer should reload from store and all timers are supposed to be fired. - assertEquals(3, readyTimers.size()); - - store.close(); - } - - /** - * Test the total number of event time timers reloaded into memory is aligned with the number of - * the event time timers written to the store. Moreover, event time timers reloaded into memory is - * maintained in order. - */ - @Test - public void testAllTimersAreFiredInOrder() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - pipelineOptions.setEventTimerBufferSize(5); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - - // prepare 8 timers. - // timers in memory now are timestamped from 0 - 4; - // timers in store now are timestamped from 0 - 7. - for (int i = 0; i < 8; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - - // fire the first 2 timers. - // timers in memory now are timestamped from 2 - 4; - // timers in store now are timestamped from 2 - 7. 
- Collection> readyTimers; - timerInternalsFactory.setInputWatermark(new Instant(1)); - long lastTimestamp = 0; - readyTimers = timerInternalsFactory.removeReadyTimers(); - for (KeyedTimerData keyedTimerData : readyTimers) { - final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis(); - assertTrue(lastTimestamp <= currentTimeStamp); - lastTimestamp = currentTimeStamp; - } - assertEquals(2, readyTimers.size()); - - // add another 12 timers. - // timers in memory (reloaded for three times) now are timestamped from 2 - 4; 5 - 9; 10 - 14; - // 15 - 19. - // timers in store now are timestamped from 2 - 19. - // the total number of timers to fire is 18. - for (int i = 8; i < 20; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - timerInternalsFactory.setInputWatermark(new Instant(20)); - lastTimestamp = 0; - readyTimers = timerInternalsFactory.removeReadyTimers(); - for (KeyedTimerData keyedTimerData : readyTimers) { - final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis(); - assertTrue(lastTimestamp <= currentTimeStamp); - lastTimestamp = currentTimeStamp; - } - assertEquals(18, readyTimers.size()); - - store.close(); - } - - @Test - public void testNewTimersAreInsertedInOrder() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - pipelineOptions.setEventTimerBufferSize(5); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - - // prepare 10 timers. - // timers in memory now are timestamped from 0 - 4; - // timers in store now are timestamped from 0 - 9. 
- for (int i = 0; i < 10; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - - // fire the first 2 timers. - // timers in memory now are timestamped from 2 - 4; - // timers in store now are timestamped from 2 - 9. - Collection> readyTimers; - timerInternalsFactory.setInputWatermark(new Instant(1)); - long lastTimestamp = 0; - readyTimers = timerInternalsFactory.removeReadyTimers(); - for (KeyedTimerData keyedTimerData : readyTimers) { - final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis(); - assertTrue(lastTimestamp <= currentTimeStamp); - lastTimestamp = currentTimeStamp; - } - assertEquals(2, readyTimers.size()); - - // add 3 timers but timer 2 has duplicate so drop. - // timers in memory now are timestamped from 0 to 2 prefixed with lateTimer, and 2 to - // 4 prefixed with timer, timestamp is in order; - // timers in store now are timestamped from 0 to 2 prefixed with lateTimer, and 2 to 9 - // prefixed with timer, timestamp is in order; - for (int i = 0; i < 3; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - - // there are 11 timers in state now. - // watermark 5 comes, so 6 timers will be evicted because their timestamp is less than 5. - // memory will be reloaded once to have 5 to 8 left (reload to have 4 to 8, but 4 is evicted), 5 - // to 9 left in store. - // all of them are in order for firing. 
- timerInternalsFactory.setInputWatermark(new Instant(5)); - lastTimestamp = 0; - readyTimers = timerInternalsFactory.removeReadyTimers(); - for (KeyedTimerData keyedTimerData : readyTimers) { - final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis(); - assertTrue(lastTimestamp <= currentTimeStamp); - lastTimestamp = currentTimeStamp; - } - assertEquals(6, readyTimers.size()); - assertEquals(4, timerInternalsFactory.getEventTimeBuffer().size()); - - // watermark 10 comes, so all timers will be evicted in order. - timerInternalsFactory.setInputWatermark(new Instant(10)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - for (KeyedTimerData keyedTimerData : readyTimers) { - final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis(); - assertTrue(lastTimestamp <= currentTimeStamp); - lastTimestamp = currentTimeStamp; - } - assertEquals(4, readyTimers.size()); - assertEquals(0, timerInternalsFactory.getEventTimeBuffer().size()); - - store.close(); - } - - @Test - public void testBufferRefilledAfterRestoreToNonFullState() { - final SamzaPipelineOptions pipelineOptions = - PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - pipelineOptions.setEventTimerBufferSize(5); - - final KeyValueStore> store = createStore(); - final SamzaTimerInternalsFactory timerInternalsFactory = - createTimerInternalsFactory(null, "timer", pipelineOptions, store); - - final StateNamespace nameSpace = StateNamespaces.global(); - final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey"); - - // prepare (buffer capacity + 1) 6 timers. - // timers in memory now are timestamped from 0 - 4; - // timer in store now is timestamped 6. 
- for (int i = 0; i < 6; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - - // total number of event time timers to fire equals to the number of timers in store - Collection> readyTimers; - timerInternalsFactory.setInputWatermark(new Instant(4)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - assertEquals(5, readyTimers.size()); - // reloaded timer5 - assertEquals(1, timerInternalsFactory.getEventTimeBuffer().size()); - - for (int i = 6; i < 13; i++) { - timerInternals.setTimer( - nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME); - } - // timers should go into buffer not state - assertEquals(5, timerInternalsFactory.getEventTimeBuffer().size()); - - // watermark 10 comes,6 timers will be evicted in order and 2 still in buffer. - timerInternalsFactory.setInputWatermark(new Instant(10)); - readyTimers = timerInternalsFactory.removeReadyTimers(); - long lastTimestamp = 0; - for (KeyedTimerData keyedTimerData : readyTimers) { - final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis(); - assertTrue(lastTimestamp <= currentTimeStamp); - lastTimestamp = currentTimeStamp; - } - assertEquals(6, readyTimers.size()); - assertEquals(2, timerInternalsFactory.getEventTimeBuffer().size()); - - store.close(); - } - - @Test - public void testByteArray() { - ByteArray key1 = ByteArray.of("hello world".getBytes(StandardCharsets.UTF_8)); - Serde serde = new ByteArraySerdeFactory().getSerde("", null); - byte[] keyBytes = serde.toBytes(key1); - ByteArray key2 = serde.fromBytes(keyBytes); - assertEquals(key1, key2); - - Map map = new HashMap<>(); - map.put(key1, "found it"); - assertEquals("found it", map.get(key2)); - - map.remove(key1); - assertTrue(!map.containsKey(key2)); - assertTrue(map.isEmpty()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SdkHarnessDoFnRunnerTest.java 
b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SdkHarnessDoFnRunnerTest.java deleted file mode 100644 index e6029beb93b0..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/SdkHarnessDoFnRunnerTest.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.runtime; - -import java.util.concurrent.TimeoutException; -import org.junit.Test; - -public class SdkHarnessDoFnRunnerTest { - - @Test(expected = TimeoutException.class) - public void testRunWithTimeoutOccurred() throws Exception { - SamzaDoFnRunners.SdkHarnessDoFnRunner.runWithTimeout( - 100, - () -> { - try { - Thread.sleep(500); - } catch (InterruptedException ignored) { - } - }); - } - - @Test - public void testRunWithTimeoutDisabled() throws Exception { - SamzaDoFnRunners.SdkHarnessDoFnRunner.runWithTimeout( - -1, - () -> { - try { - Thread.sleep(500); - } catch (InterruptedException ignored) { - } - }); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java deleted file mode 100644 index 9fbc515979b0..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/ConfigGeneratorTest.java +++ /dev/null @@ -1,461 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.translation; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import org.apache.beam.runners.samza.SamzaExecutionEnvironment; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.SamzaRunner; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.state.StateSpec; -import org.apache.beam.sdk.state.StateSpecs; -import org.apache.beam.sdk.state.ValueState; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.Impulse; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps; -import org.apache.samza.config.Config; -import org.apache.samza.config.JobConfig; -import org.apache.samza.config.JobCoordinatorConfig; -import org.apache.samza.config.TaskConfig; -import org.apache.samza.config.ZkConfig; -import org.apache.samza.job.yarn.YarnJobFactory; -import org.apache.samza.runtime.LocalApplicationRunner; -import org.apache.samza.runtime.RemoteApplicationRunner; -import org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory; -import org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStorageEngineFactory; -import org.apache.samza.zk.ZkJobCoordinatorFactory; -import org.junit.Test; - -/** Test config generations for {@link org.apache.beam.runners.samza.SamzaRunner}. 
*/ -// TODO(https://github.com/apache/beam/issues/21230): Remove when new version of errorprone is -// released (2.11.0) -@SuppressWarnings("unused") -public class ConfigGeneratorTest { - private static final String APP_RUNNER_CLASS = "app.runner.class"; - private static final String JOB_FACTORY_CLASS = "job.factory.class"; - - @Test - public void testStatefulBeamStoreConfig() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestStoreConfig"); - options.setRunner(SamzaRunner.class); - - Pipeline pipeline = Pipeline.create(options); - pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally()); - - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config = configBuilder.build(); - - assertEquals( - RocksDbKeyValueStorageEngineFactory.class.getName(), - config.get("stores.beamStore.factory")); - assertEquals("byteArraySerde", config.get("stores.beamStore.key.serde")); - assertEquals("stateValueSerde", config.get("stores.beamStore.msg.serde")); - assertNull(config.get("stores.beamStore.changelog")); - - options.setStateDurable(true); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config2 = configBuilder.build(); - assertEquals( - "TestStoreConfig-1-beamStore-changelog", config2.get("stores.beamStore.changelog")); - } - - @Test - public void testStatelessBeamStoreConfig() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestStoreConfig"); - options.setRunner(SamzaRunner.class); - - Pipeline pipeline = 
Pipeline.create(options); - pipeline.apply(Impulse.create()).apply(Filter.by(Objects::nonNull)); - - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config = configBuilder.build(); - - assertEquals( - InMemoryKeyValueStorageEngineFactory.class.getName(), - config.get("stores.beamStore.factory")); - assertEquals("byteArraySerde", config.get("stores.beamStore.key.serde")); - assertEquals("stateValueSerde", config.get("stores.beamStore.msg.serde")); - assertNull(config.get("stores.beamStore.changelog")); - - options.setStateDurable(true); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config2 = configBuilder.build(); - // For stateless jobs, ignore state durable pipeline option. 
- assertNull(config2.get("stores.beamStore.changelog")); - } - - @Test - public void testSamzaLocalExecutionEnvironmentConfig() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestEnvConfig"); - options.setRunner(SamzaRunner.class); - options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL); - - Pipeline pipeline = Pipeline.create(options); - pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally()); - - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config = configBuilder.build(); - - assertTrue( - Maps.difference(config, ConfigBuilder.localRunConfig()).entriesOnlyOnRight().isEmpty()); - } - - @Test - public void testSamzaYarnExecutionEnvironmentConfig() { - final String yarnPackagePath = "yarn.package.path"; - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestEnvConfig"); - options.setRunner(SamzaRunner.class); - options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.YARN); - options.setConfigOverride( - ImmutableMap.builder() - .put( - yarnPackagePath, - "file://${basedir}/target/${project.artifactId}-${pom.version}-dist.tar.gz") - .build()); - - Pipeline pipeline = Pipeline.create(options); - pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally()); - - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, 
nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - try { - Config config = configBuilder.build(); - assertEquals(config.get(APP_RUNNER_CLASS), RemoteApplicationRunner.class.getName()); - assertEquals(config.get(JOB_FACTORY_CLASS), YarnJobFactory.class.getName()); - } catch (IllegalArgumentException e) { - throw new AssertionError( - String.format( - "Failed to validate correct configs for %s samza execution environment", - SamzaExecutionEnvironment.YARN), - e); - } - } - - @Test - public void testSamzaStandAloneExecutionEnvironmentConfig() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestEnvConfig"); - options.setRunner(SamzaRunner.class); - options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.STANDALONE); - options.setConfigOverride( - ImmutableMap.builder().put(ZkConfig.ZK_CONNECT, "localhost:2181").build()); - - Pipeline pipeline = Pipeline.create(options); - pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally()); - - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - try { - Config config = configBuilder.build(); - assertEquals(config.get(APP_RUNNER_CLASS), LocalApplicationRunner.class.getName()); - assertEquals( - config.get(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY), - ZkJobCoordinatorFactory.class.getName()); - } catch (IllegalArgumentException e) { - throw new AssertionError( - String.format( - "Failed to validate correct configs for %s samza execution environment", - 
SamzaExecutionEnvironment.STANDALONE), - e); - } - } - - @Test - public void testUserStoreConfig() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestStoreConfig"); - options.setRunner(SamzaRunner.class); - - Pipeline pipeline = Pipeline.create(options); - pipeline - .apply( - Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings()))) - .apply( - ParDo.of( - new DoFn, Void>() { - private static final String testState = "testState"; - - @StateId(testState) - private final StateSpec> state = StateSpecs.value(); - - @ProcessElement - public void processElement( - ProcessContext context, @StateId(testState) ValueState state) {} - })); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config = configBuilder.build(); - - assertEquals( - RocksDbKeyValueStorageEngineFactory.class.getName(), - config.get("stores.testState.factory")); - assertEquals("byteArraySerde", config.get("stores.testState.key.serde")); - assertEquals("stateValueSerde", config.get("stores.testState.msg.serde")); - assertNull(config.get("stores.testState.changelog")); - - options.setStateDurable(true); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config2 = configBuilder.build(); - assertEquals( - "TestStoreConfig-1-testState-changelog", config2.get("stores.testState.changelog")); - } - - @Test - public void testUserStoreConfigSameStateIdAcrossParDo() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestStoreConfig"); - options.setRunner(SamzaRunner.class); - - Pipeline 
pipeline = Pipeline.create(options); - pipeline - .apply( - Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings()))) - .apply( - "First stateful ParDo", - ParDo.of( - new DoFn, KV>() { - private static final String testState = "testState"; - - @StateId(testState) - private final StateSpec> state = StateSpecs.value(); - - @ProcessElement - public void processElement( - ProcessContext context, @StateId(testState) ValueState state) { - context.output(context.element()); - } - })) - .apply( - "Second stateful ParDo", - ParDo.of( - new DoFn, Void>() { - private static final String testState = "testState"; - - @StateId(testState) - private final StateSpec> state = StateSpecs.value(); - - @ProcessElement - public void processElement( - ProcessContext context, @StateId(testState) ValueState state) {} - })); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config = configBuilder.build(); - - assertEquals( - RocksDbKeyValueStorageEngineFactory.class.getName(), - config.get("stores.testState-First_stateful_ParDo.factory")); - assertEquals("byteArraySerde", config.get("stores.testState-First_stateful_ParDo.key.serde")); - assertEquals("stateValueSerde", config.get("stores.testState-First_stateful_ParDo.msg.serde")); - assertNull(config.get("stores.testState-First_stateful_ParDo.changelog")); - - assertEquals( - RocksDbKeyValueStorageEngineFactory.class.getName(), - config.get("stores.testState-Second_stateful_ParDo.factory")); - assertEquals("byteArraySerde", config.get("stores.testState-Second_stateful_ParDo.key.serde")); - assertEquals("stateValueSerde", config.get("stores.testState-Second_stateful_ParDo.msg.serde")); - 
assertNull(config.get("stores.testState-Second_stateful_ParDo.changelog")); - - options.setStateDurable(true); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config2 = configBuilder.build(); - assertEquals( - "TestStoreConfig-1-testState-First_stateful_ParDo-changelog", - config2.get("stores.testState-First_stateful_ParDo.changelog")); - assertEquals( - "TestStoreConfig-1-testState-Second_stateful_ParDo-changelog", - config2.get("stores.testState-Second_stateful_ParDo.changelog")); - } - - @Test - public void testUserStoreConfigSameStateIdAndPTransformName() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestStoreConfig"); - options.setRunner(SamzaRunner.class); - - Pipeline pipeline = Pipeline.create(options); - pipeline - .apply( - Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings()))) - .apply( - "Same stateful ParDo Name", - ParDo.of( - new DoFn, KV>() { - private static final String testState = "testState"; - - @StateId(testState) - private final StateSpec> state = StateSpecs.value(); - - @ProcessElement - public void processElement( - ProcessContext context, @StateId(testState) ValueState state) { - context.output(context.element()); - } - })) - .apply( - "Same stateful ParDo Name", - ParDo.of( - new DoFn, Void>() { - private static final String testState = "testState"; - - @StateId(testState) - private final StateSpec> state = StateSpecs.value(); - - @ProcessElement - public void processElement( - ProcessContext context, @StateId(testState) ValueState state) {} - })); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, 
configBuilder); - final Config config = configBuilder.build(); - - assertEquals( - RocksDbKeyValueStorageEngineFactory.class.getName(), - config.get("stores.testState-Same_stateful_ParDo_Name.factory")); - assertEquals( - "byteArraySerde", config.get("stores.testState-Same_stateful_ParDo_Name.key.serde")); - assertEquals( - "stateValueSerde", config.get("stores.testState-Same_stateful_ParDo_Name.msg.serde")); - assertNull(config.get("stores.testState-Same_stateful_ParDo_Name.changelog")); - - assertEquals( - RocksDbKeyValueStorageEngineFactory.class.getName(), - config.get("stores.testState-Same_stateful_ParDo_Name2.factory")); - assertEquals( - "byteArraySerde", config.get("stores.testState-Same_stateful_ParDo_Name2.key.serde")); - assertEquals( - "stateValueSerde", config.get("stores.testState-Same_stateful_ParDo_Name2.msg.serde")); - assertNull(config.get("stores.testState-Same_stateful_ParDo_Name2.changelog")); - - options.setStateDurable(true); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Config config2 = configBuilder.build(); - assertEquals( - "TestStoreConfig-1-testState-Same_stateful_ParDo_Name-changelog", - config2.get("stores.testState-Same_stateful_ParDo_Name.changelog")); - assertEquals( - "TestStoreConfig-1-testState-Same_stateful_ParDo_Name2-changelog", - config2.get("stores.testState-Same_stateful_ParDo_Name2.changelog")); - } - - @Test - public void testCreateBundleConfig() { - // autosizing = 0: disabled - // autosizing = 1: enabled - for (int autosizing = 0; autosizing < 2; autosizing++) { - final SamzaPipelineOptions options = PipelineOptionsFactory.as(SamzaPipelineOptions.class); - final Map config = new HashMap<>(); - - // bundle size = 1 - options.setMaxBundleSize(1); - config.put(JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE, "5"); - if (autosizing != 0) { - // Test autosizing enabled, the output should be the same - config.put(JobConfig.JOB_AUTOSIZING_CONTAINER_THREAD_POOL_SIZE, "5"); - } - - Map 
bundleConfig = ConfigBuilder.createBundleConfig(options, config); - - assertEquals("1", bundleConfig.get(TaskConfig.MAX_CONCURRENCY)); - assertNull(bundleConfig.get(JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE)); - assertNull(bundleConfig.get(JobConfig.JOB_AUTOSIZING_CONTAINER_THREAD_POOL_SIZE)); - assertEquals(1, options.getNumThreadsForProcessElement()); - - // bundle size = 3, NumThreadsForProcessElement = 10 - options.setMaxBundleSize(3); - options.setNumThreadsForProcessElement(10); - bundleConfig = ConfigBuilder.createBundleConfig(options, config); - - assertEquals("3", bundleConfig.get(TaskConfig.MAX_CONCURRENCY)); - assertEquals("0", bundleConfig.get(JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE)); - assertEquals("0", bundleConfig.get(JobConfig.JOB_AUTOSIZING_CONTAINER_THREAD_POOL_SIZE)); - assertEquals(10, options.getNumThreadsForProcessElement()); - - // bundle size = 3, NumThreadsForProcessElement = 1 (default), threadPoolSize = 5 - options.setNumThreadsForProcessElement(1); - bundleConfig = ConfigBuilder.createBundleConfig(options, config); - - assertEquals("3", bundleConfig.get(TaskConfig.MAX_CONCURRENCY)); - assertEquals("0", bundleConfig.get(JobConfig.JOB_CONTAINER_THREAD_POOL_SIZE)); - assertEquals("0", bundleConfig.get(JobConfig.JOB_AUTOSIZING_CONTAINER_THREAD_POOL_SIZE)); - assertEquals(5, options.getNumThreadsForProcessElement()); - } - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemTest.java deleted file mode 100644 index 9cc0cc10894a..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/SamzaImpulseSystemTest.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.samza.Partition; -import org.apache.samza.config.MapConfig; -import org.apache.samza.system.IncomingMessageEnvelope; -import org.apache.samza.system.SystemConsumer; -import org.apache.samza.system.SystemStreamPartition; -import org.apache.samza.system.WatermarkMessage; -import org.junit.Assert; -import org.junit.Test; - -/** - * Tests for {@link - * org.apache.beam.runners.samza.translation.SamzaImpulseSystemFactory.SamzaImpulseSystemConsumer}. 
- */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) -}) -public class SamzaImpulseSystemTest { - @Test - public void testSamzaImpulseSystemConsumer() throws Exception { - SystemConsumer consumer = - new SamzaImpulseSystemFactory().getConsumer("default-system", new MapConfig(), null); - Map> result = - consumer.poll(Collections.singleton(sspForPartition(0)), 100); - Assert.assertEquals(1, result.size()); - Assert.assertTrue(result.containsKey(sspForPartition(0))); - - List messageEnvelopes = result.get(sspForPartition(0)); - Assert.assertEquals(3, messageEnvelopes.size()); - - Assert.assertTrue(messageEnvelopes.get(0).getMessage() instanceof OpMessage); - OpMessage impulseEvent = (OpMessage) messageEnvelopes.get(0).getMessage(); - Assert.assertEquals(OpMessage.Type.ELEMENT, impulseEvent.getType()); - - Assert.assertTrue(messageEnvelopes.get(1).getMessage() instanceof WatermarkMessage); - - Assert.assertTrue(messageEnvelopes.get(2).isEndOfStream()); - } - - private SystemStreamPartition sspForPartition(int i) { - return new SystemStreamPartition("default-system", "default-stream", new Partition(i)); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/TranslationContextTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/TranslationContextTest.java deleted file mode 100644 index bb357dd6aced..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/translation/TranslationContextTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.translation; - -import static org.junit.Assert.assertNotNull; -import static org.mockito.Mockito.mock; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.runtime.OpMessage; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PValue; -import org.apache.samza.application.descriptors.StreamApplicationDescriptor; -import org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl; -import org.apache.samza.config.Config; -import org.apache.samza.config.MapConfig; -import org.apache.samza.operators.KV; -import org.apache.samza.operators.MessageStream; -import org.apache.samza.operators.functions.MapFunction; -import org.apache.samza.serializers.KVSerde; -import org.apache.samza.serializers.NoOpSerde; -import org.apache.samza.serializers.Serde; -import org.apache.samza.system.descriptors.GenericInputDescriptor; -import org.apache.samza.system.descriptors.GenericSystemDescriptor; -import org.junit.Test; - -@SuppressWarnings({"rawtypes"}) -public class TranslationContextTest { - private final GenericInputDescriptor testInputDescriptor = - new GenericSystemDescriptor("mockSystem", "mockFactoryClassName") - .getInputDescriptor("test-input-1", mock(Serde.class)); - MapFunction keyFn = m -> m.toString(); - MapFunction valueFn = m -> m; - private 
final String streamName = "testStream"; - KVSerde serde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); - StreamApplicationDescriptor streamApplicationDescriptor = - new StreamApplicationDescriptorImpl( - appDesc -> { - MessageStream inputStream = appDesc.getInputStream(testInputDescriptor); - inputStream.partitionBy(keyFn, valueFn, serde, streamName); - }, - getConfig()); - Map idMap = new HashMap<>(); - Set nonUniqueStateIds = new HashSet<>(); - TranslationContext translationContext = - new TranslationContext( - streamApplicationDescriptor, idMap, nonUniqueStateIds, mock(SamzaPipelineOptions.class)); - - @Test - public void testRegisterInputMessageStreams() { - final PCollection output = mock(PCollection.class); - List topics = Arrays.asList("stream1", "stream2"); - List inputDescriptors = - topics.stream() - .map(topicName -> createSamzaInputDescriptor(topicName, topicName)) - .collect(Collectors.toList()); - - translationContext.registerInputMessageStreams(output, inputDescriptors); - - assertNotNull(translationContext.getMessageStream(output)); - } - - public GenericInputDescriptor>> createSamzaInputDescriptor( - String systemName, String streamId) { - final Serde>> kvSerde = - KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); - return new GenericSystemDescriptor(systemName, "factoryClass") - .getInputDescriptor(streamId, kvSerde); - } - - private static Config getConfig() { - HashMap configMap = new HashMap<>(); - configMap.put("job.name", "testJobName"); - configMap.put("job.id", "testJobId"); - return new MapConfig(configMap); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/DoFnUtilsTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/DoFnUtilsTest.java deleted file mode 100644 index cef1a463ff96..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/DoFnUtilsTest.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * 
or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import static org.junit.Assert.assertEquals; - -import java.io.Serializable; -import java.util.Objects; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.GroupByKey; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.util.construction.PipelineTranslation; -import org.apache.beam.sdk.util.construction.graph.ExecutableStage; -import org.apache.beam.sdk.util.construction.graph.GreedyPipelineFuser; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.junit.Test; - -public class DoFnUtilsTest implements Serializable { - private final Pipeline pipeline = Pipeline.create(); - - @Test - public void testExecutableStageWithoutOutput() { - pipeline.apply(Create.of(KV.of(1L, "1"))); - - assertEquals("[Create.Values-]", DoFnUtils.toStepName(getOnlyExecutableStage(pipeline))); - } - - @Test - public void testExecutableStageWithCustomizedName() { - pipeline.apply("MyCreateOf", 
Create.of(KV.of(1L, "1"))); - assertEquals("[MyCreateOf-]", DoFnUtils.toStepName(getOnlyExecutableStage(pipeline))); - } - - @Test - public void testExecutableStageWithOutput() { - pipeline - .apply("MyCreateOf", Create.of(KV.of(1L, "1"))) - .apply("MyFilterBy", Filter.by(Objects::nonNull)) - .apply(GroupByKey.create()); - - assertEquals("[MyCreateOf-MyFilterBy]", DoFnUtils.toStepName(getOnlyExecutableStage(pipeline))); - } - - @Test - public void testExecutableStageWithPDone() { - pipeline - .apply("MyCreateOf", Create.of("1")) - .apply( - "PDoneTransform", - new PTransform, PDone>() { - @Override - public PDone expand(PCollection input) { - return PDone.in(pipeline); - } - }); - - assertEquals("[MyCreateOf-]", DoFnUtils.toStepName(getOnlyExecutableStage(pipeline))); - } - - private static ExecutableStage getOnlyExecutableStage(Pipeline p) { - return Iterables.getOnlyElement( - GreedyPipelineFuser.fuse(PipelineTranslation.toProto(p)).getFusedStages()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/FutureUtilsTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/FutureUtilsTest.java deleted file mode 100644 index c261fc623af3..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/FutureUtilsTest.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; -import java.util.concurrent.CountDownLatch; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; -import org.junit.Assert; -import org.junit.Test; - -/** Unit tests for {@linkplain FutureUtils}. */ -public final class FutureUtilsTest { - private static final List RESULTS = ImmutableList.of("hello", "world"); - - @Test - public void testFlattenFuturesForCollection() { - CompletionStage> resultFuture = - FutureUtils.flattenFutures( - ImmutableList.of( - CompletableFuture.completedFuture("hello"), - CompletableFuture.completedFuture("world"))); - - CompletionStage validationFuture = - resultFuture.thenAccept( - actualResults -> { - Assert.assertEquals( - "Expected flattened results to contain {hello, world}", - RESULTS, - Lists.newArrayList(actualResults)); - }); - - validationFuture.toCompletableFuture().join(); - } - - @Test - public void testFlattenFuturesForFailedFuture() { - CompletionStage> resultFuture = - FutureUtils.flattenFutures( - ImmutableList.of( - CompletableFuture.completedFuture("hello"), - createFailedFuture(new RuntimeException()))); - - CompletionStage validationFuture = - resultFuture.handle( - (results, ex) -> { - Assert.assertTrue( - "Expected exception to be of RuntimeException", ex instanceof RuntimeException); - 
return null; - }); - - validationFuture.toCompletableFuture().join(); - } - - @Test - public void testWaitForAllFutures() { - CountDownLatch latch = new CountDownLatch(1); - CompletionStage> resultFuture = - FutureUtils.flattenFutures( - ImmutableList.of( - CompletableFuture.supplyAsync( - () -> { - try { - latch.await(); - } catch (InterruptedException e) { - return ""; - } - - return "hello"; - }), - CompletableFuture.supplyAsync( - () -> { - latch.countDown(); - return "world"; - }))); - - CompletionStage validationFuture = - resultFuture.thenAccept( - actualResults -> { - Assert.assertEquals( - "Expected flattened results to contain {hello, world}", - RESULTS, - Lists.newArrayList(actualResults)); - }); - - validationFuture.toCompletableFuture().join(); - } - - private static CompletionStage createFailedFuture(Throwable t) { - CompletableFuture future = new CompletableFuture<>(); - future.completeExceptionally(t); - return future; - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/InMemoryMetricsReporter.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/InMemoryMetricsReporter.java deleted file mode 100644 index fed43430a440..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/InMemoryMetricsReporter.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import java.util.HashMap; -import java.util.Map; -import org.apache.samza.metrics.MetricsReporter; -import org.apache.samza.metrics.ReadableMetricsRegistry; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** An in-memory {@link MetricsReporter} for testing. */ -public class InMemoryMetricsReporter implements MetricsReporter { - private Map registries; - - public InMemoryMetricsReporter() { - registries = new HashMap<>(); - } - - @Override - public void start() {} - - @Override - public void register(String source, ReadableMetricsRegistry registry) { - registries.put(source, registry); - } - - @Override - public void stop() {} - - public @Nullable ReadableMetricsRegistry getMetricsRegistry(@NonNull String source) { - return registries.get(source); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/PipelineJsonRendererTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/PipelineJsonRendererTest.java deleted file mode 100644 index 0a4f532808b1..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/PipelineJsonRendererTest.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import static org.junit.Assert.assertEquals; - -import com.google.gson.JsonParser; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import org.apache.beam.runners.samza.SamzaExecutionEnvironment; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.apache.beam.runners.samza.SamzaRunner; -import org.apache.beam.runners.samza.translation.ConfigBuilder; -import org.apache.beam.runners.samza.translation.ConfigContext; -import org.apache.beam.runners.samza.translation.PViewToIdMapper; -import org.apache.beam.runners.samza.translation.SamzaPipelineTranslator; -import org.apache.beam.runners.samza.translation.SamzaTransformOverrides; -import org.apache.beam.runners.samza.translation.StateIdParser; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.Impulse; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PValue; 
-import org.apache.beam.sdk.values.TimestampedValue; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Test; - -/** Tests for {@link org.apache.beam.runners.samza.util.PipelineJsonRenderer}. */ -public class PipelineJsonRendererTest { - - @Test - public void testEmptyPipeline() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setRunner(SamzaRunner.class); - - Pipeline p = Pipeline.create(options); - final Map idMap = PViewToIdMapper.buildIdMap(p); - final Set nonUniqueStateIds = StateIdParser.scan(p); - final ConfigContext ctx = new ConfigContext(idMap, nonUniqueStateIds, options); - - String jsonDag = - "{ \"RootNode\": [" - + " { \"fullName\":\"OuterMostNode\"," - + " \"ChildNodes\":[ ]}],\"graphLinks\": [],\"transformIOInfo\": []" - + "}"; - - assertEquals( - JsonParser.parseString(jsonDag), - JsonParser.parseString( - PipelineJsonRenderer.toJsonString(p, ctx).replaceAll(System.lineSeparator(), ""))); - } - - @Test - public void testCompositePipeline() throws IOException { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setRunner(SamzaRunner.class); - options.setJobName("TestEnvConfig"); - options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL); - - Pipeline p = Pipeline.create(options); - - p.apply( - Create.timestamped( - TimestampedValue.of(KV.of(1, 1), new Instant(1)), - TimestampedValue.of(KV.of(2, 2), new Instant(2)))) - .apply(Window.into(FixedWindows.of(Duration.millis(10)))) - .apply(Sum.integersPerKey()); - - p.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(p); - final Set nonUniqueStateIds = StateIdParser.scan(p); - final ConfigContext ctx = new ConfigContext(idMap, nonUniqueStateIds, options); - - String jsonDagFileName = "src/test/resources/ExpectedDag.json"; - String jsonDag = - new 
String(Files.readAllBytes(Paths.get(jsonDagFileName)), StandardCharsets.UTF_8); - String renderedDag = PipelineJsonRenderer.toJsonString(p, ctx); - - assertEquals( - JsonParser.parseString(jsonDag), - JsonParser.parseString(renderedDag.replaceAll(System.lineSeparator(), ""))); - } - - @Test - public void testBeamTransformIOConfigGen() { - SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class); - options.setJobName("TestEnvConfig"); - options.setRunner(SamzaRunner.class); - options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL); - - Pipeline pipeline = Pipeline.create(options); - pipeline.apply(Impulse.create()).apply(Filter.by(Objects::nonNull)); - pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides()); - - final Map idMap = PViewToIdMapper.buildIdMap(pipeline); - final Set nonUniqueStateIds = StateIdParser.scan(pipeline); - final ConfigContext configCtx = new ConfigContext(idMap, nonUniqueStateIds, options); - - final ConfigBuilder configBuilder = new ConfigBuilder(options); - SamzaPipelineTranslator.createConfig(pipeline, configCtx, configBuilder); - final Map> transformInputOutput = - PipelineJsonRenderer.buildTransformIOMap(pipeline, configCtx); - - assertEquals(2, transformInputOutput.size()); - assertEquals("", transformInputOutput.get("Impulse").getKey()); // no input to impulse - assertEquals( - "Impulse.out", - transformInputOutput.get("Impulse").getValue()); // PValue for to Impulse.output - - // Input to Filter is PValue Output from Impulse - assertEquals( - "Impulse.out", - transformInputOutput.get("Filter/ParDo(Anonymous)/ParMultiDo(Anonymous)").getKey()); - // output PValue of filter - assertEquals( - "Filter/ParDo(Anonymous)/ParMultiDo(Anonymous).output", - transformInputOutput.get("Filter/ParDo(Anonymous)/ParMultiDo(Anonymous)").getValue()); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/PortableConfigUtilsTest.java 
b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/PortableConfigUtilsTest.java deleted file mode 100644 index 14ab5b1af5fb..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/PortableConfigUtilsTest.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.samza.util; - -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; - -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.runners.samza.SamzaPipelineOptions; -import org.junit.Assert; -import org.junit.Test; - -public class PortableConfigUtilsTest { - - @Test - public void testNonPortableMode() { - SamzaPipelineOptions mockOptions = mock(SamzaPipelineOptions.class); - Map config = new HashMap<>(); - config.put(PortableConfigUtils.BEAM_PORTABLE_MODE, "false"); - doReturn(config).when(mockOptions).getConfigOverride(); - Assert.assertFalse( - "Expected false for portable mode ", PortableConfigUtils.isPortable(mockOptions)); - } - - @Test - public void testNonPortableModeNullConfig() { - SamzaPipelineOptions mockOptions = mock(SamzaPipelineOptions.class); - doReturn(null).when(mockOptions).getConfigOverride(); - Assert.assertFalse( - "Expected false for portable mode ", PortableConfigUtils.isPortable(mockOptions)); - } - - @Test - public void testPortableMode() { - SamzaPipelineOptions mockOptions = mock(SamzaPipelineOptions.class); - Map config = new HashMap<>(); - config.put(PortableConfigUtils.BEAM_PORTABLE_MODE, "true"); - doReturn(config).when(mockOptions).getConfigOverride(); - Assert.assertTrue( - "Expected true for portable runner", PortableConfigUtils.isPortable(mockOptions)); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/TestHashIdGenerator.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/TestHashIdGenerator.java deleted file mode 100644 index cf765e3db221..000000000000 --- a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/TestHashIdGenerator.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import static org.mockito.Mockito.mock; - -import java.util.Set; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.Max; -import org.apache.beam.sdk.transforms.Min; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.junit.Assert; -import org.junit.Test; - -/** Test class for {@link HashIdGenerator}. 
*/ -public class TestHashIdGenerator { - - @Test - public void testGetId() { - final HashIdGenerator idGenerator = new HashIdGenerator(); - final Set ids = - ImmutableSet.of( - idGenerator.getId(Count.perKey().getName()), - idGenerator.getId(MapElements.into(null).getName()), - idGenerator.getId(Count.globally().getName()), - idGenerator.getId(Combine.perKey(mock(SerializableFunction.class)).getName()), - idGenerator.getId(Min.perKey().getName()), - idGenerator.getId(Max.globally().getName())); - Assert.assertEquals(6, ids.size()); - } - - @Test - public void testGetShortId() { - final HashIdGenerator idGenerator = new HashIdGenerator(); - String id = idGenerator.getId("abcd"); - Assert.assertEquals("abcd", id); - } - - @Test - public void testSameNames() { - final HashIdGenerator idGenerator = new HashIdGenerator(); - String id1 = idGenerator.getId(Count.perKey().getName()); - String id2 = idGenerator.getId(Count.perKey().getName()); - Assert.assertNotEquals(id1, id2); - } - - @Test - public void testSameShortNames() { - final HashIdGenerator idGenerator = new HashIdGenerator(); - String id = idGenerator.getId("abcd"); - Assert.assertEquals("abcd", id); - String id2 = idGenerator.getId("abcd"); - Assert.assertNotEquals("abcd", id2); - } - - @Test - public void testLongHash() { - final HashIdGenerator idGenerator = new HashIdGenerator(10); - String id1 = idGenerator.getId(Count.perKey().getName()); - String id2 = idGenerator.getId(Count.perKey().getName()); - String id3 = idGenerator.getId(Count.perKey().getName()); - String id4 = idGenerator.getId(Count.perKey().getName()); - Assert.assertNotEquals(id1, id2); - Assert.assertNotEquals(id3, id2); - Assert.assertNotEquals(id3, id4); - } -} diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/WindowUtilsTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/util/WindowUtilsTest.java deleted file mode 100644 index 9c614175e5de..000000000000 --- 
a/runners/samza/src/test/java/org/apache/beam/runners/samza/util/WindowUtilsTest.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.samza.util; - -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarLongCoder; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; -import org.apache.beam.sdk.util.construction.Environments; -import org.apache.beam.sdk.util.construction.SdkComponents; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.joda.time.Duration; -import org.junit.Test; - -/** Unit tests for {@link WindowUtils}. 
*/ -public class WindowUtilsTest { - - @Test - public void testGetWindowStrategy() throws IOException { - SdkComponents components = SdkComponents.create(); - String environmentId = - components.registerEnvironment(Environments.createDockerEnvironment("java")); - WindowingStrategy expected = - WindowingStrategy.of(FixedWindows.of(Duration.standardMinutes(1))) - .withMode(WindowingStrategy.AccumulationMode.DISCARDING_FIRED_PANES) - .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW) - .withAllowedLateness(Duration.ZERO) - .withEnvironmentId(environmentId); - components.registerWindowingStrategy(expected); - String collectionId = - components.registerPCollection( - PCollection.createPrimitiveOutputInternal( - Pipeline.create(), expected, PCollection.IsBounded.BOUNDED, VoidCoder.of()) - .setName("name")); - - WindowingStrategy actual = - WindowUtils.getWindowStrategy(collectionId, components.toComponents()); - - assertEquals(expected, actual); - } - - @Test - public void testInstantiateWindowedCoder() throws IOException { - Coder> expectedValueCoder = - KvCoder.of(VarLongCoder.of(), StringUtf8Coder.of()); - SdkComponents components = SdkComponents.create(); - components.registerEnvironment(Environments.createDockerEnvironment("java")); - String collectionId = - components.registerPCollection( - PCollection.createPrimitiveOutputInternal( - Pipeline.create(), - WindowingStrategy.globalDefault(), - PCollection.IsBounded.BOUNDED, - expectedValueCoder) - .setName("name")); - - assertEquals( - expectedValueCoder, - WindowUtils.instantiateWindowedCoder(collectionId, components.toComponents()) - .getValueCoder()); - } -} diff --git a/runners/samza/src/test/resources/ExpectedDag.json b/runners/samza/src/test/resources/ExpectedDag.json deleted file mode 100644 index c61b80134d8a..000000000000 --- a/runners/samza/src/test/resources/ExpectedDag.json +++ /dev/null @@ -1,373 +0,0 @@ -{ - "RootNode": [ - { - "fullName": "OuterMostNode", - "ChildNodes": [ - { - "fullName": 
"Create.TimestampedValues", - "enclosingNode": "OuterMostNode", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values", - "enclosingNode": "Create.TimestampedValues", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)", - "enclosingNode": "Create.TimestampedValues/Create.Values", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/Impulse", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)" - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)/ParMultiDo(OutputSingleSource)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)" - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial 
restriction/ParMultiDo(PairWithRestriction)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction" - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction/ParMultiDo(SplitRestriction)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction" - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows/ParMultiDo(ExplodeWindows)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows" - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)", - "ChildNodes": [ - 
{ - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map/ParMultiDo(Anonymous)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map" - } - ] - } - ] - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements", - "ChildNodes": [ - { - "fullName": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/SetIdentityWindow", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/SetIdentityWindow/Window.Assign", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/SetIdentityWindow" - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata/ParDo(Anonymous)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous)", - "enclosingNode": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata/ParDo(Anonymous)" - } - ] - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/GroupByKey", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle" - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ExpandIterable", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ExpandIterable/ParMultiDo(Anonymous)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ExpandIterable" - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle", - "ChildNodes": [ - { - "fullName": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)" - } - ] - } - ] - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map", - "enclosingNode": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map/ParMultiDo(Anonymous)", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map" - } - ] - } - ] - } - ] - }, - { - "fullName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/NaiveProcess", - "enclosingNode": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements" - } - ] - } - ] - } - ] - } - ] - } - ] - }, - { - "fullName": "Create.TimestampedValues/ParDo(ConvertTimestamps)", - "enclosingNode": "Create.TimestampedValues", - "ChildNodes": [ - { - "fullName": "Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps)", - "enclosingNode": "Create.TimestampedValues/ParDo(ConvertTimestamps)" - } - ] - } - ] - }, - { - "fullName": "Window.Into()", - "enclosingNode": "OuterMostNode", - "ChildNodes": [ - { - "fullName": "Window.Into()/Window.Assign", - "enclosingNode": "Window.Into()" - } - ] - }, - { - "fullName": "Combine.perKey(SumInteger)", - "enclosingNode": "OuterMostNode", - "ChildNodes": [ - { - "fullName": "Combine.perKey(SumInteger)/GroupByKey", - "enclosingNode": "Combine.perKey(SumInteger)" - }, - { - "fullName": "Combine.perKey(SumInteger)/Combine.GroupedValues", - "enclosingNode": "Combine.perKey(SumInteger)", - "ChildNodes": [ - { - "fullName": "Combine.perKey(SumInteger)/Combine.GroupedValues/ParDo(Anonymous)", - "enclosingNode": 
"Combine.perKey(SumInteger)/Combine.GroupedValues", - "ChildNodes": [ - { - "fullName": "Combine.perKey(SumInteger)/Combine.GroupedValues/ParDo(Anonymous)/ParMultiDo(Anonymous)", - "enclosingNode": "Combine.perKey(SumInteger)/Combine.GroupedValues/ParDo(Anonymous)" - } - ] - } - ] - } - ] - } - ] - } - ], - "graphLinks": [ - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/Impulse", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)/ParMultiDo(OutputSingleSource)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)/ParMultiDo(OutputSingleSource)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction/ParMultiDo(PairWithRestriction)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction/ParMultiDo(PairWithRestriction)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction/ParMultiDo(SplitRestriction)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction/ParMultiDo(SplitRestriction)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows/ParMultiDo(ExplodeWindows)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows/ParMultiDo(ExplodeWindows)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique 
key/AddKeys/Map/ParMultiDo(Anonymous)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map/ParMultiDo(Anonymous)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/SetIdentityWindow/Window.Assign" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/SetIdentityWindow/Window.Assign", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ReifyOriginalMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/GroupByKey" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/GroupByKey", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ExpandIterable/ParMultiDo(Anonymous)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/ExpandIterable/ParMultiDo(Anonymous)", - "to": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map/ParMultiDo(Anonymous)" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map/ParMultiDo(Anonymous)", - "to": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/NaiveProcess" - }, - { - "from": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/NaiveProcess", - "to": "Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps)" - }, - { - "from": "Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps)", - "to": "Window.Into()/Window.Assign" - }, - { - "from": "Window.Into()/Window.Assign", - "to": "Combine.perKey(SumInteger)/GroupByKey" - }, - { - "from": "Combine.perKey(SumInteger)/GroupByKey", - "to": "Combine.perKey(SumInteger)/Combine.GroupedValues/ParDo(Anonymous)/ParMultiDo(Anonymous)" - } - ], - "transformIOInfo": [ - { - "transformName": "Window.Into()/Window.Assign", - "inputs": "Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps).output", - "outputs": "Window.Into()/Window.Assign.out" - }, - { - "transformName": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map/ParMultiDo(Anonymous)", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows/ParMultiDo(ExplodeWindows).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map/ParMultiDo(Anonymous).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)/ParMultiDo(OutputSingleSource)", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/Impulse.out", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)/ParMultiDo(OutputSingleSource).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction/ParMultiDo(SplitRestriction)", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction/ParMultiDo(PairWithRestriction).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction/ParMultiDo(SplitRestriction).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map/ParMultiDo(Anonymous)", - "inputs": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map/ParMultiDo(Anonymous).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/NaiveProcess", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Drop key/Values/Map/ParMultiDo(Anonymous).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper).output" - }, - { - "transformName": "Combine.perKey(SumInteger)", - "inputs": "Window.Into()/Window.Assign.out", - "outputs": "Combine.perKey(SumInteger)/Combine.GroupedValues/ParDo(Anonymous)/ParMultiDo(Anonymous).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction/ParMultiDo(PairWithRestriction)", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(OutputSingleSource)/ParMultiDo(OutputSingleSource).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Pair with initial restriction/ParMultiDo(PairWithRestriction).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle", - "inputs": 
"Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Assign unique key/AddKeys/Map/ParMultiDo(Anonymous).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/ProcessKeyedElements/Reshuffle/RestoreMetadata/ParDo(Anonymous)/ParMultiDo(Anonymous).output" - }, - { - "transformName": "Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps)", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper).output", - "outputs": "Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps).output" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/Impulse", - "inputs": "", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/Impulse.out" - }, - { - "transformName": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows/ParMultiDo(ExplodeWindows)", - "inputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Split restriction/ParMultiDo(SplitRestriction).output", - "outputs": "Create.TimestampedValues/Create.Values/Read(CreateSource)/ParDo(BoundedSourceAsSDFWrapper)/ParMultiDo(BoundedSourceAsSDFWrapper)/Explode windows/ParMultiDo(ExplodeWindows).output" - } - ] -} \ No newline at end of file diff --git a/runners/samza/src/test/resources/log4j-test.properties b/runners/samza/src/test/resources/log4j-test.properties deleted file mode 100644 index 8a95279413c2..000000000000 --- a/runners/samza/src/test/resources/log4j-test.properties +++ /dev/null @@ -1,26 +0,0 @@ -################################################################################ -# Licensed to the 
Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -# Set root logger level to ERROR. -# set manually to INFO for debugging purposes. -log4j.rootLogger=INFO,testlogger - -# ConsoleAppender. -log4j.appender.testlogger=org.apache.log4j.ConsoleAppender -log4j.appender.testlogger.layout=org.apache.log4j.PatternLayout -log4j.appender.testlogger.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/sdks/go/examples/large_wordcount/large_wordcount.go b/sdks/go/examples/large_wordcount/large_wordcount.go index eb9cf3010e75..63e6b3b88efb 100644 --- a/sdks/go/examples/large_wordcount/large_wordcount.go +++ b/sdks/go/examples/large_wordcount/large_wordcount.go @@ -74,7 +74,6 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dot" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" ) diff --git a/sdks/go/pkg/beam/runners/samza/samza.go b/sdks/go/pkg/beam/runners/samza/samza.go deleted file mode 100644 
index 01a7c5233af2..000000000000 --- a/sdks/go/pkg/beam/runners/samza/samza.go +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package samza contains the Samza runner. -package samza - -import ( - "context" - - "github.com/apache/beam/sdks/v2/go/pkg/beam" - "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" -) - -func init() { - beam.RegisterRunner("samza", Execute) - beam.RegisterRunner("SamzaRunner", Execute) -} - -// Execute runs the given pipeline on Samza. Convenience wrapper over the -// universal runner. 
-func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) { - return universal.Execute(ctx, p) -} diff --git a/sdks/go/pkg/beam/x/beamx/run.go b/sdks/go/pkg/beam/x/beamx/run.go index 0be42561b658..ff3583917b7a 100644 --- a/sdks/go/pkg/beam/x/beamx/run.go +++ b/sdks/go/pkg/beam/x/beamx/run.go @@ -33,7 +33,6 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dot" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" ) diff --git a/sdks/go/test/build.gradle b/sdks/go/test/build.gradle index 424b009fd12f..473c43c28c6d 100644 --- a/sdks/go/test/build.gradle +++ b/sdks/go/test/build.gradle @@ -116,34 +116,6 @@ task flinkValidatesRunner { } } -// ValidatesRunner tests for Samza. Runs tests in the integration directory -// with Samza to validate that the runner behaves as expected. -task samzaValidatesRunner { - dependsOn ":sdks:go:test:goBuild" - dependsOn ":sdks:go:container:docker" - dependsOn ":sdks:java:container:${project.ext.currentJavaVersion}:docker" - dependsOn ":runners:samza:job-server:shadowJar" - dependsOn ":sdks:java:testing:expansion-service:buildTestExpansionServiceJar" - - doLast { - def pipelineOptions = [ // Pipeline options piped directly to Go SDK flags. 
- "--expansion_jar=test:${project(":sdks:java:testing:expansion-service").buildTestExpansionServiceJar.archivePath}", - ] - def options = [ - "--runner samza", - "--samza_job_server_jar ${project(":runners:samza:job-server").shadowJar.archivePath}", - "--pipeline_opts \"${pipelineOptions.join(' ')}\"", - ] - exec { - if (fork_java_home) { - environment "JAVA_HOME_JOB_SERVER", fork_java_home - } - executable "sh" - args "-c", "./run_validatesrunner_tests.sh ${options.join(' ')}" - } - } -} - // ValidatesRunner tests for Spark. Runs tests in the integration directory // with Spark to validate that the runner behaves as expected. task sparkValidatesRunner { diff --git a/sdks/go/test/integration/expansions_test.go b/sdks/go/test/integration/expansions_test.go index 3afa2470157c..6e47f5d303b0 100644 --- a/sdks/go/test/integration/expansions_test.go +++ b/sdks/go/test/integration/expansions_test.go @@ -22,7 +22,6 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/test/integration/internal/jars" "github.com/google/go-cmp/cmp" diff --git a/sdks/go/test/integration/integration.go b/sdks/go/test/integration/integration.go index 9d04f08de4fb..b23547bf4fa1 100644 --- a/sdks/go/test/integration/integration.go +++ b/sdks/go/test/integration/integration.go @@ -206,53 +206,6 @@ var flinkFilters = []string{ "TestParDoBundleFinalizer.*", } -var samzaFilters = []string{ - // TODO(https://github.com/apache/beam/issues/20987): Samza tests invalid encoding. 
- "TestReshuffle", - "TestReshuffleKV", - // The Samza runner does not support the TestStream primitive - "TestTestStream.*", - // The trigger and pane tests uses TestStream - "TestTrigger.*", - "TestPanes", - // TODO(https://github.com/apache/beam/issues/21244): Samza doesn't yet support post job metrics, used by WordCount - "TestWordCount.*", - // TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners. - "TestBigQueryIO.*", - "TestBigtableIO.*", - "TestSpannerIO.*", - // The Samza runner does not support self-checkpointing - "TestCheckpointing", - // The samza runner does not support pipeline drain for SDF. - "TestDrain", - // FhirIO currently only supports Dataflow runner - "TestFhirIO.*", - // OOMs currently only lead to heap dumps on Dataflow runner - "TestOomParDo", - // The samza runner does not support user state. - "TestValueState", - "TestValueStateWindowed", - "TestValueStateClear", - "TestBagState", - "TestBagStateClear", - "TestCombiningState", - "TestMapState", - "TestMapStateClear", - "TestSetState", - "TestSetStateClear", - // TODO(https://github.com/apache/beam/issues/26126): Java runner issue (AcitveBundle has no regsitered handler) - "TestDebeziumIO_BasicRead", - - "TestOrderedListState", - - // Samza does not support state. - "TestTimers.*", - "TestBagStateBlindWrite", - - // no support for BundleFinalizer - "TestParDoBundleFinalizer.*", -} - var sparkFilters = []string{ // TODO(BEAM-11498): XLang tests broken with Spark runner. 
"TestXLang.*", @@ -375,8 +328,6 @@ func CheckFilters(t *testing.T) { filters = portableFilters case "flink", "FlinkRunner": filters = flinkFilters - case "samza", "SamzaRunner": - filters = samzaFilters case "spark", "SparkRunner": filters = sparkFilters case "dataflow", "DataflowRunner": diff --git a/sdks/go/test/integration/io/mongodbio/mongodbio_test.go b/sdks/go/test/integration/io/mongodbio/mongodbio_test.go index b8885e7c728d..481729c9eb98 100644 --- a/sdks/go/test/integration/io/mongodbio/mongodbio_test.go +++ b/sdks/go/test/integration/io/mongodbio/mongodbio_test.go @@ -26,7 +26,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/io/mongodbio" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" diff --git a/sdks/go/test/integration/io/xlang/debezium/debezium_test.go b/sdks/go/test/integration/io/xlang/debezium/debezium_test.go index a4850d4a3a33..8ccb64cae209 100644 --- a/sdks/go/test/integration/io/xlang/debezium/debezium_test.go +++ b/sdks/go/test/integration/io/xlang/debezium/debezium_test.go @@ -25,7 +25,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/debeziumio" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/v2/go/test/integration" diff --git a/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go b/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go index 0eddc3e788d2..93d9f4c07ffc 100644 --- 
a/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go +++ b/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go @@ -26,7 +26,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/v2/go/test/integration" diff --git a/sdks/go/test/integration/io/xlang/kafka/kafka_test.go b/sdks/go/test/integration/io/xlang/kafka/kafka_test.go index e1cdc2e935db..2a682ea5cd2c 100644 --- a/sdks/go/test/integration/io/xlang/kafka/kafka_test.go +++ b/sdks/go/test/integration/io/xlang/kafka/kafka_test.go @@ -24,7 +24,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/v2/go/test/integration" diff --git a/sdks/go/test/integration/primitives/primitives_test.go b/sdks/go/test/integration/primitives/primitives_test.go index ef8a265b8bfa..ceaac4532f3c 100644 --- a/sdks/go/test/integration/primitives/primitives_test.go +++ b/sdks/go/test/integration/primitives/primitives_test.go @@ -20,7 +20,6 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" ) diff --git a/sdks/go/test/integration/synthetic/synthetic_test.go 
b/sdks/go/test/integration/synthetic/synthetic_test.go index 3161012975bd..f55fc7d7a302 100644 --- a/sdks/go/test/integration/synthetic/synthetic_test.go +++ b/sdks/go/test/integration/synthetic/synthetic_test.go @@ -22,7 +22,7 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/io/synthetic" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" diff --git a/sdks/go/test/integration/wordcount/wordcount_test.go b/sdks/go/test/integration/wordcount/wordcount_test.go index 09c6683cd14e..3c18a799f752 100644 --- a/sdks/go/test/integration/wordcount/wordcount_test.go +++ b/sdks/go/test/integration/wordcount/wordcount_test.go @@ -23,7 +23,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/v2/go/test/integration" diff --git a/sdks/go/test/integration/xlang/xlang_test.go b/sdks/go/test/integration/xlang/xlang_test.go index f1473f199057..091967a9d03c 100644 --- a/sdks/go/test/integration/xlang/xlang_test.go +++ b/sdks/go/test/integration/xlang/xlang_test.go @@ -27,7 +27,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" 
"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" diff --git a/sdks/go/test/regression/lperror_test.go b/sdks/go/test/regression/lperror_test.go index 7e99e73c9f80..0dd251980b3d 100644 --- a/sdks/go/test/regression/lperror_test.go +++ b/sdks/go/test/regression/lperror_test.go @@ -26,7 +26,6 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" ) diff --git a/sdks/go/test/regression/pardo_test.go b/sdks/go/test/regression/pardo_test.go index fc6d240fbf5a..e193739de452 100644 --- a/sdks/go/test/regression/pardo_test.go +++ b/sdks/go/test/regression/pardo_test.go @@ -23,7 +23,6 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" - _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" ) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 972caef6a336..b9457f1bf400 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -35,7 +35,7 @@ # --timeout -> Timeout for the go test command, on a per-package level. # --simultaneous -> Number of simultaneous packages to test. # Controls the -p flag for the go test command. -# Not used for Flink, Spark, or Samza runners. Defaults to 3 otherwise. +# Not used for Flink or Spark runners. Defaults to 3 otherwise. # --endpoint -> An endpoint for an existing job server outside the script. # If present, job server jar flags are ignored. 
# --test_expansion_jar -> Filepath to jar for an expansion service, for @@ -164,11 +164,6 @@ case $key in shift # past argument shift # past value ;; - --samza_job_server_jar) - SAMZA_JOB_SERVER_JAR="$2" - shift # past argument - shift # past value - ;; --spark_job_server_jar) SPARK_JOB_SERVER_JAR="$2" shift # past argument @@ -266,7 +261,7 @@ else fi # Set up environment based on runner. -if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" || "$RUNNER" == "prism" ]]; then +if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "portable" || "$RUNNER" == "prism" ]]; then if [[ -z "$ENDPOINT" ]]; then JOB_PORT=$(python3 -c "$SOCKET_SCRIPT") ENDPOINT="localhost:$JOB_PORT" @@ -283,12 +278,6 @@ if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$ --job-port $JOB_PORT \ --expansion-port 0 \ --artifact-port 0 & - elif [[ "$RUNNER" == "samza" ]]; then - "$JAVA_CMD" \ - -jar $SAMZA_JOB_SERVER_JAR \ - --job-port $JOB_PORT \ - --expansion-port 0 \ - --artifact-port 0 & elif [[ "$RUNNER" == "spark" ]]; then "$JAVA_CMD" \ -jar $SPARK_JOB_SERVER_JAR \ @@ -348,7 +337,7 @@ if [[ "$RUNNER" != "direct" ]]; then fi # Disable parallelism on runners that don't support it. 
-if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" ]]; then +if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" ]]; then SIMULTANEOUS=1 fi diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml index afd80d1f4426..e734e98e7d40 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml @@ -261,17 +261,6 @@ - - samza-runner - - - org.apache.beam - beam-runners-samza - ${beam.version} - runtime - - - twister2-runner diff --git a/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml index 320aacd8d4bb..d93a6b09284f 100644 --- a/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml +++ b/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml @@ -253,16 +253,6 @@ - - samza-runner - - - org.apache.beam - beam-runners-samza - runtime - - - twister2-runner diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py index 0197733e9115..8bae82f0aaaf 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py @@ -836,7 +836,6 @@ def test_pardo_et_timer_with_no_firing(self): 'FnApiRunnerTestWithMultiWorkers', 'FnApiRunnerTestWithBundleRepeat', 'FnApiRunnerTestWithBundleRepeatAndMultiWorkers', - 'SamzaRunnerTest', 'SparkRunnerTest'}: raise unittest.SkipTest("https://github.com/apache/beam/issues/35168") @@ -853,7 +852,6 @@ def test_pardo_et_timer_with_no_reset(self): 
'FnApiRunnerTestWithMultiWorkers', 'FnApiRunnerTestWithBundleRepeat', 'FnApiRunnerTestWithBundleRepeatAndMultiWorkers', - 'SamzaRunnerTest', 'SparkRunnerTest'}: raise unittest.SkipTest("https://github.com/apache/beam/issues/35168") @@ -869,7 +867,6 @@ def test_pardo_et_timer_with_no_reset_and_no_clear(self): 'FnApiRunnerTestWithMultiWorkers', 'FnApiRunnerTestWithBundleRepeat', 'FnApiRunnerTestWithBundleRepeatAndMultiWorkers', - 'SamzaRunnerTest', 'SparkRunnerTest'}: raise unittest.SkipTest("https://github.com/apache/beam/issues/35168") # The timer will fire at T + 10. After the timer is set, it is never diff --git a/sdks/python/apache_beam/runners/portability/portable_runner_test.py b/sdks/python/apache_beam/runners/portability/portable_runner_test.py index 2fd63b822e96..0f44afb2f123 100644 --- a/sdks/python/apache_beam/runners/portability/portable_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/portable_runner_test.py @@ -230,7 +230,6 @@ def test_pardo_et_timer_with_no_firing(self): 'PortableRunnerTestWithExternalEnv', 'PortableRunnerTestWithLocalDocker', 'PortableRunnerOptimizedWithoutFusion', - 'SamzaRunnerTest', 'SparkRunnerTest' }: raise unittest.SkipTest("https://github.com/apache/beam/issues/35168") @@ -244,7 +243,6 @@ def test_pardo_et_timer_with_no_reset(self): 'PortableRunnerTestWithExternalEnv', 'PortableRunnerTestWithLocalDocker', 'PortableRunnerOptimizedWithoutFusion', - 'SamzaRunnerTest', 'SparkRunnerTest' }: raise unittest.SkipTest("https://github.com/apache/beam/issues/35168") diff --git a/sdks/python/apache_beam/runners/portability/samza_runner_test.py b/sdks/python/apache_beam/runners/portability/samza_runner_test.py deleted file mode 100644 index cc8d947f054e..000000000000 --- a/sdks/python/apache_beam/runners/portability/samza_runner_test.py +++ /dev/null @@ -1,200 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# pytype: skip-file - -# Run as -# -# pytest samza_runner_test.py[::TestClass::test_case] \ -# --test-pipeline-options="--environment_type=LOOPBACK" -import argparse -import logging -import shlex -import unittest -from shutil import rmtree -from tempfile import mkdtemp - -import pytest - -from apache_beam.options.pipeline_options import PortableOptions -from apache_beam.runners.portability import job_server -from apache_beam.runners.portability import portable_runner -from apache_beam.runners.portability import portable_runner_test -from apache_beam.utils import subprocess_server - -_LOGGER = logging.getLogger(__name__) - - -class SamzaRunnerTest(portable_runner_test.PortableRunnerTest): - _use_grpc = True - _use_subprocesses = True - - expansion_port = None - samza_job_server_jar = None - - @pytest.fixture(autouse=True) - def parse_options(self, request): - if not request.config.option.test_pipeline_options: - raise unittest.SkipTest( - 'Skipping because --test-pipeline-options is not specified.') - test_pipeline_options = request.config.option.test_pipeline_options - parser = argparse.ArgumentParser(add_help=True) - parser.add_argument( - '--samza_job_server_jar', - help='Job server jar to submit jobs.', - action='store') - parser.add_argument( - '--environment_type', - 
default='LOOPBACK', - choices=['DOCKER', 'PROCESS', 'LOOPBACK'], - help='Set the environment type for running user code. DOCKER runs ' - 'user code in a container. PROCESS runs user code in ' - 'automatically started processes. LOOPBACK runs user code on ' - 'the same process that originally submitted the job.') - parser.add_argument( - '--environment_option', - '--environment_options', - dest='environment_options', - action='append', - default=None, - help=( - 'Environment configuration for running the user code. ' - 'Recognized options depend on --environment_type.\n ' - 'For DOCKER: docker_container_image (optional)\n ' - 'For PROCESS: process_command (required), process_variables ' - '(optional, comma-separated)\n ' - 'For EXTERNAL: external_service_address (required)')) - known_args, unknown_args = parser.parse_known_args( - shlex.split(test_pipeline_options)) - if unknown_args: - _LOGGER.warning('Discarding unrecognized arguments %s' % unknown_args) - self.set_samza_job_server_jar( - known_args.samza_job_server_jar or - job_server.JavaJarJobServer.path_to_beam_jar( - ':runners:samza:job-server:shadowJar')) - self.environment_type = known_args.environment_type - self.environment_options = known_args.environment_options\ - - @classmethod - def _subprocess_command(cls, job_port, expansion_port): - # will be cleaned up at the end of this method, and recreated and used by - # the job server - tmp_dir = mkdtemp(prefix='samzatest') - - cls.expansion_port = expansion_port - - try: - return [ - subprocess_server.JavaHelper.get_java(), - '-jar', - cls.samza_job_server_jar, - '--artifacts-dir', - tmp_dir, - '--job-port', - str(job_port), - '--artifact-port', - '0', - '--expansion-port', - str(expansion_port), - ] - finally: - rmtree(tmp_dir) - - @classmethod - def set_samza_job_server_jar(cls, samza_job_server_jar): - cls.samza_job_server_jar = samza_job_server_jar - - @classmethod - def get_runner(cls): - return portable_runner.PortableRunner() - - @classmethod - def 
get_expansion_service(cls): - # TODO Move expansion address resides into PipelineOptions - return 'localhost:%s' % cls.expansion_port - - def create_options(self): - options = super().create_options() - options.view_as(PortableOptions).environment_type = self.environment_type - options.view_as( - PortableOptions).environment_options = self.environment_options - - return options - - def test_metrics(self): - # Skip until Samza portable runner supports distribution metrics. - raise unittest.SkipTest("https://github.com/apache/beam/issues/21043") - - def test_flattened_side_input(self): - # Blocked on support for transcoding - # https://github.com/apache/beam/issues/20984 - super().test_flattened_side_input(with_transcoding=False) - - def test_flatten_and_gbk(self): - # Blocked on support for transcoding - # https://github.com/apache/beam/issues/20984 - # Also blocked on support of flatten and groupby sharing the same input - # https://github.com/apache/beam/issues/34647 - raise unittest.SkipTest("https://github.com/apache/beam/issues/34647") - - def test_pack_combiners(self): - # Stages produced by translations.pack_combiners are fused - # by translations.greedily_fuse, which prevent the stages - # from being detecting using counters by the test. - self._test_pack_combiners(assert_using_counter_names=False) - - def test_pardo_timers(self): - # Skip until Samza portable runner supports clearing timer. - raise unittest.SkipTest("https://github.com/apache/beam/issues/21059") - - def test_register_finalizations(self): - # Skip until Samza runner supports bundle finalization. - raise unittest.SkipTest("https://github.com/apache/beam/issues/21044") - - def test_callbacks_with_exception(self): - # Skip until Samza runner supports bundle finalization. - raise unittest.SkipTest("https://github.com/apache/beam/issues/21044") - - def test_sdf_with_dofn_as_watermark_estimator(self): - # Skip until Samza runner supports SDF and self-checkpoint. 
- raise unittest.SkipTest("https://github.com/apache/beam/issues/21045") - - def test_sdf_with_sdf_initiated_checkpointing(self): - # Skip until Samza runner supports SDF. - raise unittest.SkipTest("https://github.com/apache/beam/issues/21045") - - def test_sdf_with_watermark_tracking(self): - # Skip until Samza runner supports SDF. - raise unittest.SkipTest("https://github.com/apache/beam/issues/21045") - - def test_custom_merging_window(self): - # Skip until Samza runner supports merging window fns - raise unittest.SkipTest("https://github.com/apache/beam/issues/21049") - - def test_custom_window_type(self): - raise unittest.SkipTest("https://github.com/apache/beam/issues/21049") - - def test_reshuffle_after_custom_window(self): - raise unittest.SkipTest("https://github.com/apache/beam/issues/34831") - - def test_sliding_windows(self): - raise unittest.SkipTest("https://github.com/apache/beam/issues/35429") - - -if __name__ == '__main__': - # Run the tests. - logging.getLogger().setLevel(logging.INFO) - unittest.main() diff --git a/sdks/python/test-suites/gradle.properties b/sdks/python/test-suites/gradle.properties index 6cadc5e57b44..b5cb4cdae43a 100644 --- a/sdks/python/test-suites/gradle.properties +++ b/sdks/python/test-suites/gradle.properties @@ -41,9 +41,6 @@ flink_validates_runner_precommit_py_versions=3.14 flink_validates_runner_postcommit_py_versions=3.10,3.14 flink_examples_postcommit_py_versions=3.10,3.14 -# samza runner test-suites -samza_validates_runner_postcommit_py_versions=3.10,3.14 - # spark runner test-suites spark_examples_postcommit_py_versions=3.10,3.14 diff --git a/sdks/python/test-suites/portable/build.gradle b/sdks/python/test-suites/portable/build.gradle index 41cd88acfb6a..6e28f114e02b 100644 --- a/sdks/python/test-suites/portable/build.gradle +++ b/sdks/python/test-suites/portable/build.gradle @@ -25,12 +25,6 @@ tasks.register("flinkValidatesRunner") { } } -tasks.register("samzaValidatesRunner") { - 
getVersionsAsList('samza_validates_runner_postcommit_py_versions').each { - dependsOn.add(":sdks:python:test-suites:portable:py${getVersionSuffix(it)}:samzaValidatesRunner") - } -} - tasks.register("prismValidatesRunner") { getVersionsAsList('prism_validates_runner_postcommit_py_versions').each { dependsOn.add(":sdks:python:test-suites:portable:py${getVersionSuffix(it)}:prismValidatesRunner") diff --git a/sdks/python/test-suites/portable/common.gradle b/sdks/python/test-suites/portable/common.gradle index 8c5bd6341065..17bf9989f28b 100644 --- a/sdks/python/test-suites/portable/common.gradle +++ b/sdks/python/test-suites/portable/common.gradle @@ -146,33 +146,6 @@ tasks.register("portableLocalRunnerTestWithRequirementsFile") { } } -def createSamzaRunnerTestTask(String workerType) { - def taskName = "samzaCompatibilityMatrix${workerType}" - def jobServerJar = "${rootDir}/runners/samza/job-server/build/libs/beam-runners-samza-job-server-${version}.jar" - def options = "--samza_job_server_jar=${jobServerJar} --environment_type=${workerType}" - if (workerType == 'PROCESS') { - options += " --environment_options=process_command=${buildDir.absolutePath}/sdk_worker.sh" - } - def task = toxTask(taskName, 'samza-runner-test', options) - task.configure { - dependsOn ":runners:samza:job-server:shadowJar" - if (workerType == 'DOCKER') { - dependsOn pythonContainerTask - } else if (workerType == 'PROCESS') { - dependsOn createProcessWorker - } - } - return task -} - -createSamzaRunnerTestTask('DOCKER') -createSamzaRunnerTestTask('PROCESS') -createSamzaRunnerTestTask('LOOPBACK') - -task samzaValidatesRunner() { - dependsOn 'samzaCompatibilityMatrixLOOPBACK' -} - def createSparkRunnerTestTask(String workerType) { def taskName = "sparkCompatibilityMatrix${workerType}" // `project(':runners:spark:3:job-server').shadowJar.archivePath` is not resolvable until runtime, so hard-code it here. 
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index d832a54977d9..5047109b82bc 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -326,12 +326,6 @@ extras = test commands = bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/flink_runner_test.py {posargs} -[testenv:samza-runner-test] -passenv = JAVA_HOME -extras = test -commands = - bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/samza_runner_test.py {posargs} - [testenv:spark-runner-test] extras = test commands = diff --git a/settings.gradle.kts b/settings.gradle.kts index c001a1add446..4080206bb542 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -150,8 +150,6 @@ include(":runners:prism:java") include(":runners:spark:3") include(":runners:spark:3:job-server") include(":runners:spark:3:job-server:container") -include(":runners:samza") -include(":runners:samza:job-server") include(":sdks:go") include(":sdks:go:container") include(":sdks:go:examples") diff --git a/website/www/site/content/en/blog/capability-matrix.md b/website/www/site/content/en/blog/capability-matrix.md index a3e65734eb2b..6ca5c44f3133 100644 --- a/website/www/site/content/en/blog/capability-matrix.md +++ b/website/www/site/content/en/blog/capability-matrix.md @@ -28,7 +28,7 @@ With initial code drops complete ([Dataflow SDK and Runner](https://github.com/a -While we’d love to have a world where all runners support the full suite of semantics included in the Beam Model (formerly referred to as the [Dataflow Model](https://www.vldb.org/pvldb/vol8/p1792-Akidau.pdf)), practically speaking, there will always be certain features that some runners can’t provide. For example, a Hadoop-based runner would be inherently batch-based and may be unable to (easily) implement support for unbounded collections. However, that doesn’t prevent it from being extremely useful for a large set of uses. 
In other cases, the implementations provided by one runner may have slightly different semantics that those provided by another (e.g. even though the current suite of runners all support exactly-once delivery guarantees, an [Apache Samza](https://samza.apache.org/) runner, which would be a welcome addition, would currently only support at-least-once). +While we’d love to have a world where all runners support the full suite of semantics included in the Beam Model (formerly referred to as the [Dataflow Model](https://www.vldb.org/pvldb/vol8/p1792-Akidau.pdf)), practically speaking, there will always be certain features that some runners can’t provide. For example, a Hadoop-based runner would be inherently batch-based and may be unable to (easily) implement support for unbounded collections. However, that doesn’t prevent it from being extremely useful for a large set of uses. In other cases, the implementations provided by one runner may have slightly different semantics that those provided by another (e.g. even though the current suite of runners all support exactly-once delivery guarantees, an [Apache Samza](https://samza.apache.org/) runner, supported in Beam 2.73.0 and before, only supported at-least-once). To help clarify things, we’ve been working on enumerating the key features of the Beam model in a [capability matrix](/documentation/runners/capability-matrix/) for all existing runners, categorized around the four key questions addressed by the model: What / Where / When / How (if you’re not familiar with those questions, you might want to read through [Streaming 102](https://oreilly.com/ideas/the-world-beyond-batch-streaming-102) for an overview). This table will be maintained over time as the model evolves, our understanding grows, and runners are created or features added. 
diff --git a/website/www/site/content/en/contribute/postcommits-policies-details.md b/website/www/site/content/en/contribute/postcommits-policies-details.md index bb0f75b0ce25..55ca7b0aedd7 100644 --- a/website/www/site/content/en/contribute/postcommits-policies-details.md +++ b/website/www/site/content/en/contribute/postcommits-policies-details.md @@ -91,7 +91,7 @@ implement a new unit test that covers a problematic code branch. ## Inform the community if Beam breaks downstream projects {#inform_community} There are multiple external projects depending on Beam which contain tests that are -outside of Beam repository. For example, Dataflow, Samza runner, and IBM Streams. +outside of Beam repository. For example, Dataflow, Scio, and IBM Streams. When an external project encounters an issue caused by (a PR) in Beam and, in consequence, requests for a change in the Beam repository, diff --git a/website/www/site/content/en/documentation/runners/samza.md b/website/www/site/content/en/documentation/runners/samza.md index c36d06fee861..9dbbcffb49d4 100644 --- a/website/www/site/content/en/documentation/runners/samza.md +++ b/website/www/site/content/en/documentation/runners/samza.md @@ -19,7 +19,7 @@ limitations under the License. # Using the Apache Samza Runner -**Note** Samza runner is deprecated and the support is planned to be removed in Beam 3.0 ([Issue](https://github.com/apache/beam/issues/35448)). +**Note** The Samza runner is no longer supported as of Beam 2.74.0 ([Issue](https://github.com/apache/beam/issues/35448)). The Apache Samza Runner can be used to execute Beam pipelines using [Apache Samza](https://samza.apache.org/). The Samza Runner executes Beam pipeline in a Samza application and can run locally. The application can further be built into a .tgz file, and deployed to a YARN cluster or Samza standalone cluster with Zookeeper. @@ -44,7 +44,7 @@ The Samza Runner is built on Samza version greater than 1.0.
org.apache.beam beam-runners-samza - {{< param release_latest >}} + 2.73.0 runtime diff --git a/website/www/site/content/en/get-started/beam-overview.md b/website/www/site/content/en/get-started/beam-overview.md index 5a8fcd3b917c..c50ebdc39b54 100644 --- a/website/www/site/content/en/get-started/beam-overview.md +++ b/website/www/site/content/en/get-started/beam-overview.md @@ -50,12 +50,15 @@ Beam currently supports the following runners: - [Direct Runner](/documentation/runners/direct) - [Apache Flink Runner](/documentation/runners/flink) Apache Flink logo - [Apache Nemo Runner](/documentation/runners/nemo) -- [Apache Samza Runner](/documentation/runners/samza) Apache Samza logo - [Apache Spark Runner](/documentation/runners/spark) Apache Spark logo - [Google Cloud Dataflow Runner](/documentation/runners/dataflow) Google Cloud Dataflow logo - [Hazelcast Jet Runner](/documentation/runners/jet) Hazelcast Jet logo - [Twister2 Runner](/documentation/runners/twister2) Twister2 logo +Runners supported in older Beam versions + +- [Apache Samza Runner](/documentation/runners/samza) Apache Samza logo (supported through Beam 2.73.0; removed in Beam 2.74.0). + **Note:** You can always execute your pipeline locally for testing and debugging purposes.
## Get Started diff --git a/website/www/site/content/en/get-started/quickstart-java.md b/website/www/site/content/en/get-started/quickstart-java.md index 3152351facf5..d911918b9d2b 100644 --- a/website/www/site/content/en/get-started/quickstart-java.md +++ b/website/www/site/content/en/get-started/quickstart-java.md @@ -185,7 +185,6 @@ To run the WordCount pipeline: * [FlinkRunner](/documentation/runners/flink) * [SparkRunner](/documentation/runners/spark) * [DataflowRunner](/documentation/runners/dataflow) - * [SamzaRunner](/documentation/runners/samza) * [NemoRunner](/documentation/runners/nemo) * [JetRunner](/documentation/runners/jet) @@ -221,10 +220,6 @@ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ --inputFile=gs://apache-beam-samples/shakespeare/* --output=gs:///counts" \ -Pdataflow-runner {{< /runner >}} -{{< runner samza >}} -mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ - -Dexec.args="--inputFile=sample.txt --output=/tmp/counts --runner=SamzaRunner" -Psamza-runner -{{< /runner >}} {{< runner nemo >}} mvn package -Pnemo-runner && java -cp target/word-count-beam-bundled-0.1.jar org.apache.beam.examples.WordCount \ --runner=NemoRunner --inputFile=`pwd`/sample.txt --output=counts @@ -262,10 +257,6 @@ mvn compile exec:java -D exec.mainClass=org.apache.beam.examples.WordCount ` --inputFile=gs://apache-beam-samples/shakespeare/* --output=gs:///counts" ` -P dataflow-runner {{< /runner >}} -{{< runner samza >}} -mvn compile exec:java -D exec.mainClass=org.apache.beam.examples.WordCount ` - -D exec.args="--inputFile=sample.txt --output=/tmp/counts --runner=SamzaRunner" -P samza-runner -{{< /runner >}} {{< runner nemo >}} mvn package -P nemo-runner -DskipTests java -cp target/word-count-beam-bundled-0.1.jar org.apache.beam.examples.WordCount ` @@ -299,9 +290,6 @@ gradle clean execute -DmainClass=org.apache.beam.examples.WordCount \ --args="--project= --inputFile=gs://apache-beam-samples/shakespeare/* \ 
--output=gs:///counts --runner=DataflowRunner" -Pdataflow-runner {{< /runner >}} -{{< runner samza>}} -TODO: document Samza on Gradle: https://github.com/apache/beam/issues/21500 -{{< /runner >}} {{< runner nemo>}} TODO: document Nemo on Gradle: https://github.com/apache/beam/issues/21503 {{< /runner >}} @@ -329,9 +317,6 @@ TODO: document Spark on Gradle: https://github.com/apache/beam/issues/21502 --args="--project= --inputFile=gs://apache-beam-samples/shakespeare/* \ --output=gs:///counts --runner=DataflowRunner" -Pdataflow-runner {{< /runner >}} -{{< runner samza>}} -TODO: document Samza on Gradle: https://github.com/apache/beam/issues/21500 -{{< /runner >}} {{< runner nemo>}} TODO: document Nemo on Gradle: https://github.com/apache/beam/issues/21503 {{< /runner >}} @@ -360,9 +345,6 @@ ls counts* {{< /runner >}} {{< runner dataflow >}} gsutil ls gs:///counts* - {{< /runner >}} - {{< runner samza >}} -ls /tmp/counts* {{< /runner >}} {{< runner nemo >}} ls counts* @@ -387,9 +369,6 @@ more counts* {{< /runner >}} {{< runner dataflow >}} gsutil cat gs:///counts* - {{< /runner >}} - {{< runner samza >}} -more /tmp/counts* {{< /runner >}} {{< runner nemo >}} more counts* diff --git a/website/www/site/content/en/get-started/wordcount-example.md b/website/www/site/content/en/get-started/wordcount-example.md index e91226c247ca..157019230a65 100644 --- a/website/www/site/content/en/get-started/wordcount-example.md +++ b/website/www/site/content/en/get-started/wordcount-example.md @@ -376,11 +376,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ -Pdataflow-runner {{< /runner >}} -{{< runner samza >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ - -Dexec.args="--inputFile=pom.xml --output=counts --runner=SamzaRunner" -Psamza-runner -{{< /runner >}} - {{< runner nemo >}} $ mvn package -Pnemo-runner && java -cp target/word-count-beam-bundled-0.1.jar org.apache.beam.examples.WordCount \ --runner=NemoRunner 
--inputFile=`pwd`/pom.xml --output=counts @@ -428,10 +423,6 @@ python -m apache_beam.examples.wordcount --input gs://dataflow-samples/shakespea --temp_location gs://YOUR_GCS_BUCKET/tmp/ {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Python SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Python SDK. {{< /runner >}} @@ -476,10 +467,6 @@ $ wordcount --input gs://dataflow-samples/shakespeare/kinglear.txt \ --worker_harness_container_image=apache/beam_go_sdk:latest {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Go SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Go SDK. {{< /runner >}} @@ -716,11 +703,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.DebuggingWordC -Pdataflow-runner {{< /runner >}} -{{< runner samza >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.DebuggingWordCount \ - -Dexec.args="--runner=SamzaRunner --output=counts" -Psamza-runner -{{< /runner >}} - {{< runner nemo >}} $ mvn package -Pnemo-runner && java -cp target/word-count-beam-bundled-0.1.jar org.apache.beam.examples.DebuggingWordCount \ --runner=NemoRunner --inputFile=`pwd`/pom.xml --output=counts @@ -762,10 +744,6 @@ python -m apache_beam.examples.wordcount_debugging --input gs://dataflow-samples --temp_location gs://YOUR_GCS_BUCKET/tmp/ {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Python SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Python SDK. {{< /runner >}} @@ -810,10 +788,6 @@ $ debugging_wordcount --input gs://dataflow-samples/shakespeare/kinglear.txt \ --worker_harness_container_image=apache-docker-beam-snapshots-docker.bintray.io/beam/go:20180515 {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Go SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Go SDK. 
{{< /runner >}} @@ -1048,11 +1022,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WindowedWordCo -Pdataflow-runner {{< /runner >}} -{{< runner samza >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WindowedWordCount \ - -Dexec.args="--runner=SamzaRunner --inputFile=pom.xml --output=counts" -Psamza-runner -{{< /runner >}} - {{< runner nemo >}} $ mvn package -Pnemo-runner && java -cp target/word-count-beam-bundled-0.1.jar org.apache.beam.examples.WindowedWordCount \ --runner=NemoRunner --inputFile=`pwd`/pom.xml --output=counts @@ -1098,10 +1067,6 @@ python -m apache_beam.examples.windowed_wordcount --input YOUR_INPUT_FILE \ --temp_location gs://YOUR_GCS_BUCKET/tmp/ {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Python SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Python SDK. {{< /runner >}} @@ -1145,10 +1110,6 @@ $ windowed_wordcount --input gs://dataflow-samples/shakespeare/kinglear.txt \ --worker_harness_container_image=apache-docker-beam-snapshots-docker.bintray.io/beam/go:20180515 {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Go SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Go SDK. {{< /runner >}} @@ -1410,10 +1371,6 @@ python -m apache_beam.examples.streaming_wordcount \ --streaming {{< /runner >}} -{{< runner samza >}} -This runner is not yet available for the Python SDK. -{{< /runner >}} - {{< runner nemo >}} This runner is not yet available for the Python SDK. {{< /runner >}} diff --git a/website/www/site/content/en/roadmap/_index.md b/website/www/site/content/en/roadmap/_index.md index 698b41dd798f..e764898d3f84 100644 --- a/website/www/site/content/en/roadmap/_index.md +++ b/website/www/site/content/en/roadmap/_index.md @@ -36,7 +36,7 @@ Beam 3 is the planned first major version upgrade. 
See https://s.apache.org/beam Portability is the primary Beam vision: running pipelines authored with _any SDK_ on _any runner_. This is a cross-cutting effort across Java, Python, and Go, and every Beam runner. Portability is currently supported on the -[DataFlow](/documentation/runners/dataflow), [Flink](/documentation/runners/flink/), [Jet](/documentation/runners/jet), [Nemo](/documentation/runners/nemo), [Prism](/documentation/runners/prism/), [Samza](/documentation/runners/samza), [Spark](/documentation/runners/spark/), and [Twister2](/documentation/runners/Twister2) +[DataFlow](/documentation/runners/dataflow), [Flink](/documentation/runners/flink/), [Jet](/documentation/runners/jet), [Nemo](/documentation/runners/nemo), [Prism](/documentation/runners/prism/), [Spark](/documentation/runners/spark/), and [Twister2](/documentation/runners/Twister2) runners. See the details on the [Portability Roadmap](/roadmap/portability/) diff --git a/website/www/site/content/en/roadmap/go-sdk.md b/website/www/site/content/en/roadmap/go-sdk.md index 4e2cc4ddba11..eb16b7936f73 100644 --- a/website/www/site/content/en/roadmap/go-sdk.md +++ b/website/www/site/content/en/roadmap/go-sdk.md @@ -20,7 +20,7 @@ limitations under the License. The Go SDK is [fully released as of v2.33.0](/blog/go-sdk-release/). The Go SDK the first SDK purely on the [Beam Portability Framework](/roadmap/portability/) -and can execute pipelines on portable runners, like Flink, Spark, Samza, and Google Cloud Dataflow. +and can execute pipelines on portable runners, like Flink, Spark, and Google Cloud Dataflow. Current roadmap: * continue building up unbounded pipeline facing features, as described on the [Beam Dev Wiki](https://cwiki.apache.org/confluence/display/BEAM/Supporting+Streaming+in+the+Go+SDK). 
diff --git a/website/www/site/content/en/roadmap/samza-runner.md b/website/www/site/content/en/roadmap/samza-runner.md index c27430601b09..801629222c68 100644 --- a/website/www/site/content/en/roadmap/samza-runner.md +++ b/website/www/site/content/en/roadmap/samza-runner.md @@ -17,7 +17,7 @@ limitations under the License. # Samza Runner Roadmap -**Note** Samza runner is deprecated and the support is planned to be removed in Beam 3.0 ([Issue](https://github.com/apache/beam/issues/35448)). +**Note** Samza runner is no longer supported since Beam 2.74.0 ([Issue](https://github.com/apache/beam/issues/35448)). For references, here are available resources: diff --git a/website/www/site/data/capability_matrix.yaml b/website/www/site/data/capability_matrix.yaml index c1da306b9cb7..3a753262eeae 100644 --- a/website/www/site/data/capability_matrix.yaml +++ b/website/www/site/data/capability_matrix.yaml @@ -22,8 +22,6 @@ capability-matrix: name: Apache Spark (RDD/DStream based) - class: spark-dataset name: Apache Spark Structured Streaming (Dataset based) - - class: samza - name: Apache Samza - class: nemo name: Apache Nemo - class: jet @@ -68,10 +66,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: ParDo applies per-element transformations as Spark FlatMapFunction. - - class: samza - l1: "Yes" - l2: fully supported - l3: Supported with per-element transformation. - class: nemo l1: "Yes" l2: fully supported @@ -115,10 +109,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "Using Spark's groupByKey." - - class: samza - l1: "Yes" - l2: fully supported - l3: "Uses Samza's partitionBy for key grouping and Beam's logic for window aggregation and triggering." - class: nemo l1: "Yes" l2: fully supported @@ -162,10 +152,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: Some corner cases like flatten on empty collections are not yet supported. 
- - class: samza - l1: "Yes" - l2: fully supported - l3: "" - class: nemo l1: "Yes" l2: fully supported @@ -209,10 +195,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "Using Spark's Aggregator and agg function" - - class: samza - l1: "Yes" - l2: fully supported - l3: Use combiner for efficient pre-aggregation. - class: nemo l1: "Yes" l2: fully supported @@ -256,10 +238,6 @@ capability-matrix: l1: "Partially" l2: supported via inlining only in batch mode l3: "" - - class: samza - l1: "Partially" - l2: supported via inlining - l3: "" - class: nemo l1: "Yes" l2: fully supported @@ -303,10 +281,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "Using Spark's broadcast variables." - - class: samza - l1: "Yes" - l2: fully supported - l3: Uses Samza's broadcast operator to distribute the side inputs. - class: nemo l1: "Yes" l2: fully supported @@ -350,10 +324,6 @@ capability-matrix: l1: "Partially" l2: bounded source only l3: "Using Spark's DatasourceV2 API in microbatch mode (Continuous streaming mode is tagged experimental in spark and does not support aggregation)." - - class: samza - l1: "Yes" - l2: fully supported - l3: "" - class: nemo l1: "Yes" l2: fully supported @@ -397,10 +367,6 @@ capability-matrix: l1: "Partially" l2: All metric types are supported in batch mode. l3: Only attempted values are supported. No committed values for metrics. - - class: samza - l1: "Partially" - l2: Counter and Gauge are supported. - l3: Only attempted values are supported. No committed values for metrics. - class: nemo l1: "No" l2: not implemented diff --git a/website/www/site/layouts/partials/section-menu/en/runners.html b/website/www/site/layouts/partials/section-menu/en/runners.html index 58576bb5f963..337debf3ecec 100644 --- a/website/www/site/layouts/partials/section-menu/en/runners.html +++ b/website/www/site/layouts/partials/section-menu/en/runners.html @@ -16,7 +16,6 @@

  • Prism Runner
  • Apache Flink
  • Apache Nemo
  • -
  • Apache Samza
  • Apache Spark
  • Google Cloud Dataflow
  • Hazelcast Jet