Ungraded Lab: Model Analysis with TFX Evaluator

Now that you've used TFMA as a standalone library in the previous lab, you will see how TFX uses it through its Evaluator component. This component runs after your Trainer and checks whether your trained model meets the minimum required metrics; it also compares the model with previously generated models.

You will go through a TFX pipeline that prepares and trains the same model architecture you used in the previous lab. As a reminder, this is a binary classifier to be trained on the Census Income dataset. Since you're already familiar with the earlier TFX components, we will go over them quickly, but we've placed notes on where you can modify the code if you want to practice or produce a better result.

Let's begin!

Credits: Some of the code and discussions are based on the TensorFlow team's official tutorial.

Setup

Install TFX
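For reference, the installation cell is typically just a pip command (the exact version pin, if any, is whatever the lab specifies; this is only a sketch):

```python
# Install TFX inside the notebook. The lab may pin a specific version.
!pip install -U tfx
```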

Note: In Google Colab, you need to restart the runtime at this point to finalize updating the packages you just installed. You can do so by clicking the Restart Runtime button at the end of the output cell above (after installation), or by selecting Runtime > Restart Runtime in the menu bar. Please do not proceed to the next section without restarting. You can also ignore errors about version incompatibility of some of the bundled packages because we won't be using those in this notebook.

Imports

Set up pipeline paths

Download and prepare the dataset

Here, you will download the training split of the Census Income Dataset. This is twice as large as the test dataset you used in the previous lab.

Take a quick look at the first few rows.
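A rough sketch of these two steps, assuming the UCI-hosted copy of the dataset and hypothetical local paths (the lab's own cell may use a different URL and filenames):

```python
import os
import urllib.request
import pandas as pd

# Hypothetical locations -- the lab's own cell may use different paths/URLs.
_data_root = './data/census'
_data_filepath = os.path.join(_data_root, 'adult.data')
os.makedirs(_data_root, exist_ok=True)

# Training split of the Census Income (Adult) dataset from the UCI repository.
urllib.request.urlretrieve(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
    _data_filepath)

# Peek at the first few rows. The raw UCI file has no header row, so the
# lab's copy of the CSV may already have column names prepended.
pd.read_csv(_data_filepath, header=None).head()
```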

TFX Pipeline

Create the InteractiveContext

As usual, you will initialize the pipeline and use a local SQLite file for the metadata store.
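A minimal sketch, assuming a hypothetical _pipeline_root directory (with no explicit metadata config, the context creates the SQLite metadata store under the pipeline root):

```python
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

# Hypothetical pipeline root; the SQLite metadata file is created under it
# because no explicit metadata connection config is passed.
_pipeline_root = './pipeline'
context = InteractiveContext(pipeline_root=_pipeline_root)
```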

ExampleGen

You will start by ingesting the data through CsvExampleGen. The code below uses the default 2:1 train-eval split (i.e. 33% of the data goes to eval), but feel free to modify it if you want. You can review splitting techniques here.
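A sketch of the component wiring with an explicit 2:1 split, assuming the CSV lives under a hypothetical _data_root directory:

```python
from tfx.components import CsvExampleGen
from tfx.proto import example_gen_pb2

# hash_buckets act as proportional weights: 2 buckets for train, 1 for eval.
output_config = example_gen_pb2.Output(
    split_config=example_gen_pb2.SplitConfig(splits=[
        example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=2),
        example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1),
    ]))

example_gen = CsvExampleGen(input_base=_data_root, output_config=output_config)
context.run(example_gen)
```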

StatisticsGen

You will then compute the statistics so they can be used by the next components.

You can look at the visualizations below if you want to explore the data some more.
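A minimal sketch of this step:

```python
from tfx.components import StatisticsGen

statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)

# Render the TFDV visualizations for the computed statistics.
context.show(statistics_gen.outputs['statistics'])
```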

SchemaGen

You can then infer the dataset schema with SchemaGen. This will be used to validate incoming data to ensure that it is formatted correctly.

For simplicity, you will just accept the inferred schema but feel free to modify with the TFDV API if you want.
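A minimal sketch (infer_feature_shape is set explicitly here; your notebook may use a different value):

```python
from tfx.components import SchemaGen

schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'],
    infer_feature_shape=False)
context.run(schema_gen)

# Display the inferred schema.
context.show(schema_gen.outputs['schema'])
```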

ExampleValidator

Next, run ExampleValidator to check if there are anomalies in the data.
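A minimal sketch:

```python
from tfx.components import ExampleValidator

example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'])
context.run(example_validator)

# Show any detected anomalies.
context.show(example_validator.outputs['anomalies'])
```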

If you just used the inferred schema, then there should not be any anomalies detected. If you modified the schema, then there might be some results here and you can again use TFDV to modify or relax constraints as needed.

In actual deployments, this component will also help you understand how your data evolves over time and identify data errors. For example, the first batches of data that you get from your users might conform to the schema but it might not be the case after several months. This component will detect that and let you know that your model might need to be updated.

Transform

Now you will perform feature engineering on the training data. As shown when you previewed the CSV earlier, the data is still in raw format and cannot be consumed by the model just yet. The transform code in the following cells will take care of scaling your numeric features and one-hot encoding your categorical variables.

Note: If you're running this exercise for the first time, we advise that you leave the transformation code as is. After you've gone through the entire notebook, then you can modify these for practice and see what results you get. Just make sure that your model builder code in the Trainer component will also reflect those changes if needed. For example, removing a feature here should also remove an input layer for that feature in the model.
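To give a flavor of what the transform module file contains, here is a sketch of a preprocessing_fn with hypothetical feature names; the lab's module defines its own feature lists, usually densifies sparse CSV-derived features first, and one-hot encodes the vocabulary ids:

```python
import tensorflow_transform as tft

# Hypothetical feature names -- the actual module file defines its own lists.
_NUMERIC_FEATURES = ['age', 'hours-per-week']
_CATEGORICAL_FEATURES = ['education', 'occupation']
_LABEL_KEY = 'label'


def preprocessing_fn(inputs):
    """Callback that tf.Transform invokes with a dict of raw feature tensors."""
    outputs = {}
    for key in _NUMERIC_FEATURES:
        # Scale numeric columns to zero mean and unit variance.
        outputs[key + '_xf'] = tft.scale_to_z_score(inputs[key])
    for key in _CATEGORICAL_FEATURES:
        # Map strings to integer vocabulary ids (the lab's module additionally
        # one-hot encodes these ids).
        outputs[key + '_xf'] = tft.compute_and_apply_vocabulary(inputs[key])
    outputs[_LABEL_KEY] = inputs[_LABEL_KEY]
    return outputs
```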

Now, we pass in this feature engineering code to the Transform component and run it to transform your data.
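The component wiring looks roughly like this, assuming the feature engineering code lives in a module file whose path is stored in a hypothetical _transform_module_file variable:

```python
from tfx.components import Transform

transform = Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file=_transform_module_file)
context.run(transform)
```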

You can see a sample result for one row with the code below. Notice that the numeric features are indeed scaled and the categorical features are now one-hot encoded.
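One way to peek at a single transformed record (a sketch; the split subdirectory name, e.g. Split-train vs. train, depends on your TFX version):

```python
import os
import tensorflow as tf

# Locate the transformed training examples (gzipped TFRecord files).
transformed_uri = transform.outputs['transformed_examples'].get()[0].uri
train_dir = os.path.join(transformed_uri, 'Split-train')
tfrecord_files = [os.path.join(train_dir, name) for name in os.listdir(train_dir)]

# Parse and print one transformed example.
dataset = tf.data.TFRecordDataset(tfrecord_files, compression_type='GZIP')
for record in dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(record.numpy())
    print(example)
```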

As you already know, the Transform component outputs not only the transformed examples but also the transformation graph. This graph should be applied to all inputs when your model is deployed to ensure that they are transformed the same way as your training data. Otherwise, you can end up with training-serving skew, which leads to noisy predictions.

The Transform component stores related files in its transform_graph output and it would be good to quickly review its contents before we move on to the next component. As shown below, the URI of this output points to a directory containing three subdirectories.
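You can list that directory directly; it typically contains metadata/, transformed_metadata/, and transform_fn/:

```python
import os

transform_graph_uri = transform.outputs['transform_graph'].get()[0].uri
print(os.listdir(transform_graph_uri))
```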

Trainer

Next, you will build the model to make your predictions. As mentioned earlier, this is a binary classifier where the label is 1 if a person earns more than 50k USD and 0 if less than or equal. The model here uses the wide and deep architecture as a reference, but feel free to modify it after you've completed the exercise. Also, for simplicity, the hyperparameters (e.g. number of hidden units) have been hardcoded, but feel free to use a Tuner component as you did in Week 1 if you want to get some practice.

As a reminder, it is best to start from run_fn() when you're reviewing the module file below. The Trainer component looks for that function first. All other functions defined in the module are just utility functions for run_fn().

Another thing you will notice below is the _get_serve_tf_examples_fn() function. This is tied to the serving_default signature, which makes it possible to pass in raw features when the model is served for inference. You saw this in action in the previous lab. It works by decorating the enclosing function, serve_tf_examples_fn(), with tf.function, which indicates that the computation will be done by first tracing a TensorFlow graph. You will notice that this function uses model.tft_layer, which comes from the transform_graph output. When you call the .get_concrete_function() method on this tf.function in run_fn(), you create the graph that will be used in later computations. This graph is used whenever you pass in an examples argument pointing to a Tensor with tf.string dtype, which matches the format of the serialized batches of data you used in the previous lab.
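The pattern looks roughly like this (a sketch adapted from the official tutorial credited above; _LABEL_KEY and the variables used inside run_fn() are placeholders for whatever the lab's module file actually defines):

```python
import tensorflow as tf

_LABEL_KEY = 'label'  # placeholder label name


def _get_serve_tf_examples_fn(model, tf_transform_output):
    """Returns a function that parses serialized tf.Examples and applies TFT."""
    # Attach the Transform graph as a Keras layer so it is saved with the model.
    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        feature_spec = tf_transform_output.raw_feature_spec()
        feature_spec.pop(_LABEL_KEY)  # the label is not available at serving time
        parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)
        transformed_features = model.tft_layer(parsed_features)
        return model(transformed_features)

    return serve_tf_examples_fn


# Inside run_fn(), the concrete function is registered as the default signature
# (model, tf_transform_output, and fn_args are defined there):
signatures = {
    'serving_default':
        _get_serve_tf_examples_fn(model, tf_transform_output).get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
}
model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
```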

Now, we pass in this model code to the Trainer component and run it to train the model.
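Wiring the component looks roughly like this, assuming a hypothetical _trainer_module_file path and illustrative step counts (recent TFX versions use the generic, run_fn()-based executor by default; older versions need custom_executor_spec):

```python
from tfx.components import Trainer
from tfx.proto import trainer_pb2

trainer = Trainer(
    module_file=_trainer_module_file,                  # contains run_fn()
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(num_steps=1000),  # illustrative values
    eval_args=trainer_pb2.EvalArgs(num_steps=500))
context.run(trainer)
```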

Let's review the outputs of this component. The model output points to the exported model itself.

The model_run output acts as the working directory and can be used to store non-model artifacts (e.g., TensorBoard logs).
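You can inspect both output URIs directly (exact subdirectory names vary by TFX version):

```python
import os

model_uri = trainer.outputs['model'].get()[0].uri
model_run_uri = trainer.outputs['model_run'].get()[0].uri

print(os.listdir(model_uri))      # the exported SavedModel lives here
print(os.listdir(model_run_uri))  # e.g., TensorBoard event files
```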

Evaluator

The Evaluator component computes model performance metrics over the evaluation set using the TensorFlow Model Analysis library. The Evaluator can also optionally validate that a newly trained model is better than the previous model. This is useful in a production pipeline setting where you may automatically train and validate a model every day.

There are a few steps needed to set up this component, and you will do them in the next cells.

Define EvalConfig

First, you will define the EvalConfig message as you did in the previous lab. You can also set thresholds so that subsequent models can be compared against them. The module below should look familiar. One minor difference is that you don't have to define the candidate and baseline model names in the model_specs; those are detected automatically.
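A sketch of such a config; the label key, slice column, and threshold values are placeholders for whatever your pipeline actually uses:

```python
import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    # No candidate/baseline names needed; the Evaluator detects them.
    model_specs=[tfma.ModelSpec(label_key='label')],
    metrics_specs=[
        tfma.MetricsSpec(metrics=[
            tfma.MetricConfig(class_name='ExampleCount'),
            tfma.MetricConfig(
                class_name='BinaryAccuracy',
                threshold=tfma.MetricThreshold(
                    # Absolute floor the candidate must clear...
                    value_threshold=tfma.GenericValueThreshold(
                        lower_bound={'value': 0.5}),
                    # ...and it must not be worse than the baseline.
                    change_threshold=tfma.GenericChangeThreshold(
                        direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                        absolute={'value': -1e-10}))),
        ])
    ],
    slicing_specs=[
        tfma.SlicingSpec(),                      # overall (empty) slice
        tfma.SlicingSpec(feature_keys=['sex']),  # placeholder slice column
    ])
```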

Resolve latest blessed model

As you may remember from the last lab, you were able to validate a candidate model against a baseline by modifying the EvalConfig and EvalSharedModel definitions. That is also possible with the Evaluator component, and you will see how it is done in this section.

The first thing to note is that the Evaluator marks a model as BLESSED if it meets the metric thresholds you set in the eval config module. You can load the latest blessed model by using the LatestBlessedModelStrategy with the Resolver component. This component takes care of finding the latest blessed model for you, so you don't have to keep track of it manually. The syntax is shown below.
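A sketch of that wiring, following the official tutorial (import paths may differ slightly across TFX versions):

```python
from tfx import v1 as tfx

model_resolver = tfx.dsl.Resolver(
    strategy_class=tfx.dsl.experimental.LatestBlessedModelStrategy,
    model=tfx.dsl.Channel(type=tfx.types.standard_artifacts.Model),
    model_blessing=tfx.dsl.Channel(
        type=tfx.types.standard_artifacts.ModelBlessing)
).with_id('latest_blessed_model_resolver')

context.run(model_resolver)
```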

As expected, the search yielded 0 artifacts because you haven't evaluated any models yet. You will run this component again in later parts of this notebook and you will see a different result.

Run the Evaluator component

With the EvalConfig defined and code to load the latest blessed model available, you can proceed to run the Evaluator component.

You will notice that two models are passed into the component. The Trainer output serves as the candidate model, while the output of the Resolver serves as the baseline model. While you can run the Evaluator without comparing two models, that comparison will likely be required in production environments, so it's best to include it. Since the Resolver doesn't have any results yet, the Evaluator will just mark the candidate model as BLESSED in this run.

Aside from the eval config and models (i.e. the Trainer and Resolver outputs), you will also pass in the raw examples from ExampleGen. By default, the component looks for the eval split of these examples, and since you've defined the serving signature, they will be transformed internally before being fed to the model.
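Putting it together, a sketch of the component:

```python
from tfx.components import Evaluator

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],       # raw examples; the eval split is used
    model=trainer.outputs['model'],                  # candidate model
    baseline_model=model_resolver.outputs['model'],  # baseline (empty on the first run)
    eval_config=eval_config)
context.run(evaluator)
```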

Now let's examine the output artifacts of Evaluator.

The blessing output simply states if the candidate model was blessed. The artifact URI will have a BLESSED or NOT_BLESSED file depending on the result. As mentioned earlier, this first run will pass the evaluation because there is no baseline model yet.

The evaluation output, on the other hand, contains the evaluation logs and can be used to visualize the global metrics on the entire evaluation set.
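A quick way to inspect both outputs (a sketch):

```python
import os

# The blessing URI contains a BLESSED or NOT_BLESSED marker file.
blessing_uri = evaluator.outputs['blessing'].get()[0].uri
print(os.listdir(blessing_uri))

# Render global metrics computed over the whole eval split.
context.show(evaluator.outputs['evaluation'])
```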

To see the individual slices, you will need to import TFMA and use the commands you learned in the previous lab.

You can also use TFMA to load the validation results as before by specifying the output URI of the evaluation output. This would be more useful if your model was not blessed because you can see the metric failure prompts. Try to simulate this later by training with fewer epochs (or raising the threshold) and see the results you get here.
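A sketch of both, assuming a hypothetical slice column:

```python
import tensorflow_model_analysis as tfma

evaluation_uri = evaluator.outputs['evaluation'].get()[0].uri

# Per-slice metrics, as in the previous lab ('sex' is a placeholder slice column).
eval_result = tfma.load_eval_result(evaluation_uri)
tfma.view.render_slicing_metrics(eval_result, slicing_column='sex')

# Validation results -- most useful when the model was NOT_BLESSED.
validation_result = tfma.load_validation_result(evaluation_uri)
print(validation_result.validation_ok)
```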

Now that your Evaluator has finished running, the Resolver component should be able to detect the latest blessed model. Let's run the component again.

You should now see an artifact in the component outputs. You can also get it programmatically as shown below.
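A sketch of re-running the Resolver and reading its output (the exact way to read resolved artifacts can vary by TFX version):

```python
# Re-run the resolver so it picks up the newly blessed model...
context.run(model_resolver)

# ...then fetch the resolved Model artifact(s) programmatically.
blessed_models = model_resolver.outputs['model'].get()
print(blessed_models)
```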

Comparing two models

Now let's see how Evaluator compares two models. You will train the same model with more epochs and this should hopefully result in higher accuracy and overall metrics.

You will re-run the evaluator but you will specify the latest trainer output as the candidate model. The baseline is automatically found with the Resolver node.
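Sketched end to end, reusing the imports and variables from the earlier cells (step counts are illustrative):

```python
# Train longer, producing a new candidate model.
trainer_v2 = Trainer(
    module_file=_trainer_module_file,
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(num_steps=5000),
    eval_args=trainer_pb2.EvalArgs(num_steps=500))
context.run(trainer_v2)

# Evaluate the new candidate against the latest blessed model.
evaluator_v2 = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer_v2.outputs['model'],               # new candidate
    baseline_model=model_resolver.outputs['model'],  # latest blessed baseline
    eval_config=eval_config)
context.run(evaluator_v2)
```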

Depending on the result, the Resolver should reflect the latest blessed model. Since you trained with more epochs, it is most likely that your candidate model will pass the thresholds you set in the eval config. If so, the artifact URI should be different here compared to your earlier runs.

Finally, the evaluation output of the Evaluator component will now be able to produce the diff results you saw in the previous lab. These signify whether the metrics of the candidate model have indeed improved compared to the baseline. Unlike when using TFMA as a standalone library, visualizing this will only show results for the candidate (i.e. one row instead of two in the tabular output of the visualization below).

Note: You can ignore the warning about failing to find plots.

Congratulations! You can now successfully evaluate your models in a TFX pipeline! This is a critical part of production ML because you want to make sure that subsequent deployments are indeed improving your results. Moreover, you can extract the evaluation results from your pipeline directory for further investigation with TFMA. In the next sections, you will continue to study techniques related to model evaluation and ensuring fairness.