HDFS Slots#
- class onetl.connection.file_connection.hdfs.slots.HDFSSlots#
Slots that could be implemented by third-party plugins
- static normalize_cluster_name(cluster: str) str | None #
Normalize cluster name passed into HDFS constructor.
If hooks didn’t return anything, cluster name is left intact.
- Parameters:
- cluster
str
Cluster name
- cluster
- Returns:
- str | None
Normalized cluster name.
If hook cannot be applied to a specific cluster, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.normalize_cluster_name.bind
@hook
def normalize_cluster_name(cluster: str) -> str:
    return cluster.lower()
- static normalize_namenode_host(host: str, cluster: str | None) str | None #
Normalize namenode host passed into HDFS constructor.
If hooks didn’t return anything, host is left intact.
- Parameters:
- host
str
Namenode host (raw)
- cluster
str or None
Cluster name (normalized), if set
- host
- Returns:
- str | None
Normalized namenode host name.
If hook cannot be applied to a specific host name, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.normalize_namenode_host.bind
@hook
def normalize_namenode_host(host: str, cluster: str) -> str | None:
    if cluster == "rnd-dwh":
        if not host.endswith(".domain.com"):
            # fix missing domain name
            host += ".domain.com"
        return host
    return None
- static get_known_clusters() set[str] | None #
Return collection of known clusters.
Cluster passed into HDFS constructor should be present in this list. If hooks didn’t return anything, no validation will be performed.
- Returns:
- set[str] | None
Collection of cluster names (in normalized form).
If hook cannot be applied, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.get_known_clusters.bind
@hook
def get_known_clusters() -> set[str]:
    return {"rnd-dwh", "rnd-prod"}
- static get_cluster_namenodes(cluster: str) set[str] | None #
Return collection of known namenodes for the cluster.
Namenode host passed into HDFS constructor should be present in this list. If hooks didn’t return anything, no validation will be performed.
- Parameters:
- cluster
str
Cluster name (normalized)
- cluster
- Returns:
- set[str] | None
Collection of host names (in normalized form).
If hook cannot be applied, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.get_cluster_namenodes.bind
@hook
def get_cluster_namenodes(cluster: str) -> set[str] | None:
    if cluster == "rnd-dwh":
        return {"namenode1.domain.com", "namenode2.domain.com"}
    return None
- static get_current_cluster() str | None #
Get current cluster name.
Used in get_current_cluster to automatically fill up the cluster attribute of a connection.
If hooks didn’t return anything, calling the method above will raise an exception.
- Returns:
- str | None
Current cluster name (in normalized form).
If hook cannot be applied, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.get_current_cluster.bind
@hook
def get_current_cluster() -> str:
    # some magic here
    return "rnd-dwh"
- static get_webhdfs_port(cluster: str) int | None #
Get WebHDFS port number for a specific cluster.
Used by constructor to automatically set port number if omitted.
- Parameters:
- cluster
str
Cluster name (normalized)
- cluster
- Returns:
- int | None
WebHDFS port number.
If hook cannot be applied, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.get_webhdfs_port.bind
@hook
def get_webhdfs_port(cluster: str) -> int | None:
    if cluster == "rnd-dwh":
        return 50007  # Cloudera
    return None
- static is_namenode_active(host: str, cluster: str | None) bool | None #
Check whether a namenode of a specified cluster is active (=not standby) or not.
- Used for:
  - If HDFS connection is created without host:
    Connector will iterate over get_cluster_namenodes of a cluster to get active namenode, and then use it instead of the host attribute.
  - If HDFS connection is created with host:
    check will determine whether this host is active.
- Parameters:
- host
str
Namenode host (normalized)
- cluster
str or None
Cluster name (normalized), if set
- host
- Returns:
- bool | None
True if namenode is active, False if not.
If hook cannot be applied, it should return None.
Examples
from onetl.connection import HDFS
from onetl.hooks import hook

@HDFS.Slots.is_namenode_active.bind
@hook
def is_namenode_active(host: str, cluster: str | None) -> bool:
    # some magic here
    return True