// |
| name | sap-hana-ml |
| description | SAP HANA Machine Learning Python Client (hana-ml) development skill. Use when: Building ML solutions with SAP HANA's in-database machine learning using Python hana-ml library for PAL/APL algorithms, DataFrame operations, AutoML, model persistence, and visualization. Keywords: hana-ml, SAP HANA, machine learning, PAL, APL, predictive analytics, HANA DataFrame, ConnectionContext, classification, regression, clustering, time series, ARIMA, gradient boosting, AutoML, SHAP, model storage |
| license | GPL-3.0 |
| metadata | {"version":"1.1.0","last_verified":"2025-11-27T00:00:00.000Z","package_version":"2.22.241011"} |
Package Version: 2.22.241011
Last Verified: 2025-11-27
pip install hana-ml
Requirements: Python 3.8+, SAP HANA 2.0 SPS03+ or SAP HANA Cloud
from hana_ml import ConnectionContext
# Connect
conn = ConnectionContext(
address='<hostname>',
port=443,
user='<username>',
password='<password>',
encrypt=True
)
# Create DataFrame
df = conn.table('MY_TABLE', schema='MY_SCHEMA')
print(f"Shape: {df.shape}")
df.head(10).collect()
from hana_ml.algorithms.pal.unified_classification import UnifiedClassification
# Train model
clf = UnifiedClassification(func='RandomDecisionTree')
clf.fit(train_df, features=['F1', 'F2', 'F3'], label='TARGET')
# Predict & evaluate
predictions = clf.predict(test_df, features=['F1', 'F2', 'F3'])
score = clf.score(test_df, features=['F1', 'F2', 'F3'], label='TARGET')
from hana_ml.algorithms.apl.classification import AutoClassifier
# Automated classification
auto_clf = AutoClassifier()
auto_clf.fit(train_df, label='TARGET')
predictions = auto_clf.predict(test_df)
from hana_ml.model_storage import ModelStorage
ms = ModelStorage(conn)
clf.name = 'MY_CLASSIFIER'
ms.save_model(model=clf, if_exists='replace')
- UnifiedClassification, UnifiedRegression, KMeans, ARIMA — see references/PAL_ALGORITHMS.md for complete list
- AutoClassifier, AutoRegressor, GradientBoostingClassifier — see references/APL_ALGORITHMS.md for details
- collect() called — see references/DATAFRAME_REFERENCE.md for complete API
- See references/VISUALIZERS.md for 14 visualization modules
from hana_ml.algorithms.pal.partition import train_test_val_split
train, test, val = train_test_val_split(
data=df,
training_percentage=0.7,
testing_percentage=0.2,
validation_percentage=0.1
)
# APL models
importance = auto_clf.get_feature_importances()
# PAL models
from hana_ml.algorithms.pal.preprocessing import FeatureSelection
fs = FeatureSelection()
fs.fit(train_df, features=features, label='TARGET')
from hana_ml.algorithms.pal.pipeline import Pipeline
from hana_ml.algorithms.pal.preprocessing import Imputer, FeatureNormalizer
pipeline = Pipeline([
('imputer', Imputer(strategy='mean')),
('normalizer', FeatureNormalizer()),
('classifier', UnifiedClassification(func='RandomDecisionTree'))
])
- collect()
- ModelStorage for persistence
- PipelineProgressStatusMonitor for long-running jobs
references/DATAFRAME_REFERENCE.md (10,356 lines)
references/PAL_ALGORITHMS.md (18,647 lines)
references/APL_ALGORITHMS.md (12,271 lines)
references/VISUALIZERS.md (13,535 lines)
references/SUPPORTING_MODULES.md (13,692 lines)
from hana_ml.ml_exceptions import Error
try:
    clf.fit(train_df, features=features, label='TARGET')
except Error as e:
    print(f"HANA ML Error: {e}")