Skip to content

Commit 9773685

Browse files
authored
Additional validations (#116)
1 parent 6bd1b15 commit 9773685

File tree

2 files changed: 11 additions and 3 deletions

2 files changed: 11 additions and 3 deletions

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ def send_log(msg: str):
3939
send_log("Start setup PyLib")
4040
setup(
4141
name="upgini",
42-
version="1.1.15",
42+
version="1.1.16",
4343
description="Low-code feature search and enrichment library for machine learning",
4444
long_description=(here / "README.md").read_text(encoding="utf-8"),
4545
long_description_content_type="text/markdown",

src/upgini/features_enricher.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -703,18 +703,26 @@ def __inner_fit(
703703

704704
if isinstance(y, pd.Series):
705705
y_array = y.values
706-
else:
706+
elif isinstance(y, np.ndarray):
707707
y_array = y
708+
else:
709+
y_array = np.array(y)
710+
711+
if len(np.unique(y_array)) < 2:
712+
raise ValueError("y is a constant, please check your training dataset")
708713

709714
if X.shape[0] != len(y_array):
710715
raise ValueError("X and y should be the same size")
711716

717+
if len(set(X.columns)) != len(X.columns):
718+
raise ValueError("X contains duplicating columns names, please check your training dataset")
719+
712720
self.__prepare_search_keys(X)
713721

714722
df: pd.DataFrame = X.copy() # type: ignore
715723
df[self.TARGET_NAME] = y_array
716724

717-
self.logger.info(f"First dataset row:\n{df.head(1)}")
725+
self.logger.info(f"First 10 rows of the dataset:\n{df.head(10)}")
718726

719727
df = self.__handle_index_search_keys(df)
720728

0 commit comments

Comments
 (0)