Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion _config.yml

This file was deleted.

12 changes: 7 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ dependencies = [
"fastparquet>=0.8.1",
"ipywidgets>=8.1.0",
"lightgbm>=3.3.2",
"numpy>=1.19.0",
"numpy>=1.19.0,<=1.26.4",
"pandas>=1.1.0,<3.0.0",
"pydantic>=1.8.2,<2.0.0",
"pydantic>1.0.0,<3.0.0",
"pyjwt>=2.8.0",
"python-dateutil>=2.8.0",
"python-json-logger>=2.0.2",
Expand Down Expand Up @@ -79,7 +79,7 @@ python = "3.10"
cov = 'pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=upgini --cov=tests'
format = "black {args}"
lint = "ruff check {args}"
test_binary = 'pytest -s -vv tests/test_binary_dataset.py'
test_all = 'pytest -s -vv tests'

[[tool.hatch.envs.test.matrix]]
python = ["3.8"]
Expand All @@ -103,7 +103,8 @@ dependencies = [
# "pytest-timeout",
"requests-mock",
"pytest-datafiles",
"pandas~={matrix:pandas}.0",
"pytest-xdist",
"pandas~={matrix:pandas}",
]

[tool.black]
Expand All @@ -115,4 +116,5 @@ profile = "black"
[tool.pytest.ini_options]
pythonpath = [
"./src"
]
]
addopts="-n 4"
7 changes: 4 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ python-dateutil>=2.8.0
requests>=2.8.0
# pandas>=1.1.0,<2.0.0
pandas>=1.1.0,<3.0.0
numpy>=1.19.0
numpy>=1.19.0,<=1.26.4
scikit-learn>=1.3.0
pydantic>=1.8.2,<2.0.0
pydantic>1.0.0,<3.0.0
fastparquet>=0.8.1
python-json-logger>=2.0.2
catboost>=1.0.3
Expand All @@ -30,4 +30,5 @@ unittest-xml-reporting
pytest-parallel
py
build
twine
twine
pytest-xdist
2 changes: 1 addition & 1 deletion src/upgini/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.317"
__version__ = "1.2.0"
142 changes: 71 additions & 71 deletions src/upgini/autofe/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@


class Min(PandasOperand):
name = "min"
is_binary = True
is_symmetrical = True
has_symmetry_importance = True
name: str = "min"
is_binary: bool = True
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return np.minimum(left, right)


class Max(PandasOperand):
name = "max"
is_binary = True
is_symmetrical = True
has_symmetry_importance = True
name: str = "max"
is_binary: bool = True
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return np.maximum(left, right)


class Add(PandasOperand, VectorizableMixin):
name = "+"
alias = "add"
is_binary = True
is_symmetrical = True
has_symmetry_importance = True
is_vectorizable = True
name: str = "+"
alias: str = "add"
is_binary: bool = True
is_symmetrical: bool = True
has_symmetry_importance: bool = True
is_vectorizable: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return left + right
Expand All @@ -48,12 +48,12 @@ def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:


class Subtract(PandasOperand, VectorizableMixin):
name = "-"
alias = "sub"
is_binary = True
is_symmetrical = True
has_symmetry_importance = True
is_vectorizable = True
name: str = "-"
alias: str = "sub"
is_binary: bool = True
is_symmetrical: bool = True
has_symmetry_importance: bool = True
is_vectorizable: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return left - right
Expand All @@ -67,12 +67,12 @@ def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:


class Multiply(PandasOperand, VectorizableMixin):
name = "*"
alias = "mul"
is_binary = True
is_symmetrical = True
has_symmetry_importance = True
is_vectorizable = True
name: str = "*"
alias: str = "mul"
is_binary: bool = True
is_symmetrical: bool = True
has_symmetry_importance: bool = True
is_vectorizable: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return left * right
Expand All @@ -86,12 +86,12 @@ def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:


class Divide(PandasOperand, VectorizableMixin):
name = "/"
alias = "div"
is_binary = True
has_symmetry_importance = True
is_vectorizable = True
output_type = "float"
name: str = "/"
alias: str = "div"
is_binary: bool = True
has_symmetry_importance: bool = True
is_vectorizable: bool = True
output_type: Optional[str] = "float"

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return left / right.replace(0, np.nan)
Expand All @@ -105,10 +105,10 @@ def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:


class Combine(PandasOperand):
name = "Combine"
is_binary = True
has_symmetry_importance = True
output_type = "object"
name: str = "Combine"
is_binary: bool = True
has_symmetry_importance: bool = True
output_type: Optional[str] = "object"

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
temp = left.astype(str) + "_" + right.astype(str)
Expand All @@ -117,13 +117,13 @@ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:


class CombineThenFreq(PandasOperand):
name = "CombineThenFreq"
is_binary = True
is_symmetrical = True
has_symmetry_importance = True
output_type = "float"
is_distribution_dependent = True
input_type = "discrete"
name: str = "CombineThenFreq"
is_binary: bool = True
is_symmetrical: bool = True
has_symmetry_importance: bool = True
output_type: Optional[str] = "float"
is_distribution_dependent: bool = True
input_type: Optional[str] = "discrete"

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
temp = left.astype(str) + "_" + right.astype(str)
Expand All @@ -133,11 +133,11 @@ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:


class Distance(PandasOperand):
name = "dist"
is_binary = True
output_type = "float"
is_symmetrical = True
has_symmetry_importance = True
name: str = "dist"
is_binary: bool = True
output_type: Optional[str] = "float"
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return pd.Series(
Expand All @@ -158,11 +158,11 @@ def __norm(self, vector: pd.Series) -> pd.Series:

# Left for backward compatibility
class Sim(Distance):
name = "sim"
is_binary = True
output_type = "float"
is_symmetrical = True
has_symmetry_importance = True
name: str = "sim"
is_binary: bool = True
output_type: Optional[str] = "float"
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
return 1 - super().calculate_binary(left, right)
Expand Down Expand Up @@ -191,12 +191,12 @@ def _similarity(self, left: str, right: str) -> float:


class JaroWinklerSim1(StringSim):
name = "sim_jw1"
is_binary = True
input_type = "string"
output_type = "float"
is_symmetrical = True
has_symmetry_importance = True
name: str = "sim_jw1"
is_binary: bool = True
input_type: Optional[str] = "string"
output_type: Optional[str] = "float"
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def _prepare_value(self, value: Optional[str]) -> Optional[str]:
return value
Expand All @@ -206,12 +206,12 @@ def _similarity(self, left: str, right: str) -> float:


class JaroWinklerSim2(StringSim):
name = "sim_jw2"
is_binary = True
input_type = "string"
output_type = "float"
is_symmetrical = True
has_symmetry_importance = True
name: str = "sim_jw2"
is_binary: bool = True
input_type: Optional[str] = "string"
output_type: Optional[str] = "float"
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def _prepare_value(self, value: Optional[str]) -> Optional[str]:
return value[::-1] if value is not None else None
Expand All @@ -221,12 +221,12 @@ def _similarity(self, left: str, right: str) -> float:


class LevenshteinSim(StringSim):
name = "sim_lv"
is_binary = True
input_type = "string"
output_type = "float"
is_symmetrical = True
has_symmetry_importance = True
name: str = "sim_lv"
is_binary: bool = True
input_type: Optional[str] = "string"
output_type: Optional[str] = "float"
is_symmetrical: bool = True
has_symmetry_importance: bool = True

def _prepare_value(self, value: Optional[str]) -> Optional[str]:
return value
Expand Down
Loading