Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ python:
dist: xenial

python:
- "2.7"
- "3.5"
- "3.6"

Expand Down
6 changes: 5 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,16 @@ share, let us know.
Dependencies, installation and license
--------------------------------------

Install the Python Record Linkage Toolkit easily with pip
The Python Record Linkage Toolkit requires Python 3.5 or higher (since version
0.14). Install the package easily with pip:

.. code:: sh

pip install recordlinkage

Python 2.7 users can use version <= 0.13, but it is advised to use Python >=
3.5.

The toolkit depends on Pandas_ (>=0.18.0), Numpy_, `Scikit-learn`_, Scipy_ and
Jellyfish_. You probably have most of them already installed. The package
``jellyfish`` is used for approximate string comparing and string encoding.
Expand Down
9 changes: 6 additions & 3 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@ documentation_.
Installation
============

The easiest way of installing the Python Record Linkage Toolkit is using
``pip``. It is as easy as typing:
The Python Record Linkage Toolkit requires Python 3.5 or higher (since version
0.14). Install the package easily with pip:

.. code:: sh

pip install --user recordlinkage
pip install recordlinkage

Python 2.7 users can use version <= 0.13, but it is advised to use Python >=
3.5.

You can also clone the project on GitHub. The license of this record linkage
package is BSD-3-Clause.
Expand Down
3 changes: 0 additions & 3 deletions recordlinkage/algorithms/string.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from __future__ import division
from __future__ import unicode_literals

import warnings

import jellyfish
Expand Down
5 changes: 1 addition & 4 deletions recordlinkage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

import pandas

import six

from recordlinkage import rl_logging as logging
import recordlinkage.config as cf
from recordlinkage.utils import (listify,
Expand All @@ -28,7 +26,6 @@
from recordlinkage.types import (is_numpy_like,
is_pandas_2d_multiindex)
from recordlinkage.measures import max_pairs

from recordlinkage.utils import DeprecationHelper, LearningError


Expand Down Expand Up @@ -874,7 +871,7 @@ def clear_memory(self):
raise AttributeError("this method was removed in version 0.12.0")


class BaseClassifier(six.with_metaclass(ABCMeta)):
class BaseClassifier(metaclass=ABCMeta):
"""Base class for classification of records pairs.

This class contains methods for training the classifier.
Expand Down
3 changes: 0 additions & 3 deletions recordlinkage/compare.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from __future__ import division
from __future__ import unicode_literals

from functools import partial

import numpy
Expand Down
24 changes: 15 additions & 9 deletions recordlinkage/datasets/external.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
from io import BytesIO
from pathlib import Path
from urllib.request import urlopen
import zipfile

import pandas

from six import BytesIO
from six.moves.urllib.request import urlopen


def load_krebsregister(block=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
Expand Down Expand Up @@ -68,10 +68,13 @@ def load_krebsregister(block=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
# If the data is not found, download it.
for i in range(1, 11):

filepath = os.path.join(os.path.dirname(__file__),
'krebsregister', 'block_{}.zip'.format(i))
filepath = Path(
Path(__file__).parent,
'krebsregister',
'block_{}.zip'.format(i)
)

if not os.path.exists(filepath):
if not filepath.is_file():
_download_krebsregister()
break

Expand Down Expand Up @@ -105,7 +108,7 @@ def _download_krebsregister():

# unzip the content and put it in the krebsregister folder
z = zipfile.ZipFile(BytesIO(r))
z.extractall(os.path.join(os.path.dirname(__file__), 'krebsregister'))
z.extractall(str(Path(Path(__file__).parent, 'krebsregister')))

print("Data download succesfull.")

Expand All @@ -120,8 +123,11 @@ def _krebsregister_block(block):
"Argument 'block' has to be integer in "
"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] or list of integers.")

fp_i = os.path.join(os.path.dirname(__file__),
'krebsregister', 'block_{}.zip'.format(block))
fp_i = Path(
Path(__file__).parent,
'krebsregister',
'block_{}.zip'.format(block)
)

data_block = pandas.read_csv(
fp_i,
Expand Down
8 changes: 6 additions & 2 deletions recordlinkage/datasets/febrl.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path

import pandas
import numpy
Expand All @@ -7,7 +7,11 @@
def _febrl_load_data(filename):
# Internal function for loading febrl data

filepath = os.path.join(os.path.dirname(__file__), 'febrl', filename)
filepath = Path(
Path(__file__).parent,
'febrl',
filename
)

febrl_data = pandas.read_csv(filepath,
index_col="rec_id",
Expand Down
2 changes: 0 additions & 2 deletions recordlinkage/index.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import division

import warnings

import numpy
Expand Down
2 changes: 0 additions & 2 deletions recordlinkage/measures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# measures.py

from __future__ import division

import numpy

import pandas
Expand Down
4 changes: 0 additions & 4 deletions recordlinkage/preprocessing/cleaning.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
from __future__ import absolute_import
from __future__ import division
# from __future__ import unicode_literals

import sys

from sklearn.feature_extraction.text import strip_accents_ascii, \
Expand Down
11 changes: 4 additions & 7 deletions recordlinkage/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,24 @@

import pandas

from six import binary_type, string_types, text_type

string_and_binary_types = (string_types,) + (binary_type,)
string_and_binary_types = (str, bytes)


def is_number(obj):
return isinstance(obj, (Number, numpy.number))


def is_string_like(obj):
return isinstance(obj, (text_type, string_types))
return isinstance(obj, str)


def _iterable_not_string(x):
return (isinstance(x, collections.Iterable) and
not isinstance(x, string_types))
not isinstance(x, str))


def is_iterator(obj):
# python 3 generators have __next__ instead of next
return hasattr(obj, 'next') or hasattr(obj, '__next__')
return hasattr(obj, '__next__')


def is_re(obj):
Expand Down
3 changes: 0 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@

[bdist_wheel]
universal = 1

[versioneer]
VCS = git
style = pep440
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def read(fname):
# Github
url="https://github.com/J535D165/recordlinkage",

python_requires=">=3.5",
install_requires=[
"six>=1.10.0",
"jellyfish>=0.5.4",
"numpy>=1.13.0",
"pandas>=0.18.0",
Expand Down
2 changes: 0 additions & 2 deletions tests/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function

import os
import tempfile
import shutil
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py{27,35,36}-pandas{018,019,020,021,022,latest},flake8,docs
envlist = py{35,36}-pandas{018,019,020,021,022,latest},flake8,docs

[testenv]
deps=
Expand Down