Benefits of object oriented design:
Prefixing a variable with an underscore (self._name) is a convention to indicate that the instance variable is private.
class Location:
    """A named location whose name is always kept in uppercase."""

    def __init__(self, name, longitude, latitude):
        # Names are always uppercase
        upper_name = name.upper()
        self._name = upper_name
        # The rest of the initialisation is elided in the source; the visible
        # code does not store longitude or latitude.
        ...

    @property
    def name(self):
        # Read access to the stored (already uppercased) name.
        return self._name

    @name.setter
    def name(self, value):
        # Apply the same uppercase normalisation as the constructor.
        upper_name = value.upper()
        self._name = upper_name
>>> loc = Location("Antwerp", 4.42, 51.22)
>>> loc.name = "Antwerpen"
>>> loc.name
"ANTWERPEN" 😊
Composition in object oriented design is a way to combine objects or data types into more complex objects.
class Grid:
    """Grid description that stores the values ``nx``, ``dx``, ``ny``, ``dy``."""

    def __init__(self, nx, dx, ny, dy):
        # NOTE(review): presumably nx/ny are point counts and dx/dy spacings;
        # the visible code only stores them -- confirm against the full class.
        self.nx, self.dx = nx, dx
        self.ny, self.dy = ny, dy

    def find_index(self, x, y):
        # Body elided in the source.
        ...
class DataArray:
    """Composite object bundling ``data`` with its ``time``, ``item`` and ``geometry``."""

    def __init__(self, data, time, item, geometry):
        # Pure composition: the four collaborators are stored as given.
        self.data, self.time, self.item, self.geometry = data, time, item, geometry

    def plot(self):
        # Body elided in the source.
        ...
DataArray has a geometry (e.g. Grid) and an item (ItemInfo).
GeometryFM3D inherits from _GeometryFMLayered, it is a _GeometryFMLayered.
C#
mypy
)
sum() allows us to operate on a higher level of abstraction.
If a class implements __len__, it is a Sized object.
If a class implements __contains__, it is a Container object.
If a class implements __iter__, it is an Iterable object.
If you want your code to be Pythonic, you have to be familiar with these types and their methods.
Dundermethods:
__getitem__
__setitem__
__len__
__contains__
class JavaLikeToolbox:
    """Toolbox whose lookup and size are exposed as explicitly named methods."""

    def __init__(self, tools: Collection[Tool]):
        self.tools = tools

    def getToolByName(self, name: str) -> Tool:
        # First tool whose ``name`` attribute equals ``name``; when nothing
        # matches the result is None.
        matching = (candidate for candidate in self.tools if candidate.name == name)
        return next(matching, None)

    def numberOfTools(self) -> int:
        # Size of the underlying collection.
        tool_count = len(self.tools)
        return tool_count
>>> tb = JavaLikeToolbox([Hammer(), Screwdriver()])
>>> tb.getToolByName("hammer")
Hammer()
>>> tb.numberOfTools()
2
class Toolbox:
    """Collection of tools addressable by name.

    Supports ``toolbox[name]`` for lookup and ``len(toolbox)`` for the
    number of stored tools.
    """

    def __init__(self, tools: Collection[Tool]):
        # Index the tools by name once so each lookup is a single dict access.
        # Tools that share a name overwrite each other; the last one wins.
        self._tools = {tool.name: tool for tool in tools}

    def __getitem__(self, name: str) -> Tool:
        """Return the tool stored under ``name``.

        Raises:
            KeyError: if no tool with that name is stored.
        """
        return self._tools[name]

    def __len__(self) -> int:
        """Return the number of stored tools."""
        # The tools live in ``_tools``; there is no ``tools`` attribute, so
        # reading ``self.tools`` here would raise AttributeError.
        return len(self._tools)
>>> tb = Toolbox([Hammer(), Screwdriver()])
>>> tb["hammer"]
Hammer()
>>> len(tb)
2
class SparseMatrix:
    """Two-dimensional matrix that stores only explicitly assigned entries.

    Entries live in a dict keyed by ``(i, j)``. Any position that was never
    assigned reads back as ``fill_value``.

    Args:
        shape: pair giving the extent of the first and second index.
        fill_value: value returned for positions without a stored entry.
        data: optional dict mapping ``(i, j)`` to a value; it is used as the
            backing store directly (not copied).
    """

    def __init__(self, shape, fill_value=0.0, data=None):
        self.shape = shape
        self._data = data if data is not None else {}
        self.fill_value = fill_value

    def __setitem__(self, key, value):
        """Store ``value`` (converted to ``float``) at ``key == (i, j)``."""
        i, j = key
        self._data[i, j] = float(value)

    def __getitem__(self, key) -> float:
        """Return the entry at ``key == (i, j)``, or ``fill_value`` if unset."""
        i, j = key
        return self._data.get((i, j), self.fill_value)

    def transpose(self) -> "SparseMatrix":
        """Return a new matrix with the two indices (and the shape) swapped."""
        swapped = {(j, i): v for (i, j), v in self._data.items()}
        # The shape has to be swapped together with the keys; otherwise a
        # non-square transpose reports the wrong extent and ``__repr__``
        # iterates over the wrong index ranges.
        return SparseMatrix(
            shape=(self.shape[1], self.shape[0]),
            fill_value=self.fill_value,
            data=swapped,
        )

    def __repr__(self):
        """Return a text grid with each value left-aligned in a 4-wide column."""
        # NOTE(review): the outer loop runs over the second index, so each
        # printed line holds the entries that share one value of ``j``. That
        # is kept as in the original -- confirm it is the intended layout.
        lines = []
        for j in range(self.shape[1]):
            cells = (f"{self[i, j]:<4}" for i in range(self.shape[0]))
            lines.append("".join(cells) + "\n")
        return "".join(lines)
IToolbox and implement it for Toolbox.
An example is scikit-learn transformers:
fit
transform
fit_transform
If you want to make a transformer compatible with sklearn, you have to implement these methods.
We can inherit some behavior from sklearn.base.TransformerMixin
from sklearn.base import TransformerMixin
class RemoveOutliersTransformer(TransformerMixin):
    """Clip values to quantile-based limits learned in ``fit``.

    Despite the name, values outside the limits are clipped with ``np.clip``;
    they are not dropped.

    Args:
        lower_bound: quantile level passed to ``np.quantile`` for the lower limit.
        upper_bound: quantile level passed to ``np.quantile`` for the upper limit.
    """

    def __init__(self, lower_bound, upper_bound):
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        # Learned limits; they stay None until ``fit`` has been called.
        self.lower_ = None
        self.upper_ = None

    def fit(self, X, y=None):
        """Compute the clipping limits from ``X`` and return ``self``.

        ``y`` is accepted for API compatibility and is not used.
        """
        self.lower_ = np.quantile(X, self.lower_bound)
        self.upper_ = np.quantile(X, self.upper_bound)
        # Returning the estimator is required for the inherited
        # ``fit_transform``, which chains ``fit(...).transform(...)``.
        return self

    def transform(self, X):
        """Return ``X`` clipped to the limits computed by ``fit``."""
        return np.clip(X, self.lower_, self.upper_)

    # def fit_transform(self, X, y=None):
    # we get this for free, from TransformerMixin
The Interval class represents an interval in time.
class Interval:
    """Span between ``start`` and ``end`` that supports the ``in`` operator."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __contains__(self, x):
        # Both comparisons are strict, so the end points themselves are
        # not members.
        lower, upper = self.start, self.end
        return lower < x and x < upper
>>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))
>>> date(2020,1,15) in dr
True
>>> date(1970,1,1) in dr
False
What if we want to make another type of interval, e.g. an interval of numbers \([1.0, 2.0]\)?
class Interval:
    """Range from ``start`` to ``end``; membership is tested with ``in``."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __contains__(self, x):
        # Strict comparisons on both sides: ``start`` and ``end`` are excluded.
        low, high = self.start, self.end
        return low < x and x < high
>>> interval = Interval(5, 10)
>>> 8 in interval
True
>>> 12 in interval
False
As long as the start, end and x are comparable, the Interval class is a generic class able to handle integers, floats, dates, datetimes, strings …
a.k.a. the Robustness principle of software design
The consumers of your package (including your future self) will be grateful if you are not overly restrictive in what types you accept as input.
from pydantic import BaseModel
from datetime import date
class Sensor(BaseModel):
    # Pydantic model: each field is declared by its type annotation.
    name: str
    voltage: float
    install_date: date
    # NOTE(review): presumably (longitude, latitude), judging by the example
    # values used elsewhere in this document -- confirm.
    location: tuple[float, float]
# Build a Sensor from values that already have the declared Python types.
s1 = Sensor(name="Sensor 1",
voltage=3.3,
install_date=date(2020, 1, 1),
location=(4.42, 51.22))
# The same fields again, but with voltage, install_date and location given as
# strings, then unpacked into the constructor as keyword arguments.
# NOTE(review): this relies on pydantic converting the strings to float/date --
# confirm for the pydantic version and validation mode in use.
data = {
"name": "Sensor 1",
"voltage": "3.3",
"install_date": "2020-01-01",
"location": ("4.42", "51.22")
}
s2 = Sensor(**data)
Before
Before
Opposite of extract method.
Break up a long method into smaller methods.
If you want to learn more about refactoring, I recommend the book “Refactoring: Improving the Design of Existing Code” by Martin Fowler.
Python package development