Skip to content

Commit

Permalink
Fix errors arising under formulaic>=1.1.0 (#786)
Browse files Browse the repository at this point in the history
* add jax benchmark notebook

* support formulaic 1.1.0

* delete gpu notebook

* delete coverage xml
  • Loading branch information
s3alfisc authored Jan 12, 2025
1 parent cb31ee2 commit 306da82
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 4,578 deletions.
4,503 changes: 0 additions & 4,503 deletions coverage.xml

This file was deleted.

1 change: 1 addition & 0 deletions docs/changelog.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Adds a `pf.feglm()` function that supports GLMs with normal and binomial families (gaussian, logit, probit) without fixed effects. Fixed effects support is work in progress.
- Adds a function argument `context`, which allows passing information / context to the `formulaic.Formulaic.get_model_matrix()` call that creates the model matrix.
- Fix a bug that caused reindexing of `LPDID._coeftable` when calling `LPDID.iplot()`. As a result, a second call of `LPDID.iplot()` would fail.
- Bumps the minimum required `formulaic` version to `1.1.0` and fixes errors that arose under formulaic >= 1.1.0 when a) the `ref` argument was used with the `i()` syntax, which led to a silent failure, and b) `fixef()` / `predict()` were called on models with fixed effects, which led to a loud error.

## PyFixest 0.27.0

Expand Down
53 changes: 27 additions & 26 deletions pixi.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyfixest/estimation/model_matrix_fixest_.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def _get_columns_to_drop_and_check_ivars(
if ref and "_" in ref:
ref = ref.replace("_", "")

pattern = rf"\[T\.{ref}(?:\.0)?\]:{var2}"
pattern = rf"\[(?:T\.)?{ref}(?:\.0)?\]:{var2}"
if ref:
for column in X.columns:
if var1 in column and re.search(pattern, column):
Expand Down
22 changes: 8 additions & 14 deletions pyfixest/utils/dev_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,18 +182,12 @@ def _extract_variable_level(fe_string: str):
A tuple containing the extracted variable and level for the fixed
effect.
"""
c_pattern = r"C\((.+?)\)"
t_pattern = r"\[T\.(.*\])"
c_match = re.search(c_pattern, fe_string)
t_match = re.search(t_pattern, fe_string, re.DOTALL)
pattern = r"C\(([^)]*)\)\[(?:T\.)?(.*)\]$"
match = re.search(pattern, fe_string)
if not match:
raise ValueError(f"Cannot parse: {fe_string}")

if not c_match or not t_match:
raise ValueError(
f"feols() failed after regex encountered the following value as a fixed effect:\n {fe_string}."
+ "\nThis may due to the presence of line separation and/or escape sequences within the string."
+ " If so, consider recoding the underlying string. Otherwise, please open a PR in the github repo!"
)

variable = c_match.group(1)
level = t_match.group(1)
return "C(" + variable + ")", level[0 : level.rfind("]")]
variable = match.group(1)
level = match.group(2)

return f"C({variable})", level
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ license = { text = "MIT" }
dependencies = [
"lets-plot>=4.0.0",
"scipy>=1.6",
"formulaic>=1.0.0,<1.1.0",
"formulaic>=1.1.0",
"pandas>=1.1.0",
"numba>=0.58.0",
"seaborn>=0.13.2",
Expand Down
47 changes: 14 additions & 33 deletions tests/test_i.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,42 +115,23 @@ def test_i_vs_fixest():
)


def test_i_interacted_fixest():
@pytest.mark.parametrize(
"fml",
[
"dep_var ~ i(state)",
"dep_var ~ i(state, ref = 1)",
"dep_var ~ i(state, year)",
"dep_var ~ i(state, year, ref = 1)",
"dep_var ~ i(state, year) | state",
"dep_var ~ i(state, year, ref = 1) | state",
],
)
def test_i_interacted_fixest(fml):
df_het = pd.read_csv("pyfixest/did/data/df_het.csv")
df_het["X"] = np.random.normal(df_het.shape[0])

# ------------------------------------------------------------------------ #
# no fixed effects

# no references
fit_py = feols("dep_var~i(state, year)", df_het)
fit_r = fixest.feols(ro.Formula("dep_var~i(state, year)"), df_het)
fit_py = feols(fml, df_het)
fit_r = fixest.feols(ro.Formula(fml), df_het)
np.testing.assert_allclose(
fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
)

if True:
# no reference one fixed effect
fit_py = feols("dep_var~i(state, year) | state ", df_het)
fit_r = fixest.feols(ro.Formula("dep_var~i(state, year) | state"), df_het)
np.testing.assert_allclose(
fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
)

if True:
# one reference
fit_py = feols("dep_var~i(state, year,ref=1) ", df_het)
fit_r = fixest.feols(ro.Formula("dep_var~i(state, year, ref = 1)"), df_het)
np.testing.assert_allclose(
fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
)

if True:
# one reference and fixed effect
fit_py = feols("dep_var~i(state, year,ref=1) | state ", df_het)
fit_r = fixest.feols(
ro.Formula("dep_var~i(state, year, ref = 1) | state"), df_het
)
np.testing.assert_allclose(
fit_py.coef().values, np.array(fit_r.rx2("coefficients"))
)
2 changes: 2 additions & 0 deletions tests/test_predict_resid_fixef.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,5 @@ def test_extract_variable_level():
assert _extract_variable_level(var) == ("C(f3)", "1.0")
var = "C(f4)[T.1]"
assert _extract_variable_level(var) == ("C(f4)", "1")
var = "C(f5)[1.0]"
assert _extract_variable_level(var) == ("C(f5)", "1.0")

0 comments on commit 306da82

Please sign in to comment.