ANOVA

Analysis of Variance models: anova_lm for ANOVA with one or more fitted linear (OLS) models, and AnovaRM for repeated measures ANOVA (within-subject ANOVA for balanced data).

Examples

In [1]: import statsmodels.api as sm

In [2]: from statsmodels.formula.api import ols

In [3]: moore = sm.datasets.get_rdataset("Moore", "car",
   ...:                                  cache=True) # load data
   ...: 
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
/build/statsmodels-0.9.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in _get_data(base_url, dataname, cache, extension)
    221     try:
--> 222         data, from_cache = _urlopen_cached(url, cache)
    223     except HTTPError as err:

/build/statsmodels-0.9.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in _urlopen_cached(url, cache)
    212     if not from_cache:
--> 213         data = urlopen(url, timeout=3).read()
    214         if cache is not None:  # then put it in the cache

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    531             meth = getattr(processor, meth_name)
--> 532             response = meth(req, response)
    533 

/usr/lib/python3.6/urllib/request.py in http_response(self, request, response)
    641             response = self.parent.error(
--> 642                 'http', request, response, code, msg, hdrs)
    643 

/usr/lib/python3.6/urllib/request.py in error(self, proto, *args)
    563         args = (dict, proto, meth_name) + args
--> 564         result = self._call_chain(*args)
    565         if result:

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:

/usr/lib/python3.6/urllib/request.py in http_error_302(self, req, fp, code, msg, headers)
    755 
--> 756         return self.parent.open(new, timeout=req.timeout)
    757 

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    531             meth = getattr(processor, meth_name)
--> 532             response = meth(req, response)
    533 

/usr/lib/python3.6/urllib/request.py in http_response(self, request, response)
    641             response = self.parent.error(
--> 642                 'http', request, response, code, msg, hdrs)
    643 

/usr/lib/python3.6/urllib/request.py in error(self, proto, *args)
    569             args = (dict, 'default', 'http_error_default') + orig_args
--> 570             return self._call_chain(*args)
    571 

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:

/usr/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
    649     def http_error_default(self, req, fp, code, msg, hdrs):
--> 650         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    651 

HTTPError: HTTP Error 404: Not Found

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-3-5b46e3f60387> in <module>()
      1 moore = sm.datasets.get_rdataset("Moore", "car",
----> 2                                  cache=True) # load data

/build/statsmodels-0.9.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in get_rdataset(dataname, package, cache)
    289                      "master/doc/"+package+"/rst/")
    290     cache = _get_cache(cache)
--> 291     data, from_cache = _get_data(data_base_url, dataname, cache)
    292     data = read_csv(data, index_col=0)
    293     data = _maybe_reset_index(data)

/build/statsmodels-0.9.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in _get_data(base_url, dataname, cache, extension)
    223     except HTTPError as err:
    224         if '404' in str(err):
--> 225             raise ValueError("Dataset %s was not found." % dataname)
    226         else:
    227             raise err

ValueError: Dataset Moore was not found.

In [4]: data = moore.data
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-7f7c7245843e> in <module>()
----> 1 data = moore.data

NameError: name 'moore' is not defined

In [5]: data = data.rename(columns={"partner.status":
   ...:                             "partner_status"}) # make name pythonic
   ...: 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-8a6860e82653> in <module>()
----> 1 data = data.rename(columns={"partner.status":
      2                             "partner_status"}) # make name pythonic
      3 

NameError: name 'data' is not defined

In [6]: moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
   ...:                 data=data).fit()
   ...: 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-1a5fe75a6b5a> in <module>()
      1 moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
----> 2                 data=data).fit()

NameError: name 'data' is not defined

In [7]: table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-e1cc32cde8d3> in <module>()
----> 1 table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame

NameError: name 'moore_lm' is not defined

In [8]: print(table)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-2e062ec15c20> in <module>()
----> 1 print(table)

NameError: name 'table' is not defined

A more detailed example for anova_lm can be found in the "Interactions and ANOVA" example notebook in the statsmodels documentation.

Module Reference

anova_lm(*args, **kwargs) Anova table for one or more fitted linear models.
AnovaRM(data, depvar, subject[, within, …]) Repeated measures Anova using least squares regression.