Patterns in static

Apophenia

Public functions, structs, and types

Data Structures

struct  apop_arms_settings
 
struct  apop_cdf_settings
 
struct  apop_composition_settings
 
struct  apop_coordinate_transform_settings
 
struct  apop_cross_settings
 
struct  apop_data
 
struct  apop_dconstrain_settings
 
struct  apop_kernel_density_settings
 
struct  apop_lm_settings
 
struct  apop_loess_settings
 
struct  apop_mcmc_proposal_s
 
struct  apop_mcmc_settings
 
struct  apop_mixture_settings
 
struct  apop_mle_settings
 
struct  apop_model
 
struct  apop_name
 
struct  apop_opts_type
 
struct  apop_parts_wanted_settings
 
struct  apop_pm_settings
 
struct  apop_pmf_settings
 
struct  apop_settings_type
 

Macros

#define apop_ANOVA
 
#define apop_ANOVA
 
#define apop_ANOVA
 
#define apop_ANOVA
 
#define Apop_c(d, col)
 
#define Apop_c(d, col)
 
#define Apop_c(d, col)
 
#define Apop_c(d, col)
 
#define Apop_col_t(d, colname, outd)
 
#define Apop_col_t(d, colname, outd)
 
#define Apop_col_t(d, colname, outd)
 
#define Apop_col_t(d, colname, outd)
 
#define Apop_col_tv(m, col, v)
 
#define Apop_col_tv(m, col, v)
 
#define Apop_col_tv(m, col, v)
 
#define Apop_col_tv(m, col, v)
 
#define Apop_cs(d, colnum, len)
 
#define Apop_cs(d, colnum, len)
 
#define Apop_cs(d, colnum, len)
 
#define Apop_cs(d, colnum, len)
 
#define Apop_cv(data_to_view, col)
 
#define Apop_cv(data_to_view, col)
 
#define Apop_cv(data_to_view, col)
 
#define Apop_cv(data_to_view, col)
 
#define apop_data_add_names(dataset, type, ...)
 
#define apop_data_add_names(dataset, type, ...)
 
#define apop_data_add_names(dataset, type, ...)
 
#define apop_data_add_names(dataset, type, ...)
 
#define apop_data_falloc(sizes, ...)
 
#define apop_data_falloc(sizes, ...)
 
#define apop_data_falloc(sizes, ...)
 
#define apop_data_falloc(sizes, ...)
 
#define apop_data_fill(adfin, ...)
 
#define apop_data_fill(adfin, ...)
 
#define apop_data_fill(adfin, ...)
 
#define apop_data_fill(adfin, ...)
 
#define apop_data_free(freeme)
 
#define apop_data_free(freeme)
 
#define apop_data_free(freeme)
 
#define apop_data_free(freeme)
 
#define apop_data_prune_columns(in, ...)
 
#define apop_data_prune_columns(in, ...)
 
#define apop_data_prune_columns(in, ...)
 
#define apop_data_prune_columns(in, ...)
 
#define apop_errorlevel
 
#define apop_errorlevel
 
#define apop_errorlevel
 
#define apop_errorlevel
 
#define apop_estimate_r_squared(in)
 
#define apop_estimate_r_squared(in)
 
#define apop_estimate_r_squared(in)
 
#define apop_estimate_r_squared(in)
 
#define apop_F_distribution
 
#define apop_F_distribution
 
#define apop_F_distribution
 
#define apop_F_distribution
 
#define apop_F_test
 
#define apop_F_test
 
#define apop_F_test
 
#define apop_F_test
 
#define apop_gaussian
 
#define apop_gaussian
 
#define apop_gaussian
 
#define apop_gaussian
 
#define apop_IV
 
#define apop_IV
 
#define apop_IV
 
#define apop_IV
 
#define Apop_mcv(matrix_to_view, col)
 
#define Apop_mcv(matrix_to_view, col)
 
#define Apop_mcv(matrix_to_view, col)
 
#define Apop_mcv(matrix_to_view, col)
 
#define apop_mean
 
#define apop_mean
 
#define apop_mean
 
#define apop_mean
 
#define apop_model_coordinate_transform(...)
 
#define apop_model_coordinate_transform(...)
 
#define apop_model_coordinate_transform(...)
 
#define apop_model_coordinate_transform(...)
 
#define apop_model_copy_set(model, type, ...)
 
#define apop_model_copy_set(model, type, ...)
 
#define apop_model_copy_set(model, type, ...)
 
#define apop_model_copy_set(model, type, ...)
 
#define apop_model_cross(...)
 
#define apop_model_cross(...)
 
#define apop_model_cross(...)
 
#define apop_model_cross(...)
 
#define apop_model_dcompose(...)
 
#define apop_model_dcompose(...)
 
#define apop_model_dcompose(...)
 
#define apop_model_dcompose(...)
 
#define apop_model_dconstrain(...)
 
#define apop_model_dconstrain(...)
 
#define apop_model_dconstrain(...)
 
#define apop_model_dconstrain(...)
 
#define apop_model_mixture(...)
 
#define apop_model_mixture(...)
 
#define apop_model_mixture(...)
 
#define apop_model_mixture(...)
 
#define apop_model_set_parameters(in, ...)
 
#define apop_model_set_parameters(in, ...)
 
#define apop_model_set_parameters(in, ...)
 
#define apop_model_set_parameters(in, ...)
 
#define Apop_model_set_settings(model, ...)
 
#define Apop_model_set_settings(model, ...)
 
#define Apop_model_set_settings(model, ...)
 
#define Apop_model_set_settings(model, ...)
 
#define apop_model_set_settings
 
#define apop_model_set_settings
 
#define apop_model_set_settings
 
#define apop_model_set_settings
 
#define Apop_mrv(matrix_to_view, row)
 
#define Apop_mrv(matrix_to_view, row)
 
#define Apop_mrv(matrix_to_view, row)
 
#define Apop_mrv(matrix_to_view, row)
 
#define Apop_notify(verbosity, ...)
 
#define Apop_notify(verbosity, ...)
 
#define Apop_notify(verbosity, ...)
 
#define Apop_notify(verbosity, ...)
 
#define apop_OLS
 
#define apop_OLS
 
#define apop_OLS
 
#define apop_OLS
 
#define apop_PMF
 
#define apop_PMF
 
#define apop_PMF
 
#define apop_PMF
 
#define Apop_r(d, rownum)
 
#define Apop_r(d, rownum)
 
#define Apop_r(d, rownum)
 
#define Apop_r(d, rownum)
 
#define apop_rng_get_thread(thread_in)
 
#define apop_rng_get_thread(thread_in)
 
#define apop_rng_get_thread(thread_in)
 
#define apop_rng_get_thread(thread_in)
 
#define Apop_row_t(d, rowname, outd)
 
#define Apop_row_t(d, rowname, outd)
 
#define Apop_row_t(d, rowname, outd)
 
#define Apop_row_t(d, rowname, outd)
 
#define Apop_row_tv(m, row, v)
 
#define Apop_row_tv(m, row, v)
 
#define Apop_row_tv(m, row, v)
 
#define Apop_row_tv(m, row, v)
 
#define Apop_rs(d, rownum, len)
 
#define Apop_rs(d, rownum, len)
 
#define Apop_rs(d, rownum, len)
 
#define Apop_rs(d, rownum, len)
 
#define Apop_rv(data_to_view, row)
 
#define Apop_rv(data_to_view, row)
 
#define Apop_rv(data_to_view, row)
 
#define Apop_rv(data_to_view, row)
 
#define Apop_settings_add_group(model, type, ...)
 
#define Apop_settings_add_group(model, type, ...)
 
#define Apop_settings_add_group(model, type, ...)
 
#define Apop_settings_add_group(model, type, ...)
 
#define Apop_settings_copy(name, ...)
 
#define Apop_settings_copy(name, ...)
 
#define Apop_settings_copy(name, ...)
 
#define Apop_settings_copy(name, ...)
 
#define Apop_settings_declarations(ysg)
 
#define Apop_settings_declarations(ysg)
 
#define Apop_settings_declarations(ysg)
 
#define Apop_settings_declarations(ysg)
 
#define Apop_settings_free(name, ...)
 
#define Apop_settings_free(name, ...)
 
#define Apop_settings_free(name, ...)
 
#define Apop_settings_free(name, ...)
 
#define Apop_settings_get(model, type, setting)
 
#define Apop_settings_get(model, type, setting)
 
#define Apop_settings_get(model, type, setting)
 
#define Apop_settings_get(model, type, setting)
 
#define Apop_settings_get_group(m, type)
 
#define Apop_settings_get_group(m, type)
 
#define Apop_settings_get_group(m, type)
 
#define Apop_settings_get_group(m, type)
 
#define Apop_settings_init(name, ...)
 
#define Apop_settings_init(name, ...)
 
#define Apop_settings_init(name, ...)
 
#define Apop_settings_init(name, ...)
 
#define Apop_settings_rm_group(m, type)
 
#define Apop_settings_rm_group(m, type)
 
#define Apop_settings_rm_group(m, type)
 
#define Apop_settings_rm_group(m, type)
 
#define Apop_settings_set(model, type, setting, data)
 
#define Apop_settings_set(model, type, setting, data)
 
#define Apop_settings_set(model, type, setting, data)
 
#define Apop_settings_set(model, type, setting, data)
 
#define Apop_stopif(test, onfail, level, ...)
 
#define Apop_stopif(test, onfail, level, ...)
 
#define Apop_stopif(test, onfail, level, ...)
 
#define Apop_stopif(test, onfail, level, ...)
 
#define Apop_subm(matrix_to_view, srow, scol, nrows, ncols)
 
#define Apop_subm(matrix_to_view, srow, scol, nrows, ncols)
 
#define Apop_subm(matrix_to_view, srow, scol, nrows, ncols)
 
#define Apop_subm(matrix_to_view, srow, scol, nrows, ncols)
 
#define apop_sum
 
#define apop_sum
 
#define apop_sum
 
#define apop_sum
 
#define apop_test_ANOVA_independence(d)
 
#define apop_test_ANOVA_independence(d)
 
#define apop_test_ANOVA_independence(d)
 
#define apop_test_ANOVA_independence(d)
 
#define apop_text_fill(dataset, ...)
 
#define apop_text_fill(dataset, ...)
 
#define apop_text_fill(dataset, ...)
 
#define apop_text_fill(dataset, ...)
 
#define apop_var
 
#define apop_var
 
#define apop_var
 
#define apop_var
 
#define apop_vector_fill(avfin, ...)
 
#define apop_vector_fill(avfin, ...)
 
#define apop_vector_fill(avfin, ...)
 
#define apop_vector_fill(avfin, ...)
 

Functions

apop_data * apop_anova (char *table, char *data, char *grouping1, char *grouping2)
 
int apop_arms_draw (double *out, gsl_rng *r, apop_model *m)
 
gsl_vector * apop_array_to_vector (double *in, int size)
 
apop_model * apop_beta_from_mean_var (double m, double v)
 
apop_data * apop_bootstrap_cov (apop_data *data, apop_model *model, gsl_rng *rng, int iterations, char keep_boots, char ignore_nans, apop_data **boot_store)
 
double apop_cdf (apop_data *d, apop_model *m)
 
void apop_crosstab_to_db (apop_data *in, char *tabname, char *row_col_name, char *col_col_name, char *data_col_name)
 
void apop_data_add_named_elmt (apop_data *d, char *name, double val)
 
void apop_data_add_names_base (apop_data *d, const char type, char const **names)
 
apop_data * apop_data_add_page (apop_data *dataset, apop_data *newpage, const char *title)
 
apop_data * apop_data_alloc (const size_t size1, const size_t size2, const int size3)
 
apop_data * apop_data_calloc (const size_t size1, const size_t size2, const int size3)
 
apop_data * apop_data_copy (const apop_data *in)
 
apop_data * apop_data_correlation (const apop_data *in)
 
apop_data * apop_data_covariance (const apop_data *in)
 
apop_data * apop_data_fill_base (apop_data *in, double[])
 
char apop_data_free_base (apop_data *freeme)
 
double apop_data_get (const apop_data *data, size_t row, int col, const char *rowname, const char *colname, const char *page)
 
apop_data * apop_data_get_factor_names (apop_data *data, int col, char type)
 
apop_data * apop_data_get_page (const apop_data *data, const char *title, const char match)
 
apop_data * apop_data_listwise_delete (apop_data *d, char inplace)
 
void apop_data_memcpy (apop_data *out, const apop_data *in)
 
gsl_vector * apop_data_pack (const apop_data *in, gsl_vector *out, char more_pages, char use_info_pages)
 
apop_data * apop_data_pmf_compress (apop_data *in)
 
void apop_data_print (const apop_data *data, Output_declares)
 
void apop_data_print (const apop_data *data, char const *output_name, FILE *output_pipe, char output_type, char output_append)
 
apop_data * apop_data_prune_columns_base (apop_data *d, char **colnames)
 
double * apop_data_ptr (apop_data *data, int row, int col, const char *rowname, const char *colname, const char *page)
 
apop_data * apop_data_rank_compress (apop_data *in, int min_bins)
 
apop_data * apop_data_rank_expand (apop_data *in)
 
void apop_data_rm_columns (apop_data *d, int *drop)
 
apop_data * apop_data_rm_page (apop_data *data, const char *title, const char free_p)
 
apop_data * apop_data_rm_rows (apop_data *in, int *drop, int(*do_drop)(apop_data *, void *), void *drop_parameter)
 
int apop_data_set (apop_data *data, size_t row, int col, const double val, const char *rowname, const char *colname, const char *page)
 
void apop_data_show (const apop_data *data)
 
apop_data * apop_data_sort (apop_data *data, apop_data *sort_order, char asc, char inplace, double *col_order)
 
apop_data ** apop_data_split (apop_data *in, int splitpoint, char r_or_c)
 
apop_data * apop_data_stack (apop_data *m1, apop_data *m2, char posn, char inplace)
 
apop_data * apop_data_summarize (apop_data *data)
 
apop_data * apop_data_to_bins (apop_data const *indata, apop_data const *binspec, int bin_count, char close_top_bin)
 
int apop_data_to_db (const apop_data *set, const char *tabname, char)
 
apop_data * apop_data_to_dummies (apop_data *d, int col, char type, int keep_first, char append, char remove)
 
apop_data * apop_data_to_factors (apop_data *data, char intype, int incol, int outcol)
 
apop_data * apop_data_transpose (apop_data *in, char transpose_text, char inplace)
 
void apop_data_unpack (const gsl_vector *in, apop_data *d, char use_info_pages)
 
int apop_db_close (char vacuum)
 
int apop_db_open (char const *filename)
 
apop_data * apop_db_to_crosstab (char const *tabname, char const *row, char const *col, char const *data, char is_aggregate)
 
double apop_det_and_inv (const gsl_matrix *in, gsl_matrix **out, int calc_det, int calc_inv)
 
apop_data * apop_dot (const apop_data *d1, const apop_data *d2, char form1, char form2)
 
int apop_draw (double *out, gsl_rng *r, apop_model *m)
 
apop_model * apop_estimate (apop_data *d, apop_model *m)
 
apop_data * apop_estimate_coefficient_of_determination (apop_model *)
 
void apop_estimate_parameter_tests (apop_model *est)
 
apop_model * apop_estimate_restart (apop_model *e, apop_model *copy, char *starting_pt, double boundary)
 
apop_data * apop_f_test (apop_model *est, apop_data *contrast)
 
long double apop_generalized_harmonic (int N, double s)
 
apop_data * apop_histograms_test_goodness_of_fit (apop_model *h0, apop_model *h1)
 
apop_data * apop_jackknife_cov (apop_data *data, apop_model *model)
 
long double apop_kl_divergence (apop_model *from, apop_model *to, int draw_ct, gsl_rng *rng)
 
long double apop_linear_constraint (gsl_vector *beta, apop_data *constraint, double margin)
 
double apop_log_likelihood (apop_data *d, apop_model *m)
 
apop_data * apop_map (apop_data *in, apop_fn_d *fn_d, apop_fn_v *fn_v, apop_fn_r *fn_r, apop_fn_dp *fn_dp, apop_fn_vp *fn_vp, apop_fn_rp *fn_rp, apop_fn_dpi *fn_dpi, apop_fn_vpi *fn_vpi, apop_fn_rpi *fn_rpi, apop_fn_di *fn_di, apop_fn_vi *fn_vi, apop_fn_ri *fn_ri, void *param, int inplace, char part, int all_pages)
 
apop_data * apop_map (apop_data *in, double(*fn_d)(double), double(*fn_v)(gsl_vector *), double(*fn_r)(apop_data *), double(*fn_dp)(double, void *), double(*fn_vp)(gsl_vector *, void *), double(*fn_rp)(apop_data *, void *), double(*fn_dpi)(double, void *, int), double(*fn_vpi)(gsl_vector *, void *, int), double(*fn_rpi)(apop_data *, void *, int), double(*fn_di)(double, int), double(*fn_vi)(gsl_vector *, int), double(*fn_ri)(apop_data *, int), void *param, int inplace, char part, int all_pages)
 
double apop_map_sum (apop_data *in, apop_fn_d *fn_d, apop_fn_v *fn_v, apop_fn_r *fn_r, apop_fn_dp *fn_dp, apop_fn_vp *fn_vp, apop_fn_rp *fn_rp, apop_fn_dpi *fn_dpi, apop_fn_vpi *fn_vpi, apop_fn_rpi *fn_rpi, apop_fn_di *fn_di, apop_fn_vi *fn_vi, apop_fn_ri *fn_ri, void *param, char part, int all_pages)
 
double apop_map_sum (apop_data *in, double(*fn_d)(double), double(*fn_v)(gsl_vector *), double(*fn_r)(apop_data *), double(*fn_dp)(double, void *), double(*fn_vp)(gsl_vector *, void *), double(*fn_rp)(apop_data *, void *), double(*fn_dpi)(double, void *, int), double(*fn_vpi)(gsl_vector *, void *, int), double(*fn_rpi)(apop_data *, void *, int), double(*fn_di)(double, int), double(*fn_vi)(gsl_vector *, int), double(*fn_ri)(apop_data *, int), void *param, char part, int all_pages)
 
void apop_matrix_apply (gsl_matrix *m, void(*fn)(gsl_vector *))
 
void apop_matrix_apply_all (gsl_matrix *in, void(*fn)(double *))
 
gsl_matrix * apop_matrix_copy (const gsl_matrix *in)
 
double apop_matrix_determinant (const gsl_matrix *in)
 
gsl_matrix * apop_matrix_inverse (const gsl_matrix *in)
 
int apop_matrix_is_positive_semidefinite (gsl_matrix *m, char semi)
 
gsl_vector * apop_matrix_map (const gsl_matrix *m, double(*fn)(gsl_vector *))
 
gsl_matrix * apop_matrix_map_all (const gsl_matrix *in, double(*fn)(double))
 
double apop_matrix_map_all_sum (const gsl_matrix *in, double(*fn)(double))
 
double apop_matrix_map_sum (const gsl_matrix *in, double(*fn)(gsl_vector *))
 
double apop_matrix_mean (const gsl_matrix *data)
 
void apop_matrix_mean_and_var (const gsl_matrix *data, double *mean, double *var)
 
apop_data * apop_matrix_pca (gsl_matrix *data, int const dimensions_we_want)
 
void apop_matrix_print (const gsl_matrix *data, Output_declares)
 
void apop_matrix_print (const gsl_matrix *data, char const *output_name, FILE *output_pipe, char output_type, char output_append)
 
gsl_matrix * apop_matrix_realloc (gsl_matrix *m, size_t newheight, size_t newwidth)
 
void apop_matrix_show (const gsl_matrix *data)
 
gsl_matrix * apop_matrix_stack (gsl_matrix *m1, gsl_matrix const *m2, char posn, char inplace)
 
long double apop_matrix_sum (const gsl_matrix *m)
 
double apop_matrix_to_positive_semidefinite (gsl_matrix *m)
 
void apop_maximum_likelihood (apop_data *data, apop_model *dist)
 
apop_model * apop_ml_impute (apop_data *d, apop_model *meanvar)
 
apop_model * apop_model_clear (apop_data *data, apop_model *model)
 
apop_model * apop_model_copy (apop_model *in)
 
apop_model * apop_model_cross_base (apop_model *mlist[])
 
apop_data * apop_model_draws (apop_model *model, int count, apop_data *draws)
 
long double apop_model_entropy (apop_model *in, int draws)
 
apop_model * apop_model_fix_params (apop_model *model_in)
 
apop_model * apop_model_fix_params_get_base (apop_model *model_in)
 
void apop_model_free (apop_model *free_me)
 
apop_data * apop_model_hessian (apop_data *data, apop_model *model, double delta)
 
apop_model * apop_model_metropolis (apop_data *d, gsl_rng *rng, apop_model *m)
 
int apop_model_metropolis_draw (double *out, gsl_rng *rng, apop_model *model)
 
apop_model * apop_model_mixture_base (apop_model **inlist)
 
apop_data * apop_model_numerical_covariance (apop_data *data, apop_model *model, double delta)
 
void apop_model_print (apop_model *model, FILE *output_pipe)
 
apop_model * apop_model_set_parameters_base (apop_model *in, double ap[])
 
void apop_model_show (apop_model *print_me)
 
apop_model * apop_model_to_pmf (apop_model *model, apop_data *binspec, long int draws, int bin_count)
 
long double apop_multivariate_gamma (double a, int p)
 
long double apop_multivariate_lngamma (double a, int p)
 
int apop_name_add (apop_name *n, char const *add_me, char type)
 
apop_name * apop_name_alloc (void)
 
apop_name * apop_name_copy (apop_name *in)
 
int apop_name_find (const apop_name *n, const char *findme, const char type)
 
void apop_name_free (apop_name *free_me)
 
void apop_name_print (apop_name *n)
 
void apop_name_stack (apop_name *n1, apop_name *nadd, char type1, char typeadd)
 
gsl_vector * apop_numerical_gradient (apop_data *data, apop_model *model, double delta)
 
double apop_p (apop_data *d, apop_model *m)
 
apop_data * apop_paired_t_test (gsl_vector *a, gsl_vector *b)
 
apop_model * apop_parameter_model (apop_data *d, apop_model *m)
 
apop_data * apop_predict (apop_data *d, apop_model *m)
 
void apop_prep (apop_data *d, apop_model *m)
 
int apop_prep_output (char const *output_name, FILE **output_pipe, char *output_type, char *output_append)
 
int apop_query (const char *q,...)
 
apop_data * apop_query_to_data (const char *fmt,...)
 
double apop_query_to_float (const char *fmt,...)
 
apop_data * apop_query_to_mixed_data (const char *typelist, const char *fmt,...)
 
apop_data * apop_query_to_text (const char *fmt,...)
 
gsl_vector * apop_query_to_vector (const char *fmt,...)
 
apop_data * apop_rake (char const *margin_table, char *const *var_list, int var_ct, char *const *contrasts, int contrast_ct, char const *structural_zeros, int max_iterations, double tolerance, char const *count_col, char const *init_table, char const *init_count_col, double nudge)
 
int apop_regex (const char *string, const char *regex, apop_data **substrings, const char use_case)
 
gsl_rng * apop_rng_alloc (int seed)
 
gsl_rng * apop_rng_get_thread_base (int thread)
 
double apop_rng_GHgB3 (gsl_rng *r, double *a)
 
void apop_score (apop_data *d, gsl_vector *out, apop_model *m)
 
int apop_system (const char *fmt,...)
 
apop_data * apop_t_test (gsl_vector *a, gsl_vector *b)
 
int apop_table_exists (char const *name, char remove)
 
double apop_test (double statistic, char *distribution, double p1, double p2, char tail)
 
apop_data * apop_test_anova_independence (apop_data *d)
 
apop_data * apop_test_fisher_exact (apop_data *intab)
 
apop_data * apop_test_kolmogorov (apop_model *m1, apop_model *m2)
 
apop_data * apop_text_alloc (apop_data *in, const size_t row, const size_t col)
 
apop_data * apop_text_fill_base (apop_data *data, char *text[])
 
void apop_text_free (char ***freeme, int rows, int cols)
 
char * apop_text_paste (apop_data const *strings, char *between, char *before, char *after, char *between_cols, int(*prune)(apop_data *, int, int, void *), void *prune_parameter)
 
int apop_text_set (apop_data *in, const size_t row, const size_t col, const char *fmt,...)
 
apop_data * apop_text_to_data (char const *text_file, int has_row_names, int has_col_names, int const *field_ends, char const *delimiters)
 
int apop_text_to_db (char const *text_file, char *tabname, int has_row_names, int has_col_names, char **field_names, int const *field_ends, apop_data *field_params, char *table_params, char const *delimiters, char if_table_exists)
 
apop_data * apop_text_unique_elements (const apop_data *d, size_t col)
 
apop_model * apop_update (apop_data *data, apop_model *prior, apop_model *likelihood, gsl_rng *rng)
 
void apop_vector_apply (gsl_vector *v, void(*fn)(double *))
 
int apop_vector_bounded (const gsl_vector *in, long double max)
 
gsl_vector * apop_vector_copy (const gsl_vector *in)
 
double apop_vector_correlation (const gsl_vector *ina, const gsl_vector *inb, const gsl_vector *weights)
 
double apop_vector_cov (gsl_vector const *v1, gsl_vector const *v2, gsl_vector const *weights)
 
double apop_vector_distance (const gsl_vector *ina, const gsl_vector *inb, const char metric, const double norm)
 
long double apop_vector_entropy (gsl_vector *in)
 
void apop_vector_exp (gsl_vector *v)
 
gsl_vector * apop_vector_fill_base (gsl_vector *in, double[])
 
double apop_vector_kurtosis (const gsl_vector *in)
 
double apop_vector_kurtosis_pop (gsl_vector const *v, gsl_vector const *weights)
 
void apop_vector_log (gsl_vector *v)
 
void apop_vector_log10 (gsl_vector *v)
 
gsl_vector * apop_vector_map (const gsl_vector *v, double(*fn)(double))
 
double apop_vector_map_sum (const gsl_vector *in, double(*fn)(double))
 
double apop_vector_mean (gsl_vector const *v, gsl_vector const *weights)
 
gsl_vector * apop_vector_moving_average (gsl_vector *, size_t)
 
void apop_vector_normalize (gsl_vector *in, gsl_vector **out, const char normalization_type)
 
double * apop_vector_percentiles (gsl_vector *data, char rounding)
 
void apop_vector_print (gsl_vector *data, Output_declares)
 
void apop_vector_print (gsl_vector *data, char const *output_name, FILE *output_pipe, char output_type, char output_append)
 
gsl_vector * apop_vector_realloc (gsl_vector *v, size_t newheight)
 
void apop_vector_show (const gsl_vector *data)
 
double apop_vector_skew (const gsl_vector *in)
 
double apop_vector_skew_pop (gsl_vector const *v, gsl_vector const *weights)
 
gsl_vector * apop_vector_stack (gsl_vector *v1, gsl_vector const *v2, char inplace)
 
long double apop_vector_sum (const gsl_vector *in)
 
gsl_matrix * apop_vector_to_matrix (const gsl_vector *in, char row_col)
 
gsl_vector * apop_vector_unique_elements (const gsl_vector *v)
 
double apop_vector_var (gsl_vector const *v, gsl_vector const *weights)
 
double apop_vector_var_m (const gsl_vector *in, const double mean)
 

Variables

apop_model * apop_bernoulli
 
apop_model * apop_bernoulli
 
apop_model * apop_bernoulli
 
apop_model * apop_bernoulli
 
apop_model * apop_beta
 
apop_model * apop_beta
 
apop_model * apop_beta
 
apop_model * apop_beta
 
apop_model * apop_binomial
 
apop_model * apop_binomial
 
apop_model * apop_binomial
 
apop_model * apop_binomial
 
apop_model * apop_chi_squared
 
apop_model * apop_chi_squared
 
apop_model * apop_chi_squared
 
apop_model * apop_chi_squared
 
apop_model * apop_composition
 
apop_model * apop_composition
 
apop_model * apop_composition
 
apop_model * apop_composition
 
apop_model * apop_coordinate_transform
 
apop_model * apop_coordinate_transform
 
apop_model * apop_coordinate_transform
 
apop_model * apop_coordinate_transform
 
apop_model * apop_cross
 
apop_model * apop_cross
 
apop_model * apop_cross
 
apop_model * apop_cross
 
apop_model * apop_dconstrain
 
apop_model * apop_dconstrain
 
apop_model * apop_dconstrain
 
apop_model * apop_dconstrain
 
apop_model * apop_dirichlet
 
apop_model * apop_dirichlet
 
apop_model * apop_dirichlet
 
apop_model * apop_dirichlet
 
apop_model * apop_exponential
 
apop_model * apop_exponential
 
apop_model * apop_exponential
 
apop_model * apop_exponential
 
apop_model * apop_f_distribution
 
apop_model * apop_f_distribution
 
apop_model * apop_f_distribution
 
apop_model * apop_f_distribution
 
apop_model * apop_gamma
 
apop_model * apop_gamma
 
apop_model * apop_gamma
 
apop_model * apop_gamma
 
apop_model * apop_improper_uniform
 
apop_model * apop_improper_uniform
 
apop_model * apop_improper_uniform
 
apop_model * apop_improper_uniform
 
apop_model * apop_iv
 
apop_model * apop_iv
 
apop_model * apop_iv
 
apop_model * apop_iv
 
apop_model * apop_kernel_density
 
apop_model * apop_kernel_density
 
apop_model * apop_kernel_density
 
apop_model * apop_kernel_density
 
apop_model * apop_loess
 
apop_model * apop_loess
 
apop_model * apop_loess
 
apop_model * apop_loess
 
apop_model * apop_logit
 
apop_model * apop_logit
 
apop_model * apop_logit
 
apop_model * apop_logit
 
apop_model * apop_lognormal
 
apop_model * apop_lognormal
 
apop_model * apop_lognormal
 
apop_model * apop_lognormal
 
apop_model * apop_mixture
 
apop_model * apop_mixture
 
apop_model * apop_mixture
 
apop_model * apop_mixture
 
apop_model * apop_multinomial
 
apop_model * apop_multinomial
 
apop_model * apop_multinomial
 
apop_model * apop_multinomial
 
apop_model * apop_multivariate_normal
 
apop_model * apop_multivariate_normal
 
apop_model * apop_multivariate_normal
 
apop_model * apop_multivariate_normal
 
apop_model * apop_normal
 
apop_model * apop_normal
 
apop_model * apop_normal
 
apop_model * apop_normal
 
apop_model * apop_ols
 
apop_model * apop_ols
 
apop_model * apop_ols
 
apop_model * apop_ols
 
apop_opts_type apop_opts
 
apop_opts_type apop_opts
 
apop_opts_type apop_opts
 
apop_opts_type apop_opts
 
apop_opts_type apop_opts
 
apop_opts_type apop_opts
 
apop_model * apop_pmf
 
apop_model * apop_pmf
 
apop_model * apop_pmf
 
apop_model * apop_pmf
 
apop_model * apop_poisson
 
apop_model * apop_poisson
 
apop_model * apop_poisson
 
apop_model * apop_poisson
 
apop_model * apop_probit
 
apop_model * apop_probit
 
apop_model * apop_probit
 
apop_model * apop_probit
 
apop_model * apop_t_distribution
 
apop_model * apop_t_distribution
 
apop_model * apop_t_distribution
 
apop_model * apop_t_distribution
 
apop_model * apop_uniform
 
apop_model * apop_uniform
 
apop_model * apop_uniform
 
apop_model * apop_uniform
 
apop_model * apop_wls
 
apop_model * apop_wls
 
apop_model * apop_wls
 
apop_model * apop_wls
 
apop_model * apop_yule
 
apop_model * apop_yule
 
apop_model * apop_yule
 
apop_model * apop_yule
 
apop_model * apop_zipf
 
apop_model * apop_zipf
 
apop_model * apop_zipf
 
apop_model * apop_zipf
 

Detailed Description

Macro Definition Documentation

#define Apop_c (   d,
  col 
)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also
Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_c (   d,
  col 
)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also
Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_c (   d,
  col 
)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also
Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_c (   d,
  col 
)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also
Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_col_t (   d,
  colname,
  outd 
)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv
#define Apop_col_t (   d,
  colname,
  outd 
)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv
#define Apop_col_t (   d,
  colname,
  outd 
)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv
#define Apop_col_t (   d,
  colname,
  outd 
)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv
#define Apop_col_tv (   m,
  col,
  v 
)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv
#define Apop_col_tv (   m,
  col,
  v 
)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv
#define Apop_col_tv (   m,
  col,
  v 
)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv
#define Apop_col_tv (   m,
  col,
  v 
)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, that I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv
#define Apop_cs (   d,
  colnum,
  len 
)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cs (   d,
  colnum,
  len 
)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cs (   d,
  colnum,
  len 
)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cs (   d,
  colnum,
  len 
)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cv (   data_to_view,
  col 
)

A macro to generate a temporary one-column view of the matrix in the apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

1 gsl_vector *v = Apop_cv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size2; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cv (   data_to_view,
  col 
)

A macro to generate a temporary one-column view of the matrix in the apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

1 gsl_vector *v = Apop_cv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size2; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cv (   data_to_view,
  col 
)

A macro to generate a temporary one-column view of the matrix in the apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

1 gsl_vector *v = Apop_cv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size2; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv
#define Apop_cv (   data_to_view,
  col 
)

A macro to generate a temporary one-column view of the matrix in the apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

1 gsl_vector *v = Apop_cv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size2; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv
#define apop_data_add_names (   dataset,
  type,
  ... 
)

Add a list of names to a data set.

  • Use this with a list of names that you type in yourself, like
    1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
    Notice the lack of curly braces around the list.
  • You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
    1 char *colnames[] = {"age", "sex", "height", NULL};
    2 apop_data_add_names_base(mydata, 'c', colnames);
    But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.
See also
apop_name_add, although apop_data_add_names will be more useful in most cases.
#define apop_data_add_names (   dataset,
  type,
  ... 
)

Add a list of names to a data set.

  • Use this with a list of names that you type in yourself, like
    1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
    Notice the lack of curly braces around the list.
  • You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
    1 char *colnames[] = {"age", "sex", "height", NULL};
    2 apop_data_add_names_base(mydata, 'c', colnames);
    But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.
See also
apop_name_add, although apop_data_add_names will be more useful in most cases.
#define apop_data_add_names (   dataset,
  type,
  ... 
)

Add a list of names to a data set.

  • Use this with a list of names that you type in yourself, like
    1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
    Notice the lack of curly braces around the list.
  • You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
    1 char *colnames[] = {"age", "sex", "height", NULL};
    2 apop_data_add_names_base(mydata, 'c', colnames);
    But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.
See also
apop_name_add, although apop_data_add_names will be more useful in most cases.
#define apop_data_add_names (   dataset,
  type,
  ... 
)

Add a list of names to a data set.

  • Use this with a list of names that you type in yourself, like
    1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
    Notice the lack of curly braces around the list.
  • You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
    1 char *colnames[] = {"age", "sex", "height", NULL};
    2 apop_data_add_names_base(mydata, 'c', colnames);
    But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.
See also
apop_name_add, although apop_data_add_names will be more useful in most cases.
#define apop_data_free (   freeme)

Free an apop_data structure.

  • As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
  • If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
  • This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....
#define apop_data_free (   freeme)

Free an apop_data structure.

  • As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
  • If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
  • This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....
#define apop_data_free (   freeme)

Free an apop_data structure.

  • As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
  • If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
  • This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....
#define apop_data_free (   freeme)

Free an apop_data structure.

  • As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
  • If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
  • This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....
#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define Apop_mcv (   matrix_to_view,
  col 
)

Get a vector view of a single column of a gsl_matrix.

Parameters
matrix_to_view: A gsl_matrix.
col: An integer giving the column to be viewed.
Returns
A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.
1 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
2 printf("The correlation coefficient between columns two "
3  "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 2), Apop_mcv(m, 3)));
See also
Apop_r, Apop_cv
#define Apop_mcv (   matrix_to_view,
  col 
)

Get a vector view of a single column of a gsl_matrix.

Parameters
matrix_to_view: A gsl_matrix.
col: An integer giving the column to be viewed.
Returns
A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.
1 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
2 printf("The correlation coefficient between columns two "
3  "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 2), Apop_mcv(m, 3)));
See also
Apop_r, Apop_cv
#define Apop_mcv (   matrix_to_view,
  col 
)

Get a vector view of a single column of a gsl_matrix.

Parameters
matrix_to_view: A gsl_matrix.
col: An integer giving the column to be viewed.
Returns
A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.
1 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
2 printf("The correlation coefficient between columns two "
3  "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 2), Apop_mcv(m, 3)));
See also
Apop_r, Apop_cv
#define Apop_mcv (   matrix_to_view,
  col 
)

Get a vector view of a single column of a gsl_matrix.

Parameters
matrix_to_view: A gsl_matrix.
col: An integer giving the column to be viewed.
Returns
A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.
1 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
2 printf("The correlation coefficient between columns two "
3  "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 2), Apop_mcv(m, 3)));
See also
Apop_r, Apop_cv
#define apop_model_copy_set (   model,
  type,
  ... 
)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns
A pointer to the newly-prepped model.
#define apop_model_copy_set (   model,
  type,
  ... 
)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns
A pointer to the newly-prepped model.
#define apop_model_copy_set (   model,
  type,
  ... 
)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns
A pointer to the newly-prepped model.
#define apop_model_copy_set (   model,
  type,
  ... 
)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns
A pointer to the newly-prepped model.
#define apop_model_cross (   ...)

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

  • If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.
Exceptions
error=='n': First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.
Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
*/
void cross_normals(){
double mu = 2;
double sigma = .1;
apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
apop_model *two_independent_normals = apop_model_cross(n1, n2);
//
//We don't use it, but the cross product of three is just as easy:
apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
//The unparameterized cross product:
apop_model *two_n = apop_model_cross(
apop_model_copy(apop_normal),
apop_model_copy(apop_normal)
);
apop_model *estimated_norms = apop_estimate(draws, two_n);
apop_model_print(estimated_norms);
apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
assert(fabs(apop_data_get(estp1, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp2, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
//bind together a Poisson and a Normal
void norm_cross_poisson(){
apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
int len = 1e5;
apop_data *draws = apop_model_draws(mm, len);
for (int i=0; i< len; i++){
Apop_row_v(draws, i, onev);
assert((int)onev->data[0] == onev->data[0]);
assert(onev->data[1]<0);
}
/*The rest of the test script recovers the parameters.
Input data to an apop_cross model can take two formats. In cross_normals, the
draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
on a second page appended to the first. Then, set the .splitpage element of the
apop_cross settings group to the name of the second page.
*/
apop_data *comeback = apop_data_alloc();
comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
apop_data_add_page(comeback, apop_data_alloc(), "p2");
comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
//set up the un-parameterized crossed model, including
//the name at which to split the data set
Apop_settings_add(estme, apop_cross, splitpage, "p2");
apop_model *ested = apop_estimate(comeback, estme);
//test that the parameters are as promised.
apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
//You can cross as many models as you'd like.
apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
apop_model *mmm = apop_model_cross(m1, m2, m3);
assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
assert(apop_data_get(sum, .row=0, .colname="median") == 3);
assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
int main(){
cross_normals();
norm_cross_poisson();
}
#define apop_model_cross (   ...)

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

  • If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.
Exceptions
error=='n': First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.
Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
*/
void cross_normals(){
double mu = 2;
double sigma = .1;
apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
apop_model *two_independent_normals = apop_model_cross(n1, n2);
//
//We don't use it, but the cross product of three is just as easy:
apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
//The unparameterized cross product:
apop_model *two_n = apop_model_cross(
apop_model_copy(apop_normal),
apop_model_copy(apop_normal)
);
apop_model *estimated_norms = apop_estimate(draws, two_n);
apop_model_print(estimated_norms);
apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
assert(fabs(apop_data_get(estp1, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp2, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
//bind together a Poisson and a Normal
void norm_cross_poisson(){
apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
int len = 1e5;
apop_data *draws = apop_model_draws(mm, len);
for (int i=0; i< len; i++){
Apop_row_v(draws, i, onev);
assert((int)onev->data[0] == onev->data[0]);
assert(onev->data[1]<0);
}
/*The rest of the test script recovers the parameters.
Input data to an apop_cross model can take two formats. In cross_normals, the
draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
on a second page appended to the first. Then, set the .splitpage element of the
apop_cross settings group to the name of the second page.
*/
apop_data *comeback = apop_data_alloc();
comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
apop_data_add_page(comeback, apop_data_alloc(), "p2");
comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
//set up the un-parameterized crossed model, including
//the name at which to split the data set
Apop_settings_add(estme, apop_cross, splitpage, "p2");
apop_model *ested = apop_estimate(comeback, estme);
//test that the parameters are as promised.
apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
//You can cross as many models as you'd like.
apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
apop_model *mmm = apop_model_cross(m1, m2, m3);
assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
assert(apop_data_get(sum, .row=0, .colname="median") == 3);
assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
int main(){
cross_normals();
norm_cross_poisson();
}
#define apop_model_cross (   ...)

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

  • If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.
Exceptions
error=='n': First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.
Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
*/
void cross_normals(){
double mu = 2;
double sigma = .1;
apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
apop_model *two_independent_normals = apop_model_cross(n1, n2);
//
//We don't use it, but the cross product of three is just as easy:
apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
//The unparameterized cross product:
apop_model *two_n = apop_model_cross(
apop_model_copy(apop_normal),
apop_model_copy(apop_normal)
);
apop_model *estimated_norms = apop_estimate(draws, two_n);
apop_model_print(estimated_norms);
apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
assert(fabs(apop_data_get(estp1, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp2, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
//bind together a Poisson and a Normal
void norm_cross_poisson(){
apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
int len = 1e5;
apop_data *draws = apop_model_draws(mm, len);
for (int i=0; i< len; i++){
Apop_row_v(draws, i, onev);
assert((int)onev->data[0] == onev->data[0]);
assert(onev->data[1]<0);
}
/*The rest of the test script recovers the parameters.
Input data to an apop_cross model can take two formats. In cross_normals, the
draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
on a second page appended to the first. Then, set the .splitpage element of the
apop_cross settings group to the name of the second page.
*/
apop_data *comeback = apop_data_alloc();
comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
apop_data_add_page(comeback, apop_data_alloc(), "p2");
comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
//set up the un-parameterized crossed model, including
//the name at which to split the data set
Apop_settings_add(estme, apop_cross, splitpage, "p2");
apop_model *ested = apop_estimate(comeback, estme);
//test that the parameters are as promised.
apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
//You can cross as many models as you'd like.
apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
apop_model *mmm = apop_model_cross(m1, m2, m3);
assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
assert(apop_data_get(sum, .row=0, .colname="median") == 3);
assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
int main(){
cross_normals();
norm_cross_poisson();
}
#define apop_model_cross (   ...)

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

  • If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.
Exceptions
error=='n': First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.
Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
*/
void cross_normals(){
double mu = 2;
double sigma = .1;
apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
apop_model *two_independent_normals = apop_model_cross(n1, n2);
//
//We don't use it, but the cross product of three is just as easy:
apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
//The unparameterized cross product:
apop_model *two_n = apop_model_cross(
apop_model_copy(apop_normal),
apop_model_copy(apop_normal)
);
apop_model *estimated_norms = apop_estimate(draws, two_n);
apop_model_print(estimated_norms);
apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
assert(fabs(apop_data_get(estp1, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp2, 0) - mu) < 2e-3);
assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
//bind together a Poisson and a Normal
void norm_cross_poisson(){
apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
int len = 1e5;
apop_data *draws = apop_model_draws(mm, len);
for (int i=0; i< len; i++){
Apop_row_v(draws, i, onev);
assert((int)onev->data[0] == onev->data[0]);
assert(onev->data[1]<0);
}
/*The rest of the test script recovers the parameters.
Input data to an apop_cross model can take two formats. In cross_normals, the
draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
on a second page appended to the first. Then, set the .splitpage element of the
apop_cross settings group to the name of the second page.
*/
apop_data *comeback = apop_data_alloc();
comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
apop_data_add_page(comeback, apop_data_alloc(), "p2");
comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
//set up the un-parameterized crossed model, including
//the name at which to split the data set
Apop_settings_add(estme, apop_cross, splitpage, "p2");
apop_model *ested = apop_estimate(comeback, estme);
//test that the parameters are as promised.
apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
//You can cross as many models as you'd like.
apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
apop_model *mmm = apop_model_cross(m1, m2, m3);
assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
assert(apop_data_get(sum, .row=0, .colname="median") == 3);
assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
int main(){
cross_normals();
norm_cross_poisson();
}
#define apop_model_mixture (   ...)

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters
...: A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.
#define apop_model_mixture (   ...)

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters
...: A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.
#define apop_model_mixture (   ...)

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters
...: A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.
#define apop_model_mixture (   ...)

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters
...: A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.
#define Apop_model_set_settings (   model,
  ... 
)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

1 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
2  .base_data=d,
3  .set_fn = set_uniform_edges,
4  .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_model_set_settings (   model,
  ... 
)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

1 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
2  .base_data=d,
3  .set_fn = set_uniform_edges,
4  .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_model_set_settings (   model,
  ... 
)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

1 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
2  .base_data=d,
3  .set_fn = set_uniform_edges,
4  .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_model_set_settings (   model,
  ... 
)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

1 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
2  .base_data=d,
3  .set_fn = set_uniform_edges,
4  .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_mrv (   matrix_to_view,
  row 
)

Get a vector view of a single row of a gsl_matrix.

Parameters
matrix_to_view: A gsl_matrix.
row: An integer giving the row to be viewed.
Returns
A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also
Apop_r, Apop_rv
#define Apop_mrv (   matrix_to_view,
  row 
)

Get a vector view of a single row of a gsl_matrix.

Parameters
matrix_to_view: A gsl_matrix.
row: An integer giving the row to be viewed.
Returns
A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also
Apop_r, Apop_rv
#define Apop_mrv (   matrix_to_view,
  row 
)

Get a vector view of a single row of a gsl_matrix.

Parameters
matrix_to_viewA gsl_matrix.
rowAn integer giving the row to be viewed.
Returns
A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also
Apop_r, Apop_rv
#define Apop_mrv (   matrix_to_view,
  row 
)

Get a vector view of a single row of a gsl_matrix.

Parameters
matrix_to_viewA gsl_matrix.
rowAn integer giving the row to be viewed.
Returns
A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also
Apop_r, Apop_rv
#define Apop_notify (   verbosity,
  ... 
)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters
verbosityAt what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., apop_notify(0, "Beta is currently %g", beta).
#define Apop_notify (   verbosity,
  ... 
)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters
verbosityAt what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., apop_notify(0, "Beta is currently %g", beta).
#define Apop_notify (   verbosity,
  ... 
)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters
verbosityAt what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., apop_notify(0, "Beta is currently %g", beta).
#define Apop_notify (   verbosity,
  ... 
)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters
verbosityAt what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., apop_notify(0, "Beta is currently %g", beta).
#define Apop_r (   d,
  rownum 
)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

1 //pull a single row
2 apop_data *v = Apop_r(your_data, 7);
3 
4 //or loop through a sequence of one-row data sets.
5 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
6 for (int i=0; i< your_data->matrix->size1; i++)
7  printf("Std Normal CDF up to observation %i is %g\n",
8  i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_r (   d,
  rownum 
)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

1 //pull a single row
2 apop_data *v = Apop_r(your_data, 7);
3 
4 //or loop through a sequence of one-row data sets.
5 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
6 for (int i=0; i< your_data->matrix->size1; i++)
7  printf("Std Normal CDF up to observation %i is %g\n",
8  i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_r (   d,
  rownum 
)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

1 //pull a single row
2 apop_data *v = Apop_r(your_data, 7);
3 
4 //or loop through a sequence of one-row data sets.
5 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
6 for (int i=0; i< your_data->matrix->size1; i++)
7  printf("Std Normal CDF up to observation %i is %g\n",
8  i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_r (   d,
  rownum 
)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

1 //pull a single row
2 apop_data *v = Apop_r(your_data, 7);
3 
4 //or loop through a sequence of one-row data sets.
5 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
6 for (int i=0; i< your_data->matrix->size1; i++)
7  printf("Std Normal CDF up to observation %i is %g\n",
8  i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_row_t (   d,
  rowname,
  outd 
)

After this call, outd will hold an apop_data view of the apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv
#define Apop_row_t (   d,
  rowname,
  outd 
)

After this call, outd will hold an apop_data view of the apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv
#define Apop_row_t (   d,
  rowname,
  outd 
)

After this call, outd will hold an apop_data view of the apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv
#define Apop_row_t (   d,
  rowname,
  outd 
)

After this call, outd will hold an apop_data view of the apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv
#define Apop_row_tv (   m,
  row,
  v 
)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv
#define Apop_row_tv (   m,
  row,
  v 
)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv
#define Apop_row_tv (   m,
  row,
  v 
)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv
#define Apop_row_tv (   m,
  row,
  v 
)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also
Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv
#define Apop_rs (   d,
  rownum,
  len 
)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rs (   d,
  rownum,
  len 
)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rs (   d,
  rownum,
  len 
)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rs (   d,
  rownum,
  len 
)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rv (   data_to_view,
  row 
)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only row row. The view is a gsl_vector.

1 gsl_vector *v = Apop_rv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size1; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rv (   data_to_view,
  row 
)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only row row. The view is a gsl_vector.

1 gsl_vector *v = Apop_rv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size1; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rv (   data_to_view,
  row 
)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only row row. The view is a gsl_vector.

1 gsl_vector *v = Apop_rv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size1; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_rv (   data_to_view,
  row 
)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only row row. The view is a gsl_vector.

1 gsl_vector *v = Apop_rv(your_data, i);
2 
3 for (int i=0; i< your_data->matrix->size1; i++)
4  printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also
Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv
#define Apop_settings_add_group (   model,
  type,
  ... 
)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns
A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

  • If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.
#define Apop_settings_add_group (   model,
  type,
  ... 
)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns
A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

  • If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.
#define Apop_settings_add_group (   model,
  type,
  ... 
)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns
A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

  • If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.
#define Apop_settings_add_group (   model,
  type,
  ... 
)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns
A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

  • If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.
#define Apop_settings_copy (   name,
  ... 
)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_copy (   name,
  ... 
)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_copy (   name,
  ... 
)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_copy (   name,
  ... 
)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_declarations (   ysg)

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_declarations (   ysg)

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_declarations (   ysg)

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_declarations (   ysg)

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_free (   name,
  ... 
)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_free (   name,
  ... 
)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_free (   name,
  ... 
)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_free (   name,
  ... 
)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_get (   model,
  type,
  setting 
)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters
modelAn apop_model.
typeA string giving the type of the settings group you are retrieving, without the _settings ending. E.g., for an apop_mle_settings group, use apop_mle.
settingThe struct element you want to retrieve.
#define Apop_settings_get (   model,
  type,
  setting 
)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters
modelAn apop_model.
typeA string giving the type of the settings group you are retrieving, without the _settings ending. E.g., for an apop_mle_settings group, use apop_mle.
settingThe struct element you want to retrieve.
#define Apop_settings_get (   model,
  type,
  setting 
)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters
modelAn apop_model.
typeA string giving the type of the settings group you are retrieving, without the _settings ending. E.g., for an apop_mle_settings group, use apop_mle.
settingThe struct element you want to retrieve.
#define Apop_settings_get (   model,
  type,
  setting 
)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters
modelAn apop_model.
typeA string giving the type of the settings group you are retrieving, without the _settings ending. E.g., for an apop_mle_settings group, use apop_mle.
settingThe struct element you want to retrieve.
#define Apop_settings_get_group (   m,
  type 
)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

1 if (!apop_settings_get_group(m, "apop_ols")) ...
Parameters
mAn apop_model
typeA string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only apop_mle.
Returns
A void pointer to the desired struct (or NULL if not found).
#define Apop_settings_get_group (   m,
  type 
)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

1 if (!apop_settings_get_group(m, "apop_ols")) ...
Parameters
mAn apop_model
typeA string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only apop_mle.
Returns
A void pointer to the desired struct (or NULL if not found).
#define Apop_settings_get_group (   m,
  type 
)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

1 if (!apop_settings_get_group(m, "apop_ols")) ...
Parameters
mAn apop_model
typeA string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only apop_mle.
Returns
A void pointer to the desired struct (or NULL if not found).
#define Apop_settings_get_group (   m,
  type 
)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

1 if (!apop_settings_get_group(m, "apop_ols")) ...
Parameters
mAn apop_model
typeA string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only apop_mle.
Returns
A void pointer to the desired struct (or NULL if not found).
#define Apop_settings_init (   name,
  ... 
)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_init (   name,
  ... 
)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_init (   name,
  ... 
)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_init (   name,
  ... 
)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_rm_group (   m,
  type 
)

Removes a settings group from a model's list.

  • If the so-named group is not found, do nothing.
#define Apop_settings_rm_group (   m,
  type 
)

Removes a settings group from a model's list.

  • If the so-named group is not found, do nothing.
#define Apop_settings_rm_group (   m,
  type 
)

Removes a settings group from a model's list.

  • If the so-named group is not found, do nothing.
#define Apop_settings_rm_group (   m,
  type 
)

Removes a settings group from a model's list.

  • If the so-named group is not found, do nothing.
#define Apop_settings_set (   model,
  type,
  setting,
  data 
)

Modifies a single element of a settings group to the given value.

For example,

1 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
2 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
3 
4 //Add an apop_mle_settings group to specify the search strategy
5 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
6  .step_size=3, .tolerance=1e-6);
7 
8 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
9 Apop_settings_set(mix, apop_mixture, find_weights, 'y'); //Search for optimal mixture weights
10 Apop_settings_set(mix, apop_mle, method, "NM simplex"); //Nelder-Mead simplex algorithm
11 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.
  • If model==NULL, fails silently.
  • If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.
#define Apop_settings_set (   model,
  type,
  setting,
  data 
)

Modifies a single element of a settings group to the given value.

For example,

1 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
2 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
3 
4 //Add an apop_mle_settings group to specify the search strategy
5 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
6  .step_size=3, .tolerance=1e-6);
7 
8 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
9 Apop_settings_set(mix, apop_mixture, find_weights, 'y'); //Search for optimal mixture weights
10 Apop_settings_set(mix, apop_mle, method, "NM simplex"); //Nelder-Mead simplex algorithm
11 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.
  • If model==NULL, fails silently.
  • If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.
#define Apop_settings_set (   model,
  type,
  setting,
  data 
)

Modifies a single element of a settings group to the given value.

For example,

1 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
2 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
3 
4 //Add an apop_mle_settings group to specify the search strategy
5 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
6  .step_size=3, .tolerance=1e-6);
7 
8 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
9 Apop_settings_set(mix, apop_mixture, find_weights, 'y'); //Search for optimal mixture weights
10 Apop_settings_set(mix, apop_mle, method, "NM simplex"); //Nelder-Mead simplex algorithm
11 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.
  • If model==NULL, fails silently.
  • If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.
#define Apop_settings_set (   model,
  type,
  setting,
  data 
)

Modifies a single element of a settings group to the given value.

For example,

1 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
2 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
3 
4 //Add an apop_mle_settings group to specify the search strategy
5 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
6  .step_size=3, .tolerance=1e-6);
7 
8 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
9 Apop_settings_set(mix, apop_mixture, find_weights, 'y'); //Search for optimal mixture weights
10 Apop_settings_set(mix, apop_mle, method, "NM simplex"); //Nelder-Mead simplex algorithm
11 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.
  • If model==NULL, fails silently.
  • If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.
#define Apop_stopif (   test,
  onfail,
  level,
  ... 
)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters
testThe expression that, if true, triggers the action.
onfailIf the test is true, do this. E.g., out->error='x'; return GSL_NAN. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be goto outro, plus an appropriately-labeled section at the end of your function.
levelPrint the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

1 //the typical case, stopping function execution:
2 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
3 
4 //Mark a flag, go to a cleanup step
5 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
6 
7 //Print a diagnostic iff apop_opts.verbose>=1 and continue
8 Apop_stopif(x < 0, , 1, "warning: x is %g.", x);
  • If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
  • If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.
#define Apop_stopif (   test,
  onfail,
  level,
  ... 
)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters
testThe expression that, if true, triggers the action.
onfailIf the test is true, do this. E.g., out->error='x'; return GSL_NAN. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be goto outro, plus an appropriately-labeled section at the end of your function.
levelPrint the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

1 //the typical case, stopping function execution:
2 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
3 
4 //Mark a flag, go to a cleanup step
5 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
6 
7 //Print a diagnostic iff apop_opts.verbose>=1 and continue
8 Apop_stopif(x < 0, , 1, "warning: x is %g.", x);
  • If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
  • If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.
#define Apop_stopif (   test,
  onfail,
  level,
  ... 
)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters
testThe expression that, if true, triggers the action.
onfailIf the test is true, do this. E.g., out->error='x'; return GSL_NAN. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be goto outro, plus an appropriately-labeled section at the end of your function.
levelPrint the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

1 //the typical case, stopping function execution:
2 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
3 
4 //Mark a flag, go to a cleanup step
5 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
6 
7 //Print a diagnostic iff apop_opts.verbose>=1 and continue
8 Apop_stopif(x < 0, , 1, "warning: x is %g.", x);
  • If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
  • If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.
#define Apop_stopif (   test,
  onfail,
  level,
  ... 
)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters
testThe expression that, if true, triggers the action.
onfailIf the assertion fails, do this. E.g., out->error='x'; return GSL_NAN. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be goto outro, plus an appropriately-labeled section at the end of your function.
levelPrint the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

1 //the typical case, stopping function execution:
2 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
3 
4 //Mark a flag, go to a cleanup step
5 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
6 
7 //Print a diagnostic iff apop_opts.verbose>=1 and continue
8 Apop_stopif(x < 0, , 1, "warning: x is %g.", x);
  • If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
  • If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.
#define Apop_subm (   matrix_to_view,
  srow,
  scol,
  nrows,
  ncols 
)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters
matrix_to_viewThe root matrix
srowthe first row (in the root matrix) of the top of the submatrix
scolthe first column (in the root matrix) of the left edge of the submatrix
nrowsnumber of rows in the submatrix
ncolsnumber of columns in the submatrix
Returns
An automatically-allocated view of type gsl_matrix.
#define Apop_subm (   matrix_to_view,
  srow,
  scol,
  nrows,
  ncols 
)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters
matrix_to_viewThe root matrix
srowthe first row (in the root matrix) of the top of the submatrix
scolthe first column (in the root matrix) of the left edge of the submatrix
nrowsnumber of rows in the submatrix
ncolsnumber of columns in the submatrix
Returns
An automatically-allocated view of type gsl_matrix.
#define Apop_subm (   matrix_to_view,
  srow,
  scol,
  nrows,
  ncols 
)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters
matrix_to_viewThe root matrix
srowthe first row (in the root matrix) of the top of the submatrix
scolthe first column (in the root matrix) of the left edge of the submatrix
nrowsnumber of rows in the submatrix
ncolsnumber of columns in the submatrix
Returns
An automatically-allocated view of type gsl_matrix.
#define Apop_subm (   matrix_to_view,
  srow,
  scol,
  nrows,
  ncols 
)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters
matrix_to_viewThe root matrix
srowthe first row (in the root matrix) of the top of the submatrix
scolthe first column (in the root matrix) of the left edge of the submatrix
nrowsnumber of rows in the submatrix
ncolsnumber of columns in the submatrix
Returns
An automatically-allocated view of type gsl_matrix.

Function Documentation

apop_data * apop_anova ( char *  table,
char *  data,
char *  grouping1,
char *  grouping2 
)

This function produces a traditional one- or two-way ANOVA table. It works from data in an SQL table, using queries of a form like select data from table group by grouping1, grouping2.

Parameters
tableThe table to be queried. Anything that can go in an SQL from clause is OK, so this can be a plain table name or a temp table specification like (select ... ), with parens.
dataThe name of the column holding the count or other such data
grouping1The name of the first column by which to group data
grouping2If this is NULL, then the function will return a one-way ANOVA. Otherwise, the name of the second column by which to group data in a two-way ANOVA.
int apop_arms_draw ( double *  out,
gsl_rng *  r,
apop_model m 
)

Adaptive rejection Metropolis sampling, to make random draws from a univariate distribution.

The author, Wally Gilks, explains on http://www.amsta.leeds.ac.uk/~wally.gilks/adaptive.rejection/web_page/Welcome.html , that "ARS works by constructing an envelope function of the log of the target density, which is then used in rejection sampling (see, for example, Ripley, 1987). Whenever a point is rejected by ARS, the envelope is updated to correspond more closely to the true log density, thereby reducing the chance of rejecting subsequent points. Fewer ARS rejection steps implies fewer point-evaluations of the log density."

  • It accepts only functions with univariate inputs. I.e., it will put a single value into a 1x1 apop_data set, and then evaluate the log likelihood at that point. For multivariate situations, see apop_model_metropolis.
  • It is currently the default for the apop_draw function given a univariate model, so you can just call that if you prefer.
  • There are a great number of parameters, in the apop_arms_settings structure. The structure also holds a history of the points tested to date. That means that the system will be more accurate as more draws are made. It also means that if the parameters change, or you use apop_model_copy, you should call Apop_settings_rm_group(your_model, apop_arms) to clear the model of points that are not valid for a different situation.
gsl_vector * apop_array_to_vector ( double *  in,
int  size 
)

Copies a one-dimensional array to a gsl_vector. The input array is undisturbed.

Parameters
inAn array of doubles. (No default. Must not be NULL);
sizeHow long in is. If this is zero or omitted, I'll guess using the sizeof(in)/sizeof(in[0]) trick, which will work for most arrays allocated using double [] and won't work for those allocated using double *. (default = auto-guess)
Returns
A gsl_vector, allocated and filled with a copy of (not a pointer to) the input data.
  • If you send in a NULL array, you get a NULL pointer in return. I warn you of this if apop_opts.verbose >=1 .
apop_model * apop_beta_from_mean_var ( double  m,
double  v 
)

The Beta distribution is useful for modeling because it is bounded between zero and one, and can be either unimodal (if the variance is low) or bimodal (if the variance is high), and can have either a slant toward the bottom or top of the range (depending on the mean).

The distribution has two parameters, typically named $\alpha$ and $\beta$, which can be difficult to interpret. However, there is a one-to-one mapping between (alpha, beta) pairs and (mean, variance) pairs. Since we have good intuition about the meaning of means and variances, this function takes in a mean and variance, calculates alpha and beta behind the scenes, and returns the appropriate Beta distribution.

Parameters
mThe mean the Beta distribution should have. Notice that m is in [0,1].
vThe variance which the Beta distribution should have. It is in (0, 1/12), where (1/12) is the variance of a Uniform(0,1) distribution. Funny things happen with variance near 1/12 and mean far from 1/2.
Returns
Returns an apop_model produced by copying the apop_beta model and setting its parameters appropriately.
Exceptions
out->error=='r'Range error: mean is not within [0, 1].
apop_data * apop_bootstrap_cov ( apop_data data,
apop_model model,
gsl_rng *  rng,
int  iterations,
char  keep_boots,
char  ignore_nans,
apop_data **  boot_store 
)

Give me a data set and a model, and I'll give you the bootstrapped covariance matrix of the parameter estimates.

Parameters
dataThe data set. An apop_data set where each row is a single data point. (No default)
modelAn apop_model, whose estimate method will be used here. (No default)
iterationsHow many bootstrap draws should I make? (default: 1,000)
rngAn RNG that you have initialized, probably with apop_rng_alloc. (Default: an RNG from apop_rng_get_thread)
boot_storeIf not NULL, put the list of drawn parameter values here, with one parameter set per row. Sample use:
1 apop_data *boots;
2 apop_bootstrap_cov(data, model, .boot_store=&boots);
3 apop_data_print(boots);
The rows are packed via apop_data_pack, so use apop_data_unpack if needed. (Default: NULL)
ignore_nansIf 'y' and any of the elements in the estimation return NaN, then I will throw out that draw and try again. If 'n', then I will write that set of statistics to the list, NaN and all. I keep count of throw-aways; if there are more than iterations elements thrown out, then I throw an error and return with estimates using data I have so far. That is, I assume that NaNs are rare edge cases; if they are as common as good data, you might want to rethink how you are using the bootstrap mechanism. (Default: 'n')
Returns
An apop_data set whose matrix element is the estimated covariance matrix of the parameters.
Exceptions
out->error=='n'NULL input data.
out->error=='N'too many NaNs.

This example is a sort of demonstration of the Central Limit Theorem. The model is a simulation, where each call to the estimation routine produces the mean/std dev of a set of draws from a Uniform Distribution. Because the simulation takes no inputs, apop_bootstrap_cov simply re-runs the simulation and calculates a sequence of mean/std dev pairs, and reports the covariance of that generated data set.

#include <apop.h>
// Find the μ/σ of a set of 20 draws from a Uniform(-1, 1)
void sim_step(apop_data *none, apop_model *m){
int sub_draws = 20;
static apop_model *unif;
if (!unif) unif = apop_model_set_parameters(apop_uniform, -1, 1);
apop_data *draws= apop_model_draws(unif, sub_draws);
apop_data_set(m->parameters, 0, .val=apop_mean(Apop_cv(draws, 0)));
apop_data_set(m->parameters, 1, .val=sqrt(apop_var(Apop_cv(draws, 0))));
apop_data_add_names(m->parameters, 'r', "μ", "σ");
}
apop_model *clt_sim = &(apop_model){.name="CLT simulation", .vsize=2, .estimate=sim_step};
int main(){
apop_data *boots;
apop_data * boot_cov = apop_bootstrap_cov(NULL, clt_sim, .iterations=1000, .boot_store=&boots);
apop_data_print(boot_cov);
apop_data *means = Apop_c(boots, 0);
printf("\nStats via Normal model:\n");
apop_data *np = apop_estimate(means, apop_normal)->parameters;
np->more = NULL; //rm covariance of statistics.
//σ from the Normal should == sqrt(cov(μ_boot))
assert(fabs(sqrt(apop_data_get(boot_cov,0,0)) - apop_data_get(np, 1)) < 1e-4);
}
See also
apop_jackknife_cov
double apop_cdf ( apop_data d,
apop_model m 
)

Input a one-row data point/vector and a model; returns the area of the model's PDF beneath the given point.

By default, make random draws from the PDF and return the percentage of those draws beneath or equal to the given point. Many models have closed-form solutions that make no use of random draws.

See also apop_cdf_settings, which is the structure used to store draws already made (which means the second, third, ... calls to this function will take much less time than the first), the gsl_rng, and the number of draws to be made. These are handled without your involvement, but if you would like to change the number of draws from the default, add this group before calling apop_cdf :

1 Apop_model_add_group(your_model, apop_cdf, .draws=1e5, .rng=my_rng);
2 double cdf_value = apop_cdf(your_data_point, your_model);
  • Only the first row of the input apop_data set is used. Note that if you need to view row 20 of a data set as a one-row data set, use Apop_r.

Here are many examples using common, mostly symmetric distributions.

#include <apop.h>
int main(){
//Set up an apop_data set with only one number.
//Most of these functions will only look at the first data point encountered.
apop_data *onept = apop_data_falloc((1), 23);
apop_model *norm = apop_model_set_parameters(apop_normal, 23, 138.8);
double val = apop_cdf(onept, norm);
assert(fabs(val - 0.5) < 1e-4);
double tolerance = 1e-8;
//Macroizing the sample routine above:
#define model_val_cdf(model, value, cdf_result) { \
apop_data_set(onept, .val=(value)); \
assert(fabs((apop_cdf(onept, model))-(cdf_result))< tolerance); \
}
apop_model *uni = apop_model_set_parameters(apop_uniform, 20, 26);
model_val_cdf(uni, 0, 0);
model_val_cdf(uni, 20, 0);
model_val_cdf(uni, 21, 1./6);
model_val_cdf(uni, 23, 0.5);
model_val_cdf(uni, 25, 5./6);
model_val_cdf(uni, 26, 1);
model_val_cdf(uni, 260, 1);
//Improper uniform always returns 1/2.
model_val_cdf(apop_improper_uniform, 0, 0.5);
model_val_cdf(apop_improper_uniform, 228, 0.5);
model_val_cdf(apop_improper_uniform, INFINITY, 0.5);
apop_model *binom = apop_model_set_parameters(apop_binomial, 2001, 0.5);
model_val_cdf(binom, 0, 0);
model_val_cdf(binom, 1000, .5);
model_val_cdf(binom, 2000, 1);
apop_model *bernie = apop_model_set_parameters(apop_bernoulli, 0.75);
//p(0)=.25; p(1)=.75; that determines the CDF.
//Notice that the CDF's integral is over a closed interval.
model_val_cdf(bernie, -1, 0);
model_val_cdf(bernie, 0, 0.25);
model_val_cdf(bernie, 0.1, 0.25);
model_val_cdf(bernie, .99, 0.25);
model_val_cdf(bernie, 1, 1);
model_val_cdf(bernie, INFINITY, 1);
//alpha=beta -> symmetry
apop_model *beta = apop_model_set_parameters(apop_beta, 2, 2);
model_val_cdf(beta, -INFINITY, 0);
model_val_cdf(beta, 0.5, 0.5);
model_val_cdf(beta, INFINITY, 1);
//This beta distribution -> uniform
apop_model *beta_uni = apop_model_set_parameters(apop_beta, 1, 1);
model_val_cdf(beta_uni, 0, 0);
model_val_cdf(beta_uni, 1./6, 1./6);
model_val_cdf(beta_uni, 0.5, 0.5);
model_val_cdf(beta_uni, 1, 1);
beta_uni->cdf = NULL; //With no closed-form CDF; make random draws to estimate the CDF.
Apop_model_add_group(beta_uni, apop_cdf, .draws=1e6); //extra draws to improve accuracy, but we have to lower our tolerance anyway.
tolerance=1e-3;
model_val_cdf(beta_uni, 0, 0);
model_val_cdf(beta_uni, 1./6, 1./6);
model_val_cdf(beta_uni, 0.5, 0.5);
model_val_cdf(beta_uni, 1, 1);
//sum of three symmetric distributions: still symmetric.
apop_model *sum_of_three = apop_model_mixture(beta, apop_improper_uniform, beta_uni);
model_val_cdf(sum_of_three, 0.5, 0.5);
apop_data *threepts = apop_data_falloc((3,1), -1, 0, 1);
apop_model *kernels = apop_estimate(threepts, apop_kernel_density);
model_val_cdf(kernels, -5, 0);
model_val_cdf(kernels, 0, 0.5);
model_val_cdf(kernels, 10, 1);
}
void apop_crosstab_to_db ( apop_data in,
char *  tabname,
char *  row_col_name,
char *  col_col_name,
char *  data_col_name 
)

See apop_db_to_crosstab for the storyline; this is the complement, which takes a crosstab and writes its values to the database.

For example, I would take

      c0   c1
r0    2    3
r1    0    4

and do the following writes to the database:

1 insert into your_table values ('r0', 'c0', 2);
2 insert into your_table values ('r0', 'c1', 3);
3 insert into your_table values ('r1', 'c0', 0);
4 insert into your_table values ('r1', 'c1', 4);
  • If your data set does not have names (or not enough names), I will use the scheme above, filling in names of the form r0, r1, ... c0, c1, .... Text columns get their own names, t0, t1.
  • This function handles only the matrix and text.
void apop_data_add_named_elmt ( apop_data d,
char *  name,
double  val 
)

A convenience function to add a named element to a data set. Many of Apophenia's testing procedures use this to easily produce a column of named parameters. It is public as a convenience.

Parameters
dThe apop_data structure. Must not be NULL, but may be blank (as per allocation via apop_data_alloc ( ) ).
nameThe name to add
valthe value to add to the set.
  • I use the position of the last non-empty row name to know where to put the value. If there are two names in the data set, then I will put the new name in the third name slot and the data in the third slot in the vector. If you use this function from start to finish in building your list, then you'll be fine.
  • If the vector is too short (or NULL), I will call apop_vector_realloc internally to make space.
  • This fits well with the defaults for apop_data_get. An example:
1 apop_data *list = apop_data_alloc();
2 apop_data_add_named_elmt(list, "height", 165);
3 apop_data_add_named_elmt(list, "weight", 60);
4 
5 double height = apop_data_get(list, .rowname="height");
6 
7 //or
8 #define Lookup(dataset, key) apop_data_get(dataset, .rowname=#key)
9 height = Lookup(list, height);
apop_data * apop_data_add_page ( apop_data dataset,
apop_data newpage,
const char *  title 
)

Add a page to an apop_data set. It gets a name so you can find it later.

Parameters
datasetThe input data set, to which a page will be added.
newpageThe page to append
titleThe name of the new page.
Returns
The new page. I post a warning if I am appending a NULL page or appending to a NULL data set and apop_opts.verbose >=1 .
  • See Pages for further notes.
apop_data * apop_data_alloc ( const size_t  size1,
const size_t  size2,
const int  size3 
)

Allocate an apop_data structure.

  • The typical case is three arguments, like apop_data_alloc(2,3,4): vector size, matrix rows, matrix cols. If the first argument is zero, you get a NULL vector.
  • Two arguments, apop_data_alloc(2,3), would allocate just a matrix, leaving the vector NULL.
  • One argument, apop_data_alloc(2), would allocate just a vector, leaving the matrix NULL.
  • Zero arguments, apop_data_alloc(), will produce a basically blank set, with out->matrix and out->vector set to NULL.

For allocating the text part, see apop_text_alloc.

The weights vector is set to NULL. If you need it, allocate it via

1 d->weights = gsl_vector_alloc(row_ct);
Returns
The apop_data structure, allocated and ready to be populated with data.
Exceptions
out->error=='a'Allocation error. The matrix, vector, or names couldn't be malloced, which probably means that you requested a very large data set.
  • An apop_data struct, by itself, is about 72 bytes. If I can't allocate that much memory, I return NULL. But if even this much fails, your computer may be on fire and you should go put it out.
See also
apop_data_calloc
apop_data * apop_data_calloc ( const size_t  size1,
const size_t  size2,
const int  size3 
)

Allocate an apop_data structure, to be filled with data; set everything in the allocated portion to zero. See apop_data_alloc for details.

Returns
The apop_data structure, allocated and zeroed out.
Exceptions
out->error=='a'allocation error; probably out of memory.
See also
apop_data_alloc
apop_data * apop_data_copy ( const apop_data in)

Copy one apop_data structure to another. That is, all data is duplicated.

Basically a front-end for apop_data_memcpy for those who prefer this sort of syntax.

If the data set has a more pointer, that will be followed and subsequent pages copied as well.

Parameters
inthe input data
Returns
a structure that this function will allocate and fill. If input is NULL, then this will be NULL.
Exceptions
out.error='a'Allocation error.
out.error='c'Cyclic link: D->more == D (may be later in the chain, e.g., D->more->more == D->more). You'll have only a partial copy.
out.error='d'Dimension error; should never happen.
out.error='p'Missing part error; should never happen.
  • If the input data set has an error, then I will copy it anyway, including the error flag (which might be overwritten). I print a warning if the verbosity level is >=1.
apop_data * apop_data_correlation ( const apop_data in)

Returns the matrix of correlation coefficients $(\sigma^2_{xy}/(\sigma_x\sigma_y))$ relating each column with each other.

Parameters
inA data matrix: rows are observations, columns are variables. If you give me a weights vector, I'll use it.
Returns
Returns the square correlation matrix with dimensions equal to the number of input columns.
Exceptions
out->error='a'Allocation error.
apop_data * apop_data_covariance ( const apop_data in)

Returns the sample variance/covariance matrix relating each column of the matrix to each other column.

Parameters
inAn apop_data set. If the weights vector is set, I'll take it into account.
  • This is the sample covariance—dividing by $n-1$, not $n$. If you need the population variance, use
    1 apop_data *popcov = apop_data_covariance(indata);
    2 int size=indata->matrix->size1;
    3 gsl_matrix_scale(popcov->matrix, (size-1.)/size);
Returns
Returns an apop_data set holding the variance/covariance matrix.
Exceptions
out->error='a'Allocation error.
char apop_data_free_base ( apop_data freeme)

Free the elements of the given apop_data set and then the apop_data set itself. Intended to be used by apop_data_free, a macro that calls this to free elements, then sets the value to NULL.

  • apop_data_free is a macro that calls this function and, on success, sets the input pointer to NULL. For typical cases, that's slightly more useful than this function.
Exceptions
freeme.error='c'Circular linking is against the rules. If freeme->more == freeme, then I set freeme.error='c' and return. If you send in a structure like A -> B -> B, then both data sets A and B will be marked.
Returns
0 on OK, 'c' on error.
double apop_data_get ( const apop_data data,
size_t  row,
int  col,
const char *  rowname,
const char *  colname,
const char *  page 
)

Returns the data element at the given point.

In case of error (probably that you asked for a data point out of bounds), returns NAN. See the set/get page for details and examples.

Parameters
dataThe data set. Must not be NULL.
rowThe row number of the desired element. If rowname==NULL, default is zero.
colThe column number of the desired element. -1 indicates the vector. If colname==NULL, default is zero if the ->matrix element is not NULL and -1 if the ->matrix element is NULL and the ->vector element is not.
rownameThe row name of the desired element. If NULL, use the row number.
colnameThe column name of the desired element. If NULL, use the column number.
pageThe case-insensitive name of the page on which the element is found. If NULL, use first page.
Returns
The value at the given location.
apop_data * apop_data_get_factor_names ( apop_data data,
int  col,
char  type 
)

Factor names are stored in an auxiliary table with a name like "<categories for your_var>". Producing this name is annoying (and prevents us from eventually making it human-language independent), so use this function to get the list of factor names.

Parameters
dataThe data set. (No default, must not be NULL)
colThe column in the main data set whose name I'll use to check for the factor name list. Vector==-1. (default=0)
typeIf you are referring to a text column, use 't'. (default='d')
Returns
A pointer to the page in the data set with the given factor names.
apop_data * apop_data_get_page ( const apop_data data,
const char *  title,
const char  match 
)

It's good form to get a page from your data set by name, because you may not know the order for the pages, and the stepping through makes for dull code anyway (apop_data *page = dataset; while (page->more) page= page->more;).

Parameters
dataThe apop_data set to use. No default; if NULL, gives a warning if apop_opts.verbose >=1 and returns NULL.
titleThe name of the page to retrieve. Default="<Info>", which is the name of the page of additional estimation information returned by estimation routines (log likelihood, status, AIC, BIC, confidence intervals, ...).
matchIf 'c', case-insensitive match (via strcasecmp); if 'e', exact match, if 'r' regular expression substring search (via apop_regex). Default='c'.
Returns
The page whose title matches what you gave me. If I don't find a match, return NULL.
apop_data * apop_data_listwise_delete ( apop_data d,
char  inplace 
)

If there is an NaN anywhere in the row of data (including the matrix, the vector, the weights, and the text) then delete the row from the data set.

  • If every row has a NaN, then this returns NULL.
  • If apop_opts.nan_string is not NULL, then I will make case-insensitive comparisons to the text elements to check for bad data as well.
  • If inplace = 'y', then I'll free each element of the input data set and refill it with the pruned elements. I'll still take up (up to) twice the size of the data set in memory during the function. If every row has a NaN, then your apop_data set will end up with NULL vector, matrix, and so on. If inplace = 'n', then the original data set is left where it was, though internal elements may be moved.
  • I only look at the first page of data (i.e. the more element is ignored).
  • Listwise deletion is often not a statistically valid means of dealing with missing data. It is typically better to impute the data (preferably multiple times). See apop_ml_impute for a less-invalid means, or Tea for survey imputation for heavy-duty survey editing and imputation.
  • This function uses the Designated initializers syntax for inputs.
Parameters
dThe data, with NaNs
inplaceIf 'y', clear out the pointer-to-apop_data that you sent in and refill with the pruned data. If 'n', leave the set alone and return a new data set. Default='n'.
Returns
A (potentially shorter) copy of the data set, without NaNs. If inplace=='y', a pointer to the input, which was shortened in place. If the entire data set is cleared out, then this will be NULL.
See also
apop_data_rm_rows
void apop_data_memcpy ( apop_data out,
const apop_data in 
)

Copy one apop_data structure to another.

This function does not allocate the output structure or the vector, matrix, text, or weights elements—I assume you have already done this and got the dimensions right. I will assert that there is at least enough room in the destination for your data, and fail if the copy would write more elements than there are bins.

  • If you want space allocated or are unsure about dimensions, use apop_data_copy.
  • If both in and out have a more pointer, also copy subsequent page(s).
  • You can use the subsetting macros, Apop_r, Apop_rs, Apop_c, and so on, to copy within a data set:
1 //Copy the contents of row i of mydata to row j.
2 apop_data *fromrow = Apop_r(mydata, i);
3 apop_data *torow = Apop_r(mydata, j);
4 apop_data_memcpy(torow, fromrow);
5 
6 // or just
7 apop_data_memcpy(Apop_r(mydata, j), Apop_r(mydata, i));
Parameters
outA structure that this function will fill. Must be preallocated with the appropriate sizes.
inThe input data.
Exceptions
out.error='d'Dimension error.
out.error='p'Part missing; e.g., in->matrix exists but out->matrix doesn't.
gsl_vector * apop_data_pack ( const apop_data in,
gsl_vector *  out,
char  more_pages,
char  use_info_pages 
)

This function takes in an apop_data set and writes it as a single column of numbers, outputting a gsl_vector. It is valid to use the out_vector->data element as an array of doubles of size out_vector->size (i.e. its stride==1).

The complement is apop_data_unpack. I.e.,

1 apop_data_unpack(apop_data_pack(in_data), data_copy)

will return the original data set (stripped of text and names).

Parameters
inan apop_data set. No default; if NULL, return NULL.
outIf this is not NULL, then put the output here. The dimensions must match exactly. If NULL, then allocate a new vector. Default = NULL.
more_pagesIf 'y', then follow the ->more pointer to fill subsequent pages; else fill only the first page. Informational pages will still be ignored, unless you set .use_info_pages='y' as well. Default = 'y'.
use_info_pagesPages in XML-style brackets, such as <Covariance> will be ignored unless you set .use_info_pages='y'. Be sure that this is set to the same thing when you both pack and unpack. Default: 'n'.
Returns
A gsl_vector with the vector data (if any), then each row of data (if any), then the weights (if any), then the same for subsequent pages (if any && .more_pages=='y'). If out is not NULL, then this is out.
Exceptions
NULLIf you give me a vector as input, and its size is not correct, returns NULL.
apop_data * apop_data_pmf_compress ( apop_data in)

Say that you have added a long list of observations to a single apop_data set, meaning that each row has weight one. There are a huge number of duplicates, perhaps because there are a handful of types that keep repeating:

Vector value   Text name   Weights
12             Dozen       1
1              Single      1
2              Pair        1
2              Pair        1
1              Single      1
1              Single      1
2              Pair        1
2              Pair        1

Use this function to reduce this to a set of distinct values, with their weights adjusted accordingly:

Vector value   Text name   Weights
12             Dozen       1
1              Single      3
2              Pair        4
Parameters
inAn apop_data set that may have duplicate rows. As above, the data may be in text and/or numeric formats.
Returns
Your input is changed in place, via apop_data_rm_rows, so use apop_data_copy before calling this function if you need to retain the original format. For your convenience, this function returns a pointer to your original data, which has now been pruned. If there is a weights vector, I will add those weights together as duplicates are merged. If there is no weights vector, I will create one, which is initially set to one for all values, and then aggregated as above.
void apop_data_print ( const apop_data data,
Output_declares   
)

Print an apop_data set to a file, the database, or the screen, as determined by the .output_type.

apop_data * apop_data_prune_columns_base ( apop_data d,
char **  colnames 
)

Keep only the columns of a data set that you name. This is the function called internally by the apop_data_prune_columns macro. In most cases, you'll want to use that macro. An example of the two uses demonstrating the difference:

1 apop_data_prune_columns(d, "mean", "median");
2 
3 char *list[] = {"mean", "median", NULL};
4 apop_data_prune_columns_base(d, list);
Parameters
dThe data set to prune.
colnamesA NULL-terminated list of names to retain.
Returns
A pointer to the input data set, now pruned.
See also
apop_data_rm_columns
double * apop_data_ptr ( apop_data data,
int  row,
int  col,
const char *  rowname,
const char *  colname,
const char *  page 
)

Get a pointer to an element of an apop_data set.

  • If a NULL vector or matrix (as the case may be), or the row/column you requested is outside bounds, return NULL.
  • See the set/get page for details.
Parameters
dataThe data set. Must not be NULL.
rowThe row number of the desired element. If rowname==NULL, default is zero.
colThe column number of the desired element. -1 indicates the vector. If colname==NULL, default is zero.
rownameThe row name of the desired element. If NULL, use the row number.
colnameThe column name of the desired element. If NULL, use the column number.
pageThe case-insensitive name of the page on which the element is found. If NULL, use first page.
Returns
A pointer to the element.
apop_data * apop_data_rank_compress ( apop_data in,
int  min_bins 
)

One often finds data where the column indicates the value of the data point. There may be two columns, and a mark in the first indicates a miss while a mark in the second is a hit. Or say that we have the following list of observations:

1 2 3 3 2 1 1 2 1 1 2 1 1

Then we could write this as:

1 0 1 2 3
2 ----------
3 0 6 4 2

because there are six 1s observed, four 2s observed, and two 3s observed. We call this rank format, because 1 (or zero) is typically the most common, 2 is second most common, et cetera.

This function takes in a list of observations, and aggregates them into a single row in rank format.

Parameters
inThe input apop_data set. If NULL, return NULL.
min_binsIf this is omitted, the number of bins is simply the largest number found. So if there are bins {0, 1, 2} and your data set happens to consist of 0 0 1 1 0, then I won't know to generate results with three bins where the last bin has a count of zero. Set .min_bins=2 to ensure that bin is included.
/* A round trip: generate Zipf-distributed draws, summarize them to a single list of
rankings, then expand the rankings to a list of single entries. The sorted list at the end
of this should be identical to the (sorted) original list. */
#include <apop.h>
int main(){
gsl_rng *r = apop_rng_alloc(2342);
int i, length = 1e4;
apop_model *a_zipf = apop_model_set_parameters(apop_zipf, 3.2);
apop_data *draws = apop_data_alloc(length);
for (i=0; i< length; i++)
apop_draw(apop_data_ptr(draws, i, -1), r, a_zipf);
apop_data *by_rankings = apop_data_rank_compress(draws);
//The first row of the matrix is suitable for plotting.
//apop_data_show(by_rankings);
assert(apop_matrix_sum(by_rankings->matrix) == length);
apop_data *re_expanded = apop_data_rank_expand(by_rankings);
gsl_sort_vector(draws->vector);
gsl_sort_vector(re_expanded->vector);
assert(apop_vector_distance(draws->vector, re_expanded->vector) < 1e-5);
}
apop_data * apop_data_rank_expand ( apop_data in)

The complement to this is apop_data_rank_compress; see that function's documentation for the story and an example.

This function takes in a data set where the zeroth column includes the count(s) of times that zero was observed, the first gives the count(s) of times that one was observed, et cetera. It outputs a data set whose vector element includes a list that has exactly the given frequency of zeros, ones, et cetera.

void apop_data_rm_columns ( apop_data d,
int *  drop 
)

Remove the columns of the apop_data set corresponding to a nonzero value in the drop vector.

  • The returned data structure looks like it was modified in place, but the data matrix and the names are duplicated before being pared down, so if your data is taking up more than half of your memory, this may not work.
Parameters
dThe apop_data structure to be pared down.
dropAn array of ints. If drop[7]==1, then column seven will be cut from the output. A reminder: calloc(d->matrix->size2, sizeof(int)) will fill your array with zeros on allocation, and memset(drop, 1, d->matrix->size2 * sizeof(int)) will quickly fill an array of ints with nonzero values. See also apop_data_rm_rows.
apop_data * apop_data_rm_page ( apop_data data,
const char *  title,
const char  free_p 
)

Remove the first page from an apop_data set that matches a given name.

Parameters
dataThe input data set, from which a page will be removed. No default. If NULL, maybe print a warning (see below).
titleThe case-insensitive name of the page to remove. Default: "<Info>"
free_pIf 'y', then apop_data_free the page. Default: 'y'.
Returns
If not freed, a pointer to the apop_data page that I just pulled out. Thus, you can use this to pull a single page from a data set. I set that page's more pointer to NULL, to minimize any confusion about more-than-linear linked list topologies. If free_p=='y' (the default) or the page is not found, return NULL.
  • I don't check the first page, so there's no concern that the head of your list of pages will move. Again, the intent of the ->more pointer in the apop_data set is not to fully implement a linked list, but primarily to allow you to staple auxiliary information to a main data set.
  • If I don't find the page you want, I return NULL, and maybe print a warning; see below.
  • For the two above cases where a warning may be printed, if the page is to be returned and apop_opts.verbose >= 1 , print a warning. If the page is to be freed and apop_opts.verbose >= 2 , print a warning.
  • The remaining more pointers in the apop_data set are adjusted accordingly.
apop_data * apop_data_rm_rows ( apop_data in,
int *  drop,
apop_fn_ir  do_drop,
void *  drop_parameter 
)

Remove the rows set to one in the drop vector or for which the do_drop function returns one.

Parameters
inthe apop_data structure to be pared down
dropa vector with as many elements as the max of the vector, matrix, or text parts of in, with a one marking those rows to be removed.
do_dropA function that returns one for rows to drop and zero for rows to not drop. A sample function:
1 int your_drop_function(apop_data *onerow, void *extra_param){
2  return gsl_isnan(apop_data_get(onerow)) ||
3  !strcmp(onerow->text[0][0], "Uninteresting data point");
4 }
apop_data_rm_rows will use Apop_r to get a subview of the input data set of height one, and send that subview to this function (and since arguments typically default to zero, you don't have to write out things like apop_data_get (onerow, .row=0, .col=0), which can help to keep things readable).
drop_parameterIf your do_drop function requires additional input, put it here and it will be passed through.
Returns
Returns a pointer to the input data set, now pruned.
  • If all the rows are to be removed, then you will wind up with the same apop_data set, with NULL vector, matrix, weight, and text. Therefore, you may wish to check for NULL elements after use. I remove rownames, but leave the other names, in case you want to add new data rows.
  • The typical use is to provide only a list or only a function. If both are NULL, I return without doing anything, and print a warning if apop_opts.verbose >=2. If you provide both, I will drop the row if either the vector has a one in that row's position, or if the function returns a nonzero value.
  • This function uses the Designated initializers syntax for inputs.
    See also
    apop_data_listwise_delete, apop_data_rm_columns
int apop_data_set ( apop_data data,
size_t  row,
int  col,
const double  val,
const char *  colname,
const char *  rowname,
const char *  page 
)

Set a data element. See the set/get page for details and examples.

Returns
0=OK, -1=error: couldn't find row/column name, or you asked for a location outside the vector/matrix bounds.
  • The error codes for out-of-bounds errors are thread-safe iff you have a C11-compliant compiler (thanks to the _Thread_local keyword) or a version of GCC with the __thread extension enabled.
  • Set weights via gsl_vector_set(your_data->weights, row, val);.
  • Set text elements via apop_text_set.
Parameters
dataThe data set. Must not be NULL.
rowThe row number of the desired element. If rowname==NULL, default is zero.
colThe column number of the desired element. -1 indicates the vector. If colname==NULL, default is zero.
rownameThe row name of the desired element. If NULL, use the row number.
colnameThe column name of the desired element. If NULL, use the column number.
pageThe case-insensitive name of the page on which the element is found. If NULL, use first page.
valThe value to give the point.
apop_data * apop_data_sort ( apop_data data,
apop_data sort_order,
char  asc,
char  inplace,
double *  col_order 
)

Sort an apop_data set on an arbitrary sequence of columns.

The sort_order set is a one-row data set that should look like the data set being sorted. The easiest way to generate it is to use Apop_r to pull one row of the table, then copy and fill it. For each column you want used in the sort, assign a ranking giving whether the column should be sorted first, second, .... Columns you don't want used in the sorting should be set to NAN. Ties are broken by the earlier element in the default order (see below).

E.g., to sort by the last column of a five-column matrix first, then the next-to-last column, then the next-to-next-to-last, then by the first text column, then by the second text column:

1 apop_data *sort_order = apop_data_copy(Apop_r(data, 0));
2 sort_order->vector = NULL; //so it will be skipped.
3 Apop_data_fill(sort_order, NAN, NAN, 3, 2, 1);
4 apop_text_set(sort_order, 0, 0, "4");
5 apop_text_set(sort_order, 0, 1, "5");
6 apop_data_sort(data, sort_order);

To determine which columns are sorted at which step, I use only comparisons, not the actual numeric values. For example, (1, 2, 3) and (-1.32, 0, 27) work identically. For text, I use atof to convert your text to a number, as in the example above that set text values of "4" and "5". A blank string, NaN numeric value, or NULL element in the apop_data set means that column will not be sorted.

  • Strings are sorted case-insensitively, using strcasecmp. [exercise for the reader: modify the source to use Glib's locale-correct string sorting.]
  • The setup generates a lexicographic sort using the columns you specify. If you would like a different sort order, such as Euclidean distance to the origin, you can generate a new column expressing your preferred metric, and then sort on that. See the example below.
Parameters
dataThe data set to be sorted. If NULL, this function is a no-op that returns NULL.
sort_orderAn apop_data set describing the order in which columns are used for sorting, as above. If NULL, then sort by the vector, then each matrix column, then text, then weights, then row names.
inplaceIf 'n', make a copy, else sort in place. (default: 'y').
ascIf 'a', ascending; if 'd', descending. This is applied to all columns; column-by-column application is to do. (default: 'a').
col_orderFor internal use only. In your call, it should be NULL; you can leave this off your function call entirely and the Designated initializers syntax will take care of it for you.
Returns
A pointer to the sorted data set. If inplace=='y' (the default), then this is the same as the input set.

A few examples:

#ifdef Datadir
#define DATADIR Datadir
#else
#define DATADIR "."
#endif
#include <apop.h>
#include <unistd.h>
#ifdef Testing
#include "sort_tests.c" //For Apophenia's test suite, some tedious checks that the sorts worked
#endif
//get_distance is for the sort-by-Euclidian distance example below.
double get_distance(gsl_vector *v) {return apop_vector_distance(v);}
int main(){
apop_text_to_db( DATADIR "/" "amash_vote_analysis.csv" );
apop_data *d = apop_query_to_mixed_data("mntmtm", "select 1,id,party,contribs/1000.0,vote,ideology from amash_vote_analysis " );
//use the default order of columns for sorting
apop_data *sorted = apop_data_sort(d, .inplace='n');
#ifndef Testing
apop_data_print(sorted);
#else
check_sorting1(sorted);
#endif
//set up a specific column order
perm->vector = NULL;
apop_data_fill(perm, 5, 3, 4);
apop_text_set(perm, 0, 0, "2");
apop_text_set(perm, 0, 1, "1");
apop_data_sort(d, perm);
#ifndef Testing
#else
check_sorting2(d);
#endif
//sort a list of names
apop_data_add_names(blank, 'r', "C", "E", "A");
assert(*blank->names->row[0] == 'A');
assert(*blank->names->row[1] == 'C');
assert(*blank->names->row[2] == 'E');
//take each row of the matrix as a vector; store the Euclidian distance to the origin in the vector;
//sort in descending order.
apop_data *rowvectors = apop_text_to_data( DATADIR "/" "test_data" );
apop_map(rowvectors, .fn_v=get_distance, .part='r', .inplace='y');
apop_data *arow = apop_data_copy(Apop_r(rowvectors, 0));
arow->matrix=NULL; //sort only by the distance vector
apop_data_sort(rowvectors, arow, .asc='d');
#ifndef Testing
apop_data_print(rowvectors);
#else
double prev = INFINITY;
for (int i=0; i< rowvectors->vector->size; i++){
double this = apop_data_get(rowvectors, i, -1);
assert(this < prev);
prev = this;
}
#endif
}
apop_data ** apop_data_split ( apop_data in,
int  splitpoint,
char  r_or_c 
)

Split one input apop_data structure into two.

For the opposite operation, see apop_data_stack.

Parameters
inThe apop_data structure to split
splitpointThe index of what will be the first row/column of the second data set. E.g., if this is -1 and r_or_c=='c', then the whole data set will be in the second data set; if this is the length of the matrix then the whole data set will be in the first data set. Another way to put it is that for values between zero and the matrix's size, splitpoint will equal the number of rows/columns in the first matrix.
r_or_cIf this is 'r' or 'R', then put some rows in the first data set and some in the second; if 'c' or 'C', split columns into first and second data sets.
Returns
An array of two apop_data sets. If one is empty then a NULL pointer will be returned in that position. For example, for a data set of 50 rows, apop_data **out = apop_data_split(data, 100, 'r') sets out[0] = apop_data_copy(data) and out[1] = NULL.
  • When splitting at a row, the text is also split.
  • The more pointer is ignored.
  • The apop_data->vector is taken to be the -1st element of the matrix.
  • Weights will be preserved. If splitting by rows, then the top and bottom parts of the weights vector will be assigned to the top and bottom parts of the main data set. If splitting by columns, identical copies of the weights vector will be assigned to both parts.
  • Data is copied, so you may want to call apop_data_free(in) after this.
apop_data * apop_data_stack ( apop_data m1,
apop_data m2,
char  posn,
char  inplace 
)

Put the first data set either on top of or to the left of the second data set.

For the opposite operation, see apop_data_split.

Parameters
m1the upper/leftmost data set (default = NULL)
m2the second data set (default = NULL)
posnIf 'r', stack rows of m1 above rows of m2
if 'c', stack columns of m1 to left of m2's
(default = 'r')
inplaceIf 'y', use apop_matrix_realloc and apop_vector_realloc to modify m1 in place. Otherwise, allocate a new apop_data set, leaving m1 undisturbed. (default='n')
Returns
The stacked data, either in a new apop_data set or m1
Exceptions
out->error=='a'Allocation error.
out->error=='d'Dimension error; couldn't make a complete copy.
  • The function returns a new data set, meaning that until you apop_data_free() the original data sets, you will be taking up twice as much memory.
  • If m1 or m2 are NULL, returns a copy of the other element, and if both are NULL, returns NULL. If m2 is NULL and inplace is 'y', returns the original m1 pointer unmodified.
  • Text is handled as you'd expect: If 'r', one set of text is stacked on top of the other [number of columns must match]; if 'c', one set of text is set next to the other [number of rows must match].
  • more is ignored.
  • If stacking rows on rows, the output vector is the input vectors stacked accordingly. If stacking columns by columns, the output vector is just a copy of the vector of m1 and m2->vector doesn't appear in the output at all.
  • The same rules for dealing with the vector(s) hold for the vector(s) of weights.
  • Names are a copy of the names for m1, with the names for m2 appended to the row or column list, as appropriate.
  • This function uses the Designated initializers syntax for inputs.
apop_data * apop_data_summarize ( apop_data indata)

Put summary information about the columns of a table (mean, std dev, variance, min, median, max) in a table.

Parameters
indataThe table to be summarized. An apop_data structure. May have a weights element.
Returns
An apop_data structure with one row for each column in the original table, and a column for each summary statistic.
Exceptions
out->error='a'Allocation error.
  • This function gives more columns than you probably want; use apop_data_prune_columns to pick the ones you want to see.
  • See apop_data_prune_columns for an example.
apop_data * apop_data_to_bins ( apop_data const *  indata,
apop_data const *  binspec,
int  bin_count,
char  close_top_bin 
)

Create a histogram from data by putting data into bins of fixed width. Your input apop_data set may be multidimensional, and may include both vector and matrix parts, and the bins output will have corresponding dimension.

Parameters
indataThe input data that will be binned, one observation per row. This is copied and the copy will be modified. (No default)
binspecThis is an apop_data set with the same number of columns as indata. If you want a fixed size for the bins, then the first row of the bin spec is the bin width for each column. This allows you to specify a width for each dimension, or specify the same size for all with something like:
1 apop_data *binspec = apop_data_copy(Apop_r(indata, 0));
2 gsl_matrix_set_all(binspec->matrix, 10); //bins of size 10 for all dim.s
3 apop_data_to_bins(indata, binspec);
The presumption is that the first bin starts at zero in all cases. You can add a second row to the spec to give the offset for each dimension. (default: NULL)
bin_countIf you don't provide a bin spec, I'll provide this many evenly-sized bins to cover the data set. (Default: $\sqrt{N}$)
close_top_binNormally, a bin covers the range from the point equal to its minimum to points strictly less than the minimum plus the width. If 'y', then the top bin includes points less than or equal to the upper bound. This solves the problem of displaying histograms where the top bin is just one point. (default: 'y' if binspec==NULL, else 'n')
Returns
A pointer to an apop_data set with the same dimension as your input data. Each cell is an integer giving the bin number into which the cell falls.
  • If you give neither a binspec nor a bin_count, then I use a grid with offset equal to the min of the column, and bin size such that it takes $\sqrt{N}$ bins to cover the range to the max element.
  • The text segment is not binned. The more pointer, if any, is not followed.
  • If there are weights, they are copied to the output via apop_vector_copy.
  • Given NULL input, return NULL output. Print a warning if apop_opts.verbose >= 2.

Iff you didn't give me a binspec, then I attach one to the output set as a page named <binspec>. This means that you can snap a second data set to the same grid using

1 apop_data_to_bins(first_set, NULL);
2 apop_data_to_bins(second_set, apop_data_get_page(first_set, "<binspec>"));
  • The output has exactly as many rows as the input. Because many rows will be identical after binning, it may be fruitful to run it through apop_data_pmf_compress to produce a short list with one total weight per bin.

Here is a sample program highlighting apop_data_to_bins and apop_data_pmf_compress .

#define _GNU_SOURCE
#include <apop.h>
#define printdata(dataset) \
printf("\n-----------\n\n"); \
apop_data_print(dataset);
int main(){
apop_data_fill(d, 1, 2, 3, 3, 1, 2);
apop_text_fill(d, "A", "A", "A", "A", "A", "B");
asprintf(&d->names->title, "Original data set");
printdata(d);
//binned, where bin ends are equidistant but not necessarily in the data
asprintf(&binned->names->title, "Post binning");
printdata(binned);
assert(fabs(//equal distance between bins
(apop_data_get(binned, 1) - apop_data_get(binned, 0))
- (apop_data_get(binned, 2) - apop_data_get(binned, 1))) < 1e-5);
//compressed, where the data is as in the original, but weights
//are redone to accommodate repeated observations.
asprintf(&d->names->title, "Post compression");
printdata(d);
assert(apop_sum(d->weights)==6);
apop_data *firstrow = Apop_r(d, 0); //1A
assert(fabs(apop_p(firstrow, d_as_pmf) - 2./6 < 1e-5));
}
apop_data * apop_data_to_dummies ( apop_data d,
int  col,
char  type,
int  keep_first,
char  append,
char  remove 
)

A utility to make a matrix of dummy variables. You give me a single vector that lists the category number for each item, and I'll produce a matrix with a single one in each row in the column specified.

After that, you have to decide what to do with the new matrix and the original data column.

  • You can manually join the dummy data set with your main data, e.g.:
    1 apop_data *dummies = apop_data_to_dummies(main_regression_vars, .col=8, .type='t');
    2 apop_data_stack(main_regression_vars, dummies, 'c', .inplace='y');
  • The .remove='y' option specifies that I should use apop_data_rm_columns to remove the column used to generate the dummies. Implemented only for type=='d'.
  • By specifying .append='y' or .append='e' I will run the above two lines for you. Your apop_data pointer will not change, but its matrix element will be reallocated (via apop_data_stack).
  • By specifying .append='i', I will place the matrix of dummies in place, immediately after the data column you had specified. You will probably use this with .remove='y' to replace the single column with the new set of dummy columns. Bear in mind that if there are two or more dummy columns, adding columns will change subsequent column numbers; use apop_name_find to find columns instead of giving an explicit column number.
  • If .append='i' and you asked for a text column, I will append to the end of the table, which is equivalent to append='e'.
Parameters
dThe data set with the column to be dummified (No default.)
colThe column number to be transformed; -1==vector (default = 0)
type'd'==data column, 't'==text column. (default = 't')
keep_firstIf 'n', return a matrix where each row has a one in the (column specified minus one). That is, the zeroth category is dropped, the first category has an entry in column zero, et cetera. If you don't know why this is useful, then this is what you need. If you know what you're doing and need something special, set this to 'y' and the first category won't be dropped. (default = 'n')
appendIf 'e' or 'y', append the dummy grid to the end of the original data matrix. If 'i', insert in place, immediately after the original data column. (default = 'n')
removeIf 'y', remove the original data or text column. (default = 'n')
Returns
An apop_data set whose matrix element is the one-zero matrix of dummies. If you used .append, then this is the main matrix. Also, I add a page named "<categories for your_var>" giving a reference table of names and column numbers (where your_var is the appropriate column heading).
Exceptions
out->error=='a'allocation error
out->error=='d'dimension error
See also
apop_data_to_factors
apop_data * apop_data_to_factors ( apop_data data,
char  intype,
int  incol,
int  outcol 
)

Convert a column of text or numbers into a column of numeric factors, which you can use for a multinomial probit/logit, for example.

If you don't run this on your data first, apop_probit and apop_logit default to running it on the vector or (if no vector) zeroth column of the matrix of the input apop_data set, because those models need a list of the unique values of the dependent variable.

Parameters
dataThe data set to be modified in place. (No default. If NULL, returns NULL and a warning)
intypeIf 't', then incol refers to text, if 'd', refers to the vector or matrix. (default = 't')
incolThe column in the text that will be converted. -1 is the vector. (default = 0)
outcolThe column in the data set where the numeric factors will be written (-1 means the vector). (default = 0)

For example:

1 apop_data *d = apop_query_to_mixed_data("mmt", "select 0, year, color from data");
2 apop_data_to_factors(d);

Notice that the query pulled a column of zeros for the sake of saving room for the factors. It reads column zero of the text, and writes it to column zero of the matrix.

Another example:

1 apop_data *d = apop_query_to_data("mmt", "select type, year from data");
2 apop_data_to_factors(d, .intype='d', .incol=0, .outcol=0);

Here, the type column is converted to sequential integer factors and those factors overwrite the original data. Since a reference table is added as a second page of the apop_data set, you can recover the original values as needed.

Returns
A table of the factors used in the code. This is an apop_data set with only one column of text. Also, I add a page named "<categories for your_var>" giving a reference table of names and column numbers (where your_var is the appropriate column heading); use apop_data_get_factor_names to retrieve that table.
Exceptions
out->error=='a'allocation error.
out->error=='d'dimension error.
  • If the vector or matrix you wanted to write to is NULL, I will allocate it for you.
  • See Generating factors for further discussion.
  • See the documentation for apop_logit for a sample linear model using this function.
  • This function uses the Designated initializers syntax for inputs.
See also
apop_data_to_dummies
apop_data * apop_data_transpose ( apop_data in,
char  transpose_text,
char  inplace 
)

Transpose the matrix and text elements of the input data set, including the row/column names.

The vector and weights elements of the input data set are completely ignored (but see also apop_vector_to_matrix, which can convert a vector to a 1 X N matrix.) If copying, these other elements won't be present; if .inplace='y', it is up to you to handle these not-transposed elements correctly.

Parameters
inThe input apop_data set. If NULL, I return NULL. (default: NULL)
transpose_textIf 'y', then also transpose the text element. (default: 'y')
inplaceIf 'y', transpose the input in place; if 'n', produce a transposed copy, leaving the original untouched. Due to how gsl_matrix_transpose_memcpy works, a copy will still be made, then copied to the original location. (default: 'y')
Returns
If inplace=='n', a newly alloced apop_data set, with the appropriately transposed matrix and/or text. The vector and weights elements will be NULL. If transpose_text='n', then the text element of the output set will also be NULL.
If inplace=='y', a pointer to the original data set, with matrix and (if transpose_text='y') text transposed, and vector and weights left in place untouched.
  • Row names are written to column names of the output matrix, text, or both (whichever is not empty in the input).
  • If only the matrix or only the text have names, then the one set of names is written to the row names of the output.
  • If both matrix column names and text column names are present, text column names are lost.
  • If you have a gsl_matrix with no names or text, you may prefer to use gsl_matrix_transpose_memcpy.
  • This function uses the Designated initializers syntax for inputs.
void apop_data_unpack ( const gsl_vector *  in,
apop_data d,
char  use_info_pages 
)

This is the complement to apop_data_pack, q.v. It writes the gsl_vector produced by that function back to the apop_data set you provide. It overwrites the data in the vector and matrix elements and, if present, the weights (and that's it, so names or text are as before).

Parameters
inA gsl_vector of the form produced by apop_data_pack. No default; must not be NULL.
dThe data set to be filled. Must be allocated to the correct size. No default; must not be NULL.
use_info_pagesPages in XML-style brackets, such as <Covariance> will be ignored unless you set .use_info_pages='y'. Be sure that this is set to the same thing when you both pack and unpack. (Default: 'n').
  • If I get to the end of the first page of the apop_data set and have more entries in the vector to unpack, and the data to fill has a more element, then I will continue into subsequent pages.
  • This function uses the Designated initializers syntax for inputs.
int apop_db_close ( char  vacuum)

Closes the database on disk. If you opened the database with apop_db_open(NULL), then this is basically optional.

Parameters
vacuum'v': vacuum—do clean-up to minimize the size of the database on disk.
'q': Don't bother; just close the database. (default = 'q')
Returns
0 on OK, nonzero on error.
int apop_db_open ( char const *  filename)

If you want to use a database on the hard drive instead of memory, then call this once and only once before using any other database utilities.

With SQLite, if you want a disposable database which you won't use after the program ends, don't bother with this function.

The trade-offs between an on-disk database and an in-memory db are as one would expect: memory is faster, but the database is destroyed when the program exits.

MySQL users: either set the environment variable APOP_DB_ENGINE=mysql or set apop_opts.db_engine = 'm'.

The Apophenia package assumes you are only using a single database at a time. You can use the SQL attach function to load other databases, or see this blog post for further suggestions and sample code.

When you are done doing your database manipulations, call apop_db_close if writing to disk.

Parameters
filenameThe name of a file on the hard drive on which to store the database. If NULL, then the database will be kept in memory (in which case, the other database functions will call this function for you and you don't need to bother).
Returns
0: everything OK
1: database did not open.
apop_data * apop_db_to_crosstab ( char const *  tabname,
char const *  row,
char const *  col,
char const *  data,
char  is_aggregate 
)

Give the name of a table in the database, and optional names of three of its columns: the x-dimension, the y-dimension, and the data. The output is a 2D matrix with rows indexed by 'row' and cols by 'col' and the cells filled with the entry in the 'data' column.

Parameters
tabnameThe database table I'm querying. Anything that will work inside a from clause is OK, such as a subquery in parens. (no default; must not be NULL)
rowThe column of the data set that will indicate the rows of the output crosstab (no default; must not be NULL)
colThe column of the data set that will indicate the columns of the output crosstab (no default; must not be NULL)
dataThe column of the data set holding the data for the cells of the crosstab (default: count(*))
is_aggregateSet to 'y' if the data is a function like count(*) or sum(col). That is, set to 'y' if querying this would require a group by clause. (default: if I find an end-paren in datacol, 'y'; else 'n'.)
  • If the query to get data to fill the table (select row, col, data from tabname) returns an empty data set, then I will return a NULL data set and, if apop_opts.verbose >= 1, print a warning.
Exceptions
out->error='n'Name not found error.
out->error='q'Query returned an empty table (which might mean that it just failed).
  • The simplest use is to get a tally of how often (r1, r2) appears in the data via apop_db_to_crosstab("datatab", "r1", "r2").
  • If you want a 1-D crosstab, omit the other dimension. Or omit both to get a grand tally of your statistic for the entire table.
  • There is a command-line tool, apop_db_to_crosstab, that calls this function.
  • This function uses the Designated initializers syntax for inputs.
double apop_det_and_inv ( const gsl_matrix *  in,
gsl_matrix **  out,
int  calc_det,
int  calc_inv 
)

Calculate the determinant of a matrix, its inverse, or both, via LU decomposition. The in matrix is not destroyed in the process.

See also
apop_matrix_determinant, apop_matrix_inverse
Parameters
inThe matrix to be inverted/determined.
outIf you want an inverse, this is where to place the matrix to be filled with the inverse. Will be allocated by the function.
calc_det0: Do not calculate the determinant.
1: Do.
calc_inv0: Do not calculate the inverse.
1: Do.
Returns
If calc_det == 1, then return the determinant. Otherwise, just returns zero. If calc_inv!=0, then *out is pointed to the matrix inverse. In case of difficulty, I will set *out=NULL and return NaN.
apop_data * apop_dot ( const apop_data d1,
const apop_data d2,
char  form1,
char  form2 
)

A convenience function for dot products, which requires less prep and typing than the gsl_cblas_dgexx functions.

It makes use of the semi-overloading of the apop_data structure. d1 may be a vector or a matrix, and the same for d2, so this function can do vector dot matrix, matrix dot matrix, and so on. If d1 includes both a vector and a matrix, then later parameters will indicate which to use.

Parameters
d1the left part of $ d1 \cdot d2$
d2the right part of $ d1 \cdot d2$
form1't' or 'p': transpose or prime d1->matrix, or, if d1->matrix is NULL, read d1->vector as a row vector.
'n' or 0: use matrix if present; no transpose. (the default)
'v': ignore the matrix and use the vector.
form2As above, with d2.
Returns
an apop_data set. If two matrices come in, the vector element is NULL and the matrix has the dot product; if either or both are vectors, the vector has the output and the matrix is NULL.
Exceptions
out->error='a'Allocation error.
out->error='d'dimension-matching error.
out->error='m'GSL math error.
NULLIf you ask me to take the dot product of NULL, I return NULL.
  • Some systems auto-transpose non-conforming matrices. You input a $3 \times 5$ and a $3 \times 5$ matrix, and the system assumes that you meant to transpose the second, producing a $(3 \times 5) \cdot (5 \times 3) \rightarrow (3 \times 3)$ output. Apophenia does not do this. First, it's ambiguous whether the output should be $3 \times 3$ or $5 \times 5$. Second, your next run might have three observations, and two $3 \times 3$ matrices don't require transposition; auto-transposition thus creates situations where bugs can pop up on only some iterations of a loop.
  • For a vector $\cdot$ a matrix, the vector is always treated as a row vector, meaning that a $(3\times 1)$ dot a $(3\times 4)$ matrix is correct, and produces a $(1 \times 4)$ vector. For a matrix $\cdot$ a vector, the vector is always treated as a column vector. Requests for transposing the vector are ignored in both cases.
  • As a corollary to the above rule, a vector dot a vector always produces a scalar, which will be put in the zeroth element of the output vector; see the example.
  • If you want to multiply an $N \times 1$ vector $\cdot$ a $1 \times N$ vector to produce an $N \times N$ matrix, then use apop_vector_to_matrix to turn your vectors into matrices; see the example.
  • A note for readers of Modeling with Data: the awkward instructions on using this function on p 130 are now obsolete, thanks to the designated initializer syntax for function calls. Notably, in the case where d1 is a vector and d2 a matrix, then apop_dot(d1,d2,'t') won't work, because 't' now refers to d1. Instead use apop_dot(d1,d2,.form2='t') or apop_dot(d1,d2,0, 't')
  • This function uses the Designated initializers syntax for inputs.

Sample code:

/* A demonstration of dot products and various useful
transformations among types. */
#include <apop.h>
/* Tolerance for the comparisons below. The series converges slowly, so the
   tolerance is deliberately loose. */
double eps=1e-3;
/* Assert that L and R differ by less than eps. The comparison has to sit
   outside fabs(): fabs((L)-(R)<(eps)) would take the absolute value of the
   0/1 comparison result, so the check would pass whenever L is below R+eps,
   no matter how far apart the two values are. */
#define Diff(L, R) Apop_assert(fabs((L)-(R))<(eps), "%g is too different from %g (abitrary limit=%g).", (double)(L), (double)(R), eps);
/* Demonstrate apop_dot on vector-dot-vector, vector-dot-matrix, a deliberately
   mismatched matrix-dot-vector, and matrix-dot-matrix inputs. Each valid result
   is checked against pi^2/6, the limit of 1 + (1/2)^2 + (1/3)^2 + ... */
int main(){
    int len = 3000;
    gsl_vector *v = gsl_vector_alloc(len);
    // Integer counter: the loop variable doubles as the vector index.
    for (int i=0; i< len; i++) gsl_vector_set(v, i, 1./(i+1));
    double square;
    gsl_blas_ddot(v, v, &square);
    printf("1 + (1/2)^2 + (1/3)^2 + ...= %g\n", square);
    double pi_over_six = gsl_pow_2(M_PI)/6.;
    Diff(square, pi_over_six);

    /* Now using apop_dot, in a few forms.
       First, vector-as-data dot itself.
       If one of the inputs is a vector,
       apop_dot puts the output in a vector-as-data:*/
    apop_data *v_as_data = &(apop_data){.vector=v};
    apop_data *vdotv = apop_dot(v_as_data, v_as_data);
    Diff(gsl_vector_get(vdotv->vector, 0), pi_over_six);

    /* Wrap matrix in an apop_data set. */
    gsl_matrix *v_as_matrix = apop_vector_to_matrix(v);
    apop_data dm = (apop_data){.matrix=v_as_matrix};

    // (1 X len) vector dot (len X 1) matrix --- produce a scalar (one item vector).
    apop_data *mdotv = apop_dot(v_as_data, &dm);
    double scalarval = apop_data_get(mdotv);
    Diff(scalarval, pi_over_six);

    //(len X 1) dot (len X 1) --- bad dimensions.
    apop_opts.verbose=-1; //don't print an error.
    apop_data *mdotv2 = apop_dot(&dm, v_as_data);
    apop_opts.verbose=0; //back to safety.
    assert(mdotv2->error);

    // If we want (len X 1) dot (1 X len) --> (len X len),
    // use apop_vector_to_matrix.
    apop_data dmr = (apop_data){.matrix=apop_vector_to_matrix(v, .row_col='r')};
    apop_data *product_matrix = apop_dot(&dm, &dmr);

    //The trace is the sum of squares:
    gsl_vector_view trace = gsl_matrix_diagonal(product_matrix->matrix);
    double tracesum = apop_sum(&trace.vector);
    Diff(tracesum, pi_over_six);

    // Release everything allocated above. v_as_data, dm, and dmr are
    // stack-side wrappers, so only the GSL objects they point to are freed.
    apop_data_free(product_matrix);
    apop_data_free(mdotv2);
    apop_data_free(mdotv);
    apop_data_free(vdotv);
    gsl_matrix_free(dmr.matrix);
    gsl_matrix_free(v_as_matrix);
    gsl_vector_free(v);
}
int apop_draw ( double *  out,
gsl_rng *  r,
apop_model m 
)

Draw from a model.

Parameters
outAn already-allocated array of doubles to be filled by the draw method. It must have size m->dsize.
rA gsl_rng, probably allocated via apop_rng_alloc. Optional; if NULL, then I will call apop_rng_get_thread for an RNG.
mThe model from which to make draws.
  • If the model has its own draw method, then this function will call it.
  • Else, if the model is univariate, use apop_arms_draw to generate random draws.
  • Else, if the model is multivariate, use apop_model_metropolis to generate random draws.
  • This makes a single draw of the given size. See apop_model_draws to fill a matrix with draws.
Returns
Zero on success; nonzero on failure. out[0] is probably NAN on failure.
apop_model * apop_estimate ( apop_data d,
apop_model m 
)

Estimate the parameters of a model given data.

This function copies the input model, preps it (see apop_prep), and calls m.estimate(d, m) (which users are encouraged to never call directly). If your model has no estimate method, then call apop_maximum_likelihood(d, m), with the default MLE settings.

Parameters
dThe data
mThe model
Returns
A pointer to an output model, which typically matches the input model but has its parameters element filled in.
apop_data * apop_estimate_coefficient_of_determination ( apop_model m)

Also known as $R^2$. Let $Y$ be the dependent variable, $\epsilon$ the residual, $n$ the number of data points, and $k$ the number of independent vars (including the constant). Returns an apop_data set with the following entries (in the vector element):

  • $ SST \equiv \sum (Y_i - \bar Y) ^2 $
  • $ SSE \equiv \sum \epsilon ^2 $
  • $ R^2 \equiv 1 - {SSE\over SST} $
  • $ R^2_{adj} \equiv R^2 - {(k-1)\over (n-k-1)}(1-R^2) $

Internally allocates (and frees) a vector the size of your data set.

Returns
A $5 \times 1$ apop_data table with the following fields:
  • "R squared"
  • "R squared adj"
  • "SSE"
  • "SST"
  • "SSR"
If the output is in sss, use apop_data_get(sss, .rowname="SSE") to get the SSE, and so on for the other items.
Parameters
mA model. I use the pointer to the data set used for estimation and the info page named "<Predicted>". The Predicted page should include observed, expected, and residual columns, which I use to generate the sums of squared errors and residuals, et cetera. All generalized linear models produce a page with this name and of this form, as do a host of other models. Nothing keeps you from finding the $R^2$ of, say, a kernel smooth; it is up to you to determine whether such a thing is appropriate to your given models and situation.
  • apop_estimate(yourdata, apop_ols) does this automatically
  • If I don't find a "<Predicted>" page, print an error (iff apop_opts.verbose >=0) and return NULL.
  • The number of observations equals the number of rows in the Predicted page
  • The number of independent variables, needed only for the adjusted $R^2$, is from the number of columns in the main data set's matrix (i.e. the first page; i.e. the set of parameters if this is the parameters output from a model estimation).
  • If your data (first page again) has a weights vector, I will find weighted SSE, SST, and SSR (and calculate the $R^2$s using those values).
apop_model * apop_estimate_restart ( apop_model e,
apop_model copy,
char *  starting_pt,
double  boundary 
)

Maximum likelihood searches are not guaranteed to find a global optimum, and it can be difficult to tune a search such that it covers a wide space, but also accurately homes in on the optimum. In both cases, one could restart the search using a different starting point or different parameters.

The simplest use of this function is to restart a model at the latest parameter estimates.

1 apop_model *m = apop_estimate(data, model_using_an_MLE_search);
2 for (int i=0; i< 10; i++)
3  m = apop_estimate_restart(m);
4 apop_data_show(m);

By adding a line to reduce the tolerance each round [e.g., Apop_settings_set(m, apop_mle, tolerance, pow(10,-i))], you can start broad and hone in on a precise optimum.

You may have a new estimation method, such as first doing a coarse simulated annealing search, then a fine conjugate gradient search. When reading this example, recall that the form for adding a new settings group differs from the form for modifying existing settings:

1 Apop_model_add_settings(your_base_model, apop_mle, .method=APOP_SIMAN);
2 apop_model *m = apop_estimate(data, your_base_model);
3 Apop_settings_set(m, apop_mle, method, APOP_CG_PR);
4 m = apop_estimate_restart(m);
5 apop_data_show(m);

Only one estimate is returned, either the one you sent in or a new one. The loser (which may be the one you sent in) is freed, to prevent memory leaks.

Parameters
eAn apop_model that is the output from a prior MLE estimation. (No default, must not be NULL.)
copyAnother not-yet-parametrized model that will be re-estimated with (1) the same data and (2) a starting_pt as per the next setting (probably to the parameters of e). If this is NULL, then copy e. (Default = NULL)
starting_pt"ep"=last estimate of the first model (i.e., its current parameter estimates)
"es"= starting point originally used by the first model
"np"=current parameters of the new (second) model
"ns"=starting point specified by the new model's MLE settings. (default = "ep")
boundaryI test whether the starting point you give me has magnitude greater than this bound, so I can warn you if there's divergence in your sequence of re-estimations. (default: 1e8)
Returns
If the new estimated parameters include any NaNs/Infs, then the old estimate is returned (even if the old estimate included NaNs/Infs). Otherwise, the estimate with the largest log likelihood is returned.
apop_data * apop_f_test ( apop_model est,
apop_data contrast 
)

Runs an F-test specified by q and c. See the chapter on hypothesis testing in Modeling With Data, p 309, which will tell you that:

\[{N-K\over q} {({\bf Q}'\hat\beta - {\bf c})' [{\bf Q}' ({\bf X}'{\bf X})^{-1} {\bf Q}]^{-1} ({\bf Q}' \hat\beta - {\bf c}) \over {\bf u}' {\bf u} } \sim F_{q,N-K},\]

and that's what this function is based on.

Parameters
estAn apop_model that you have already calculated. (No default)
contrastAn apop_data set whose matrix represents ${\bf Q}$ and whose vector represents ${\bf c}$. Each row represents a hypothesis. (Defaults: if matrix is NULL, it is set to the identity matrix with the top row missing. If the vector is NULL, it is set to a zero vector of length equal to the height of the contrast matrix. Thus, if the entire apop_data set is NULL or omitted, we are testing the hypothesis that all but $\beta_1$ are zero.)
Returns
An apop_data set with a few variants on the confidence with which we can reject the joint hypothesis.
Exceptions
out->error='a'Allocation error.
out->error='d'dimension-matching error.
out->error='i'matrix inversion error.
out->error='m'GSL math error.
  • There are two approaches to an $F$-test: the ANOVA approach, which is typically built around the claim that all effects but the mean are zero; and the more general regression form, which allows for any set of linear claims about the data. If you send a NULL contrast set, I will generate the set of linear contrasts that are equivalent to the ANOVA-type approach. This is why the top row of the default ${\bf Q}$ matrix is missing: there is no hypothesis test about the coefficient for the constant term. See the example below.
  • This function uses the Designated initializers syntax for inputs.
/* Directory holding the test data: the build may supply it via Datadir;
   otherwise fall back to the current directory. */
#ifndef Datadir
#define DATADIR "."
#else
#define DATADIR Datadir
#endif
#include <apop.h>
/* Fail (running abort() via Apop_stopif) unless L and R agree to within eps;
   a NaN difference also counts as a failure. The body is wrapped in
   do { } while (0) so that `Diff(...);` is a single statement and stays valid
   inside an unbraced if/else. */
#define Diff(L, R, eps) do {double left=(L), right=(R); Apop_stopif(isnan(left-right) || fabs((left)-(right))>(eps), abort(), 0, "%g is too different from %g (abitrary limit=%g).", (double)(left), (double)(right), eps);} while (0)
/* Check the F statistic reported by apop_F_test against the value implied by
   R^2, namely F = R^2 (n-K) / ((1-R^2) K). The test is run twice: once with an
   explicitly built contrast set and once with the default (NULL) contrast.

   est: an already-estimated model; its data, parameters, and R^2 are read. */
void test_f(apop_model *est){
    // R^2 table for the estimated model; its "R squared" row feeds the expected F below.
    apop_data *rsq = apop_estimate_coefficient_of_determination(est);

    // Explicit contrasts: an identity matrix with its top row removed, giving
    // one hypothesis per coefficient after the first.
    apop_data *constr= apop_data_calloc(est->parameters->vector->size-1, est->parameters->vector->size);
    int i;
    for (i=1; i< est->parameters->vector->size; i++)
        apop_data_set(constr, i-1, i, 1);

    apop_data *ftab = apop_F_test(est, constr);
    apop_data *ftab2 = apop_F_test(est, NULL);
    //apop_data_show(ftab);
    //apop_data_show(ftab2);

    double n = est->data->matrix->size1;
    double K = est->parameters->vector->size-1;
    double r = apop_data_get(rsq, .rowname="R squared");
    double f = apop_data_get(ftab, .rowname="F statistic");
    double f2 = apop_data_get(ftab2, .rowname="F statistic");
    Diff (f , r*(n-K)/((1-r)*K) , 1e-3);
    Diff (f2 , r*(n-K)/((1-r)*K) , 1e-3);
}
int main(){
apop_data *d = apop_text_to_data( DATADIR "/" "test_data2" );
Apop_model_add_group(an_ols_model, apop_lm, .want_expected_value= 1);
apop_model *e = apop_estimate(d, an_ols_model);
test_f(e);
}

Runs an F-test specified by q and c. See the chapter on hypothesis testing in Modeling With Data, p 309, which will tell you that:

\[{N-K\over q} {({\bf Q}'\hat\beta - {\bf c})' [{\bf Q}' ({\bf X}'{\bf X})^{-1} {\bf Q}]^{-1} ({\bf Q}' \hat\beta - {\bf c}) \over {\bf u}' {\bf u} } \sim F_{q,N-K},\]

and that's what this function is based on.

Parameters
estAn apop_model that you have already calculated. (No default)
contrastAn apop_data set whose matrix represents ${\bf Q}$ and whose vector represents ${\bf c}$. Each row represents a hypothesis. (Defaults: if matrix is NULL, it is set to the identity matrix with the top row missing. If the vector is NULL, it is set to a zero vector of length equal to the height of the contrast matrix. Thus, if the entire apop_data set is NULL or omitted, we are testing the hypothesis that all but $\beta_1$ are zero.)
Returns
An apop_data set with a few variants on the confidence with which we can reject the joint hypothesis.
Exceptions
out->error='a'Allocation error.
out->error='d'dimension-matching error.
out->error='i'matrix inversion error.
out->error='m'GSL math error.
  • There are two approaches to an $F$-test: the ANOVA approach, which is typically built around the claim that all effects but the mean are zero; and the more general regression form, which allows for any set of linear claims about the data. If you send a NULL contrast set, I will generate the set of linear contrasts that are equivalent to the ANOVA-type approach. This is why the top row of the default ${\bf Q}$ matrix is missing: there is no hypothesis test about the coefficient for the constant term. See the example below.
  • This function uses the Designated initializers syntax for inputs.
/* Directory holding the test data: the build may supply it via Datadir;
   otherwise fall back to the current directory. */
#ifndef Datadir
#define DATADIR "."
#else
#define DATADIR Datadir
#endif
#include <apop.h>
/* Fail (running abort() via Apop_stopif) unless L and R agree to within eps;
   a NaN difference also counts as a failure. The body is wrapped in
   do { } while (0) so that `Diff(...);` is a single statement and stays valid
   inside an unbraced if/else. */
#define Diff(L, R, eps) do {double left=(L), right=(R); Apop_stopif(isnan(left-right) || fabs((left)-(right))>(eps), abort(), 0, "%g is too different from %g (abitrary limit=%g).", (double)(left), (double)(right), eps);} while (0)
/* Check the F statistic reported by apop_F_test against the value implied by
   R^2, namely F = R^2 (n-K) / ((1-R^2) K). The test is run twice: once with an
   explicitly built contrast set and once with the default (NULL) contrast.

   est: an already-estimated model; its data, parameters, and R^2 are read. */
void test_f(apop_model *est){
    // R^2 table for the estimated model; its "R squared" row feeds the expected F below.
    apop_data *rsq = apop_estimate_coefficient_of_determination(est);

    // Explicit contrasts: an identity matrix with its top row removed, giving
    // one hypothesis per coefficient after the first.
    apop_data *constr= apop_data_calloc(est->parameters->vector->size-1, est->parameters->vector->size);
    int i;
    for (i=1; i< est->parameters->vector->size; i++)
        apop_data_set(constr, i-1, i, 1);

    apop_data *ftab = apop_F_test(est, constr);
    apop_data *ftab2 = apop_F_test(est, NULL);
    //apop_data_show(ftab);
    //apop_data_show(ftab2);

    double n = est->data->matrix->size1;
    double K = est->parameters->vector->size-1;
    double r = apop_data_get(rsq, .rowname="R squared");
    double f = apop_data_get(ftab, .rowname="F statistic");
    double f2 = apop_data_get(ftab2, .rowname="F statistic");
    Diff (f , r*(n-K)/((1-r)*K) , 1e-3);
    Diff (f2 , r*(n-K)/((1-r)*K) , 1e-3);
}
int main(){
apop_data *d = apop_text_to_data( DATADIR "/" "test_data2" );
Apop_model_add_group(an_ols_model, apop_lm, .want_expected_value= 1);
apop_model *e = apop_estimate(d, an_ols_model);
test_f(e);
}
long double apop_generalized_harmonic ( int  N,
double  s 
)

Calculate $\sum_{n=1}^N {1\over n^s}$

  • There are no doubt efficient shortcuts for doing this, but I use brute force. [Though Knuth's The Art of Computer Programming, v1 doesn't offer anything, which is a strong indication of nonexistence.] To speed things along, I save the results so that they can just be looked up should you request the same calculation.
  • If N is zero or negative, return NaN. Notify the user if apop_opts.verbose >=0.

For example:

#include <apop.h>
/* Spot-check apop_generalized_harmonic against closed-form sums. */
int main(){
    /* s = 0: every term is 1/n^0 = 1, so the sum is just N. */
    double sum_of_ones = apop_generalized_harmonic(270, 0.0);
    assert(270 == sum_of_ones);

    /* s = -1: the terms are 1, 2, ..., N, whose sum is N(N+1)/2. */
    double triangular_370 = apop_generalized_harmonic(370, -1.0);
    assert(370*371/2 == triangular_370);

    double triangular_12 = apop_generalized_harmonic(12, -1.0);
    assert(12*13/2 == triangular_12);
}
apop_data * apop_histograms_test_goodness_of_fit ( apop_model observed,
apop_model expected 
)

Test the goodness-of-fit between two apop_pmf models.

Let $o_i$ be the $i$th observed bin and $e_i$ the expected value of that bin; then under typical assumptions, $\sum_i^N (o_i-e_i)^2/e_i \sim \chi^2_{N-1}$.

If you send two histograms, I assume that the histograms are synced: for PMFs, you've used apop_data_to_bins to generate two histograms using the same binspec, or you've used apop_data_pmf_compress to guarantee that each observation value appears exactly once in each data set.

In any case, all values in the observed set must appear in the expected set with nonzero weight; otherwise this will return a $\chi^2$ statistic of GSL_POSINF, indicating that it is impossible for the observed data to have been drawn from the expected distribution.

  • If an observation row has weight zero, I skip it. If apop_opts.verbose >=1 I will show a warning.
apop_data * apop_jackknife_cov ( apop_data in,
apop_model model 
)

Give me a data set and a model, and I'll give you the jackknifed covariance matrix of the model parameters.

The basic algorithm for the jackknife (glossing over the details): create a sequence of data sets, each with exactly one observation removed, and then produce a new set of parameter estimates using that slightly shortened data set. Then, find the covariance matrix of the derived parameters.

  • Jackknife or bootstrap? As a broad rule of thumb, the jackknife works best on models that are closer to linear. The worse a linear approximation does (at the given data), the worse the jackknife approximates the variance.
Parameters
inThe data set. An apop_data set where each row is a single data point.
modelAn apop_model, that will be used internally by apop_estimate.
Exceptions
out->error=='n'NULL input data.
Returns
An apop_data set whose matrix element is the estimated covariance matrix of the parameters.
See also
apop_bootstrap_cov

For example:

#include <apop.h>
/* Draw from a Normal(1, 3) model, jackknife the covariance of its parameter
   estimates, and compare the result with the expected values: var(mu) =
   sigma^2/n, var(sigma) = sigma^2/(2n), and zero off-diagonal terms. */
int main(){
    int draw_ct = 1000;
    apop_model *m = apop_model_set_parameters(apop_normal, 1, 3);
    double sigma = apop_data_get(m->parameters, 1);
    apop_data *d = apop_model_draws(m, draw_ct);
    // The jackknifed covariance matrix of the parameters; its cells are checked below.
    apop_data *out = apop_jackknife_cov(d, m);
    double error = fabs(apop_data_get(out, 0,0)-gsl_pow_2(sigma)/draw_ct) //var(mu)
                 + fabs(apop_data_get(out, 1,1)-gsl_pow_2(sigma)/(2*draw_ct))//var(sigma)
                 +fabs(apop_data_get(out, 0,1)) +fabs(apop_data_get(out, 1,0));//cov(mu,sigma); should be 0.
    assert(error < 1e-2);//Not very accurate.
}
long double apop_kl_divergence ( apop_model from,
apop_model to,
int  draw_ct,
gsl_rng *  rng 
)

Kullback-Leibler divergence.

This measure of the divergence of one distribution from another has the form $ D(p,q) = \sum_i \ln(p_i/q_i) p_i $. Notice that it is not a distance, because there is an asymmetry between $p$ and $q$, so one can expect that $D(p, q) \neq D(q, p)$.

Parameters
fromthe $p$ in the above formula. (No default; must not be NULL)
tothe $q$ in the above formula. (No default; must not be NULL)
draw_ctIf I do the calculation via random draws, how many? (Default = 1e5)
rngA gsl_rng. If NULL or number of threads is greater than 1, I'll take care of the RNG; see apop_rng_get_thread. (Default = NULL)

This function can take empirical histogram-type models (apop_pmf) or continuous models like apop_loess or apop_normal.

If the from distribution is a PMF (determined by checking whether its p function is that of apop_pmf), then I'll step through it for the points in the summation.

  • If you have two empirical distributions in the form of apop_pmf, they must be synced: if $p_i>0$ but $q_i=0$, then the function returns GSL_NEGINF. If apop_opts.verbose >=1 I print a message as well.

If the from distribution is not a PMF, then I will take draw_ct random draws from from and evaluate at those points.

  • Set apop_opts.verbose = 3 for observation-by-observation info.
long double apop_linear_constraint ( gsl_vector *  beta,
apop_data constraint,
double  margin 
)

This is designed to be called from within the constraint method of your apop_model. Just write the constraint vector+matrix and this will do the rest. See Setting Constraints for detailed discussion.

Parameters
betaThe proposed vector about to be tested. No default, must not be NULL.
constraintA vector/matrix pair [v | m1 m2 ... mn] where each row is interpreted as a less-than inequality: $v < m1x1+ m2x2 + ... + mnxn$. For example, say your constraints are $3 < 2x + 4y - 7z$ and $y$ is positive, i.e. $0 < y$. Allocate and fill the matrix representing these two constraints via:
1 apop_data *constr = apop_data_falloc((2,2,3), 3, 2, 4, 7,
2  0, 0, 1, 0);
. Default: each element is greater than zero. For three parameters this would be equivalent to setting
1 apop_data *constr = apop_data_falloc((3,3,3), 0, 1, 0, 0,
2  0, 0, 1, 0,
3  0, 0, 0, 1);
marginIf zero, then this is a >= constraint, otherwise I will return a point this amount within the borders. You could try GSL_DBL_EPSILON, which is the machine epsilon for a double (the gap between 1 and the next larger representable value), or something like 1e-3. Default = 0.
Returns
The penalty: the distance between beta and the closest point that meets the constraints. If the constraint is met, the penalty is zero. If the constraint is not met, this beta is shifted by margin (Euclidean distance) to meet the constraints.
double apop_log_likelihood ( apop_data d,
apop_model m 
)

Find the log likelihood of a data/parametrized model pair.

Parameters
dThe data
mThe parametrized model, which must have either a log_likelihood or a p method.
apop_data * apop_map ( apop_data in,
apop_fn_d *  fn_d,
apop_fn_v *  fn_v,
apop_fn_r *  fn_r,
apop_fn_dp *  fn_dp,
apop_fn_vp *  fn_vp,
apop_fn_rp *  fn_rp,
apop_fn_dpi *  fn_dpi,
apop_fn_vpi *  fn_vpi,
apop_fn_rpi *  fn_rpi,
apop_fn_di *  fn_di,
apop_fn_vi *  fn_vi,
apop_fn_ri *  fn_ri,
void *  param,
int  inplace,
char  part,
int  all_pages 
)

Apply a function to every element of a data set, matrix or vector; or, apply a vector-taking function to every row or column of a matrix.

Your function could take any combination of a gsl_vector, a double, an apop_data, a parameter set, and the position of the element in the vector or matrix. As such, the function takes twelve function inputs, one for each combination of vector/matrix, params/no params, index/no index. Fortunately, because this function uses the Designated initializers syntax for inputs, you will specify only one.

For example, here is a function that will cut off each element of the input data to between $(-1, +1)$. It takes in a lone double and a parameter in a void*, so it gets sent to apop_map via .fn_dp=cutoff.

1 double cutoff(double in, void *limit_in){
2  double *limit = limit_in;
3  return GSL_MAX(-*limit, GSL_MIN(*limit, in));
4 }
5 
6 double param = 1;
7 apop_map(your_data, .fn_dp=cutoff, .param=&param, .inplace='y');
Parameters
fn_vA function of the form double your_fn(gsl_vector *in)
fn_dA function of the form double your_fn(double in)
fn_rA function of the form double your_fn(apop_data *in)
fn_vpA function of the form double your_fn(gsl_vector *in, void *param)
fn_dpA function of the form double your_fn(double in, void *param)
fn_rpA function of the form double your_fn(apop_data *in, void *param)
fn_vpiA function of the form double your_fn(gsl_vector *in, void *param, int index)
fn_dpiA function of the form double your_fn(double in, void *param, int index)
fn_rpiA function of the form double your_fn(apop_data *in, void *param, int index)
fn_viA function of the form double your_fn(gsl_vector *in, int index)
fn_diA function of the form double your_fn(double in, int index)
fn_riA function of the form double your_fn(apop_data *in, int index)
inThe input data set. If NULL, I'll return NULL immediately.
paramA pointer to the parameters to be passed to those function forms taking a *param.
partWhich part of the apop_data struct should I use?
'v'==Just the vector
'm'==Every element of the matrix, in turn
'a'==Both 'v' and 'm'
'r'==Apply a function gsl_vector $\to$ double to each row of the matrix
'c'==Apply a function gsl_vector $\to$ double to each column of the matrix
Default is 'a', but notice that I'll ignore a NULL vector or matrix, so if your data set has only a vector or only a matrix, that's what I'll use.
all_pagesIf 'y', then follow the more pointer to subsequent pages. If 'n', handle only the first page of data. Default: 'n'.
inplaceIf 'n' (the default), generate a new apop_data set for output, which will contain the mapped values (and the names from the original set).
If 'y', modify in place. The double $\to$ double versions, 'v', 'm', and 'a', write to exactly the same location as before. The gsl_vector $\to$ double versions, 'r', and 'c', will write to the vector. Be careful: if you are writing in place and there is already a vector there, then the original vector is lost.
If 'v' (as in void), return NULL. (Default = 'n')
Exceptions
out->error='p'missing or mismatched parts error, such as NULL matrix when you sent a function acting on the matrix element.
  • The function forms with r in them, like fn_ri, are row-by-row. I'll use Apop_r to get each row in turn, and send it to the function. The first implication is that your function should be expecting an apop_data set with exactly one row in it. The second is that part is ignored: it only makes sense to go row-by-row.
  • For these r functions, if you set inplace='y', then you will be modifying your input data set, row by row; if you set inplace='n', then I will return an apop_data set whose vector element is as long as your data set (i.e., as long as the longest of your text, vector, or matrix parts).
  • If you set omp_set_num_threads(n) using $n>1$, split the data set into as many chunks as you specify and process them simultaneously. You need to watch out for the usual hang-ups about multithreaded programming, but if your data is iid, and each row's processing is independent of the others, you should have no problems. Bear in mind that generating threads takes some small overhead, so simple cases like adding a few hundred numbers will actually be slower when threading.
  • See Map/apply for many more examples and notes.
    See also
    apop_map_sum
double apop_map_sum ( apop_data in,
apop_fn_d *  fn_d,
apop_fn_v *  fn_v,
apop_fn_r *  fn_r,
apop_fn_dp *  fn_dp,
apop_fn_vp *  fn_vp,
apop_fn_rp *  fn_rp,
apop_fn_dpi *  fn_dpi,
apop_fn_vpi *  fn_vpi,
apop_fn_rpi *  fn_rpi,
apop_fn_di *  fn_di,
apop_fn_vi *  fn_vi,
apop_fn_ri *  fn_ri,
void *  param,
char  part,
int  all_pages 
)

A function that effectively calls apop_map and returns the sum of the resulting elements. Thus, this function returns a double. See the apop_map page for details of the inputs, which are the same here, except that inplace doesn't make sense—this function will always just add up the input function outputs.

  • I don't copy the input data to send to your input function. Therefore, if your function modifies its inputs as a side-effect, your data set will be modified as this function runs.
  • The sum of zero elements is zero, so that is what is returned if the input apop_data set is NULL. If apop_opts.verbose >= 2 print a warning.
  • See Map/apply for many more examples and notes.
  • This function uses the Designated initializers syntax for inputs.
void apop_matrix_apply ( gsl_matrix *  m,
void(*)(gsl_vector *)  fn 
)

Apply a function to every row of a matrix. The function that you input takes in a gsl_vector and returns nothing. apop_matrix_apply will produce a vector view of each row, and send each row to your function.

Parameters
mThe matrix
fnA function of the form void fn(gsl_vector* in) which may modify the data at the in pointer in place.
void apop_matrix_apply_all ( gsl_matrix *  in,
void(*)(double *)  fn 
)

Applies a function to every element in a matrix (as opposed to every row)

Parameters
inThe matrix whose elements will be inputs to the function
fnA function with a form like void f(double *in) which may modify the data at the in pointer in place.
gsl_matrix * apop_matrix_copy ( const gsl_matrix *  in)

Copy one gsl_matrix to another. That is, all data are duplicated. Unlike gsl_matrix_memcpy, this function allocates and returns the destination, so you can use it like this:

1 gsl_matrix *a_copy = apop_matrix_copy(original);
Parameters
inthe input data
Returns
A structure that this function will allocate and fill. If gsl_matrix_alloc fails, returns NULL.
double apop_matrix_determinant ( const gsl_matrix *  in)

Find the determinant of a matrix. The in matrix is not destroyed in the process.

See also apop_matrix_inverse , or apop_det_and_inv to do both at once.

Parameters
inThe matrix to be determined.
Returns
The determinant.
gsl_matrix * apop_matrix_inverse ( const gsl_matrix *  in)

Inverts a matrix. The in matrix is not destroyed in the process. You may want to call apop_matrix_determinant first to check that your input is invertible, or use apop_det_and_inv to do both at once.

Parameters
inThe matrix to be inverted.
Returns
Its inverse.
int apop_matrix_is_positive_semidefinite ( gsl_matrix *  m,
char  semi 
)

Test whether the input matrix is positive semidefinite (PSD).

A covariance matrix will always be PSD, so this function can tell you whether your matrix is a valid covariance matrix.

Consider the 1x1 matrix in the upper left of the input, then the 2x2 matrix in the upper left, on up to the full matrix. If the matrix is PSD, then each of these has a positive determinant. This function thus calculates $N$ determinants for an $N$x $N$ matrix.

Parameters
mThe matrix to test. If NULL, I will return zero—not PSD.
semiIf anything but 's', check for positive definite, not semidefinite. (default 's')

See also apop_matrix_to_positive_semidefinite, which will change the input to something PSD.

gsl_vector * apop_matrix_map ( const gsl_matrix *  m,
double(*)(gsl_vector *)  fn 
)

Map a function onto every row of a matrix. The function that you input takes in a gsl_vector and returns a double. This function will produce a sequence of vector views of each row of the input matrix, and send each to your function. It will output a gsl_vector holding your function's output for each row.

Parameters
mThe matrix
fnA function of the form double fn(gsl_vector* in)
Returns
A gsl_vector with the corresponding value for each row.
gsl_matrix * apop_matrix_map_all ( const gsl_matrix *  in,
double(*)(double)  fn 
)

Maps a function to every element in a matrix (as opposed to every row).

Parameters
inThe matrix whose elements will be inputs to the function
fnA function with a form like double f(double in).
Returns
a matrix of the same size as the original, with the function applied.
double apop_matrix_map_all_sum ( const gsl_matrix *  in,
double(*)(double)  fn 
)

Like apop_matrix_map_all, but returns the sum of the resulting mapped function. For example, apop_matrix_map_all_sum(m, isnan) returns the number of elements of m that are NaN.

double apop_matrix_map_sum ( const gsl_matrix *  in,
double(*)(gsl_vector *)  fn 
)

Like apop_matrix_map, but returns the sum of the resulting mapped vector. For example, let log_like be a function that returns the log likelihood of an input vector; then apop_matrix_map_sum(m, log_like) returns the total log likelihood of the rows of m.

double apop_matrix_mean ( const gsl_matrix *  data)

Returns the mean of all elements of a matrix.

Parameters
dataThe matrix to be averaged. If NULL, return zero.
Returns
The mean of all cells of the matrix.
void apop_matrix_mean_and_var ( const gsl_matrix *  data,
double *  mean,
double *  var 
)

Returns the mean and population variance of all elements of a matrix.

  • If NULL, return $\mu=0, \sigma^2=NaN$.
  • Gives the population variance (sum of squares divided by $N$). If you want sample variance, multiply the result by $N/(N-1)$:
    1 double mu, var;
    2 apop_data *data= apop_query_to_data("select * from indata");
    3 apop_matrix_mean_and_var(data->matrix, &mu, &var);
    4 var *= (data->matrix->size1*data->matrix->size2)/(data->matrix->size1*data->matrix->size2-1.0);
Parameters
datathe matrix to be averaged.
meanwhere to put the mean to be calculated.
varwhere to put the variance to be calculated.
apop_data * apop_matrix_pca ( gsl_matrix *  data,
int const  dimensions_we_want 
)

Principal component analysis: hand in a matrix and (optionally) a number of desired dimensions, and I'll return a data set where each column of the matrix is an eigenvector. The columns are sorted, so column zero has the greatest weight. The vector element of the data set gives the weights.

You may also specify the number of elements your principal component space should have. If this is equal to the rank of the space in which the input data lives, then the sum of weights will be one. If the dimensions desired is less than that (probably so you can prepare a plot), then the weights will be accordingly smaller, giving you an indication of how much variation these dimensions explain.

Parameters
dataThe input matrix. I modify it in place so that each column has mean zero. (No default. If NULL, return NULL and print a warning iff apop_opts.verbose >= 1.)
dimensions_we_wantThe singular value decomposition will return this many of the eigenvectors with the largest eigenvalues. (default: the size of the covariance matrix, i.e. data->size2)
Returns
Returns an apop_data set whose matrix is the principal component space. Each column of the returned matrix will be another eigenvector; the columns will be ordered by the eigenvalues.

The data set's vector will be the largest eigenvalues, scaled by the total of all eigenvalues (including those that were thrown out). The sum of these returned values will give you the percentage of variance explained by the factor analysis.

Exceptions
out->error=='a'Allocation error.
void apop_matrix_print ( const gsl_matrix *  data,
Output_declares   
)

Print a gsl_matrix to the screen, a file, a pipe, or a database table.

gsl_matrix * apop_matrix_realloc ( gsl_matrix *  m,
size_t  newheight,
size_t  newwidth 
)

This function will resize a gsl_matrix to a new height or width.

Data in the matrix will be retained. If the new height or width is smaller than the old, then data in the later rows/columns will be cropped away (in a non–memory-leaking manner). If the new height or width is larger than the old, then new cells will be filled with garbage; it is your responsibility to zero out or otherwise fill new rows/columns before use.

  • A large number of reallocs can take a noticeable amount of time. You are encouraged to determine the size of your data beforehand and avoid writing for loops that reallocate the matrix at every iteration.
  • The gsl_matrix is a versatile struct that can represent submatrices and other cuts from parent data. Resizing a subset of a parent matrix makes no sense, so return NULL and print a warning if asked to resize a view of a matrix.
Parameters
mThe already-allocated matrix to resize. If you give me NULL, this becomes equivalent to gsl_matrix_alloc
newheight,newwidthThe height and width you'd like the matrix to be.
Returns
m, now resized
gsl_matrix * apop_matrix_stack ( gsl_matrix *  m1,
gsl_matrix const *  m2,
char  posn,
char  inplace 
)

Put the first matrix either on top of or to the left of the second matrix. Returns a new matrix, meaning that at the end of this function, until you gsl_matrix_free() the original matrices, you will be taking up twice as much memory. Plan accordingly.

Parameters
m1the upper/leftmost matrix (default: NULL, in which case this copies m2)
m2the second matrix (default: NULL, in which case m1 is returned)
posnIf 'r', stack rows on top of other rows. If 'c' stack columns next to columns. (default: 'r')
inplaceIf 'y', use apop_matrix_realloc to modify m1 in place; see the caveats on that function. Otherwise, allocate a new matrix, leaving m1 undisturbed. (default: 'n')
Returns
the stacked data, either in a new matrix or a pointer to m1.

For example, here is a function to merge four matrices into a single two-part-by-two-part matrix. The original matrices are unchanged.

1 gsl_matrix *apop_stack_two_by_two(gsl_matrix *ul, gsl_matrix *ur, gsl_matrix *dl, gsl_matrix *dr){
2  gsl_matrix *output, *t;
3  output = apop_matrix_stack(ul, ur, 'c');
4  t = apop_matrix_stack(dl, dr, 'c');
5  apop_matrix_stack(output, t, 'r', .inplace='y');
6  gsl_matrix_free(t);
7  return output;
8 }
long double apop_matrix_sum ( const gsl_matrix *  m)

Returns the sum of the elements of a matrix. Occasionally convenient.

Parameters
mthe matrix to be summed.
double apop_matrix_to_positive_semidefinite ( gsl_matrix *  m)

This function takes in a matrix and converts it in place to the `closest' positive semidefinite matrix.

Parameters
mOn input, any matrix; on output, a positive semidefinite matrix. If NULL, return NaN and print an error.
Returns
the distance between the original and new matrices.
  • See also the test function apop_matrix_is_positive_semidefinite.
  • This function can be used as the core of a model constraint.
  • Adapted from the R Matrix package's nearPD, which is Copyright (2007) Jens Oehlschlägel [under the GPL].
void apop_maximum_likelihood ( apop_data data,
apop_model dist 
)

Find the likelihood-maximizing parameters of a model given data.

  • I assume that apop_prep has been called on your model. The easiest way to guarantee this is to use apop_estimate, which calls this function if the input model has no estimate method.
  • All of the settings are specified by adding an apop_mle_settings struct to your model, so see the many notes there. Notably, the default method is the Fletcher-Reeves conjugate gradient method, and if your model does not have a dlog likelihood function, then a numeric gradient will be calculated via apop_numerical_gradient. Add an apop_mle_settings group to your model to set tuning parameters or select other methods, including the Nelder-Mead simplex, simulated annealing, and root-finding.
Parameters
dataAn apop_data set.
distThe apop_model object: apop_gamma, apop_probit, apop_zipf, &c. You can add an apop_mle_settings struct to it (Apop_model_add_group(your_model, apop_mle, .verbose=1, .method="PR cg", and_so_on)).
Returns
None, but the input model is modified to include the parameter estimates, &c.
  • There is auxiliary info in the ->info element of the post-estimation struct. Get elements via, e.g.:
    1 apop_model *est = apop_estimate(your_data, apop_probit);
    2 
    3 
    4 int status = apop_data_get(est->info, .rowname="status");
    5 if (status)
    6  //trouble
    7 else
    8  //optimum found
    9  apop_data_print(est->parameters); //Here are the estimated parameters
  • During the search for an optimum, ctrl-C (SIGINT) will halt the search, and the function will return whatever parameters the search was on at the time.
apop_model * apop_ml_impute ( apop_data d,
apop_model mvn 
)

Impute the most likely data points to replace NaNs in the data, and insert them into the given data. That is, the data set is modified in place.

How it works: this uses the machinery for apop_model_fix_params. The only difference is that this searches over the data space and takes the parameter space as fixed, while basic fix params model searches parameters and takes data as fixed. So this function just does the necessary data-parameter switching to make that happen.

Parameters
dThe data set. It comes in with NaNs and leaves entirely filled in.
mvnA parametrized apop_model from which you expect the data was derived. If NULL, then I'll use the Multivariate Normal that best fits the data after listwise deletion.
Returns
An estimated apop_model. Also, the data input will be filled in and ready to use.
apop_model * apop_model_clear ( apop_data data,
apop_model model 
)

Set up the parameters and info elements of the apop_model:

At close, the input model has parameters of the correct size.

  • This is the default action for apop_prep, and many models with a custom prep routine call apop_model_clear at the end. Also, apop_estimate calls this function internally, which means that you probably never have to call this function directly.
  • If the model has already been prepped, this function should be a no-op.
Parameters
dataIf your params vary with the size of the data set, then the function needs a data set to calibrate against. Otherwise, it's OK to set this to NULL.
modelThe model whose output elements will be modified.
Returns
A pointer to the same model, should you need it.
Exceptions
outmodel->error=='d'dimension error.
apop_model * apop_model_copy ( apop_model in)

Outputs a copy of the apop_model input.

Parameters
inThe model to be copied
Returns
A copy of the original. Includes copies of all settings groups, and the parameters (if not NULL, copied via apop_data_copy).
  • If in.more_size > 0 I memcpy the more pointer from the original data set.
  • The data set at in->data is not copied, but is also pointed to.
Exceptions
out->error=='a'Allocation error. In extreme cases, where there aren't even a few hundred bytes available, I will return NULL.
out->error=='s'Error copying settings groups.
out->error=='p'Error copying parameters or info page; the given apop_data struct may be NULL or may have its own ->error element.
apop_data * apop_model_draws ( apop_model model,
int  count,
apop_data draws 
)

Make a set of random draws from a model and write them to an apop_data set.

Parameters
modelThe model from which draws will be made. Must already be prepared and/or estimated.
countThe number of draws to make. If draws is not NULL, then this is ignored and count=draws->matrix->size1. default=1000.
drawsIf not NULL, a pre-allocated data set whose matrix element will be filled with draws.
Returns
An apop_data set with the matrix filled with count draws. If draws!=NULL, then return a pointer to it.
Exceptions
out->error=='m'Input model isn't good for making draws: it is NULL, or m->dsize=0.
out->error=='s'You gave me a draws matrix, but its size is less than the size of a single draw from the data, model->dsize.
out->error=='d'Trouble drawing from the distribution for at least one row. That row is set to all NAN.
  • Prints a warning if you send in a non-NULL apop_data set, but its matrix element is NULL, when apop_opts.verbose>=1.
  • See also apop_draw, which makes a single draw.
  • Random numbers are generated using RNGs from apop_rng_get_thread, qv.

Here is a two-line program to draw a different set of ten Standard Normals on every run (provided runs are more than a second apart):

#include <apop.h>
#include <time.h>
int main(){
apop_opts.rng_seed = time(NULL);
apop_data_print(
apop_model_draws(
apop_model_set_parameters(apop_normal, 0, 1),
.count=10,
)
);
}
long double apop_model_entropy ( apop_model in,
int  draws 
)

Calculate the entropy of a model: $\int -\ln(p(x))p(x)dx$, which is the expected value of $-\ln(p(x))$.

The default method is to make draws using apop_model_draws, then evaluate the log likelihood at those points using the model's log_likelihood method.

There are a number of routines for specific models, including the apop_normal and apop_pmf models.

  • If you want the entropy of a data set, see apop_vector_entropy.
  • The entropy is calculated using natural logs. If you prefer base-2 logs, just divide by $\ln(2)$: apop_model_entropy(my_model)/log(2).
Parameters
inA parameterized apop_model. That is, you have already used apop_estimate or apop_model_set_parameters to estimate/set the model parameters.
drawsIf using the default method of making random draws, how many random draws to make (default=1,000)

Sample code:

#include <apop.h>
#define Diff(left, right, eps) Apop_stopif(fabs((left)-(right))>(eps), \
abort(), 0, "%g is too different from %g (abitrary limit=%g).", \
(double)(left), (double)(right), eps)
/* The entropy function, like some other functions (including apop_update) has a lookup
table for known models like the Normal distribution. If the input model has
\c log_likelihood, \c p, and \c draw functions that are the ones found in \ref
apop_normal, then use a known calculation to report entropy; else report based on
random draws from the model.
If we make a copy of the \ref apop_normal model and replace the log likelihood with
a new function that produces identical values, the lookup table will not find the
modified model, and the calculation via random draws will be done. Of course, the
final entropy as calculated using both methods should differ only by a small amount.
*/
long double mask(apop_data *d, apop_model *m){
return apop_normal->log_likelihood(d, m);
}
int main(){
for (double i=0.1; i< 10; i+=.2){
apop_model *n = apop_model_set_parameters(apop_normal, 8, i);
long double v= apop_model_entropy(n);
n->log_likelihood = mask;
long double w= apop_model_entropy(n, 50000);
Diff(v, w, 5e-2);
}
}
apop_model * apop_model_fix_params ( apop_model model_in)

Produce a model based on another model, but with some of the parameters fixed at a given value.

You will send me the model whose parameters you want fixed, with the parameters element set as follows. For the fixed parameters, simply give the values to which they will be fixed. Set the free parameters to NaN.

For example, here is a Binomial distribution with a fixed $n=30$ but $p_1$ allowed to float freely:

1 apop_model *bi30 = apop_model_fix_params(apop_model_set_parameters(apop_binomial, 30, NAN));
2 Apop_model_add_group(bi30, apop_mle, .starting_pt=(double[]){.5}); // The Binomial doesn't like the
3  // default starting point of 1.
4 apop_model *out = apop_estimate(your_data, bi30);

The output is an apop_model that can be estimated, Bayesian updated, et cetera.

  • Rather than using this model, you may simply want a now-filled-in copy of the original model. Use apop_model_fix_params_get_base to retrieve the original model's parameters.
  • The estimate method always uses an MLE, and it never calls the base model's estimate method.
  • If the input model has an apop_mle_settings group attached, I'll use them for the estimate method. Otherwise, I'll set my own.
  • If the parameter input has non-NaN values at the free parameters, then I'll use those as the starting point for any MLE search; the default for the variables without fixed values is to start from 1 as usual.
  • I do check the more pointer of the parameters for additional pages and NaNs on those pages.

Here is a sample program. It produces a few thousand draws from a Multivariate Normal distribution, and then tries to recover the means given a var/covar matrix fixed at the correct variance.

#include <apop.h>
int main(){
size_t ct = 5e4;
//set up the model & params
apop_data *params = apop_data_falloc((2,2,2), 8, 1, 0.5,
2, 0.5, 1);
apop_model *pvm = apop_model_copy(apop_multivariate_normal);
pvm->parameters = apop_data_copy(params);
pvm->dsize = 2;
apop_data *d = apop_model_draws(pvm, ct);
//set up and estimate a model with fixed covariance matrix but free means
gsl_vector_set_all(pvm->parameters->vector, GSL_NAN);
apop_model *mep1 = apop_model_fix_params(pvm);
apop_model *e1 = apop_estimate(d, mep1);
//compare results
printf("original params: ");
apop_vector_print(params->vector);
printf("estimated params: ");
apop_vector_print(e1->parameters->vector);
assert(apop_vector_distance(params->vector, e1->parameters->vector)<1e-2);
}
Parameters
model_inThe base model
Returns
a model that can be used like any other, with the given params fixed or free.
apop_model * apop_model_fix_params_get_base ( apop_model fixed_model)

The apop_model_fix_params function produces a model that has only the non-fixed parameters of the model. After estimation of the fixed-parameter model, this function fills the parameters element of the base model and returns a pointer to the base model.

void apop_model_free ( apop_model free_me)

Free an apop_model structure.

  • The parameters and settings are freed. These are the elements that are copied by apop_model_copy.
  • The data element is not freed, because the odds are you still need it.
  • If free_me->more_size is positive, the function runs free(free_me->more). But it has no idea what the more element contains; if it points to other structures (like an apop_data set), you need to free them before calling this function.
  • If free_me is NULL, this does nothing.
Parameters
free_meA pointer to the model to be freed.
apop_data * apop_model_hessian ( apop_data data,
apop_model model,
double  delta 
)

Numerically estimate the matrix of second derivatives of the parameter values, via a series of re-evaluations at small differential steps. [Therefore, it may be expensive to do this for a very computationally-intensive model.]

Parameters
dataThe apop_data at which the model was estimated (default: NULL)
modelThe apop_model, with parameters already estimated (no default, must not be NULL)
deltathe step size for the differentials. (default: 1e-3, but see below)
Returns
The matrix of estimated second derivatives at the given data and parameter values.
  • If you do not set delta as an input, I first look for an apop_mle_settings group attached to the input model, and check that for a delta element. If that is also missing, use the default of 1e-3.
  • This function uses the Designated initializers syntax for inputs.
apop_model * apop_model_metropolis ( apop_data d,
gsl_rng *  rng,
apop_model m 
)

Use Metropolis-Hastings Markov chain Monte Carlo to make draws from the given model.

The basic storyline is that draws are made from a proposal distribution, and the likelihood of your model given your data and the drawn parameters evaluated. At each step, a new set of proposal parameters are drawn, and if they are more likely than the previous set the new proposal is accepted as the next step, else with probability (prob of new params)/(prob of old params), they are accepted as the next step anyway. Otherwise the last accepted proposal is repeated.

The output is an apop_pmf model with a data set listing the draws that were accepted, including those repetitions. The output model is modified so that subsequent draws are one more step from the Markov chain, via apop_model_metropolis_draw.

Parameters
dThe apop_data set used for evaluating the likelihood of a proposed parameter set.
rngA gsl_rng, probably allocated via apop_rng_alloc. (Default: an RNG from apop_rng_get_thread)
mThe apop_model from which parameters are being drawn. (No default; must not be NULL)
Returns
A modified apop_pmf model representing the results of the search. It has a specialized draw method that returns another step from the Markov chain with each draw.
Exceptions
out->error='c'Proposal was outside of a constraint; see below.
  • If a proposal fails to meet the constraint element of the model you input, then the proposal is thrown out and a new one selected. By the default proposal distribution, this is not mathematically correct (it breaks detailed balance), and values near the constraint will be oversampled. The output model will have outmodel->error=='c'. It is up to you to decide whether the resulting distribution is good enough for your purposes or whether to take the time to write a custom proposal and step function to accommodate the constraint.

Attach an apop_mcmc_settings group to your model to specify the proposal distribution, burnin, and other details of the search. See the apop_mcmc_settings documentation for details.

  • The default proposal includes an adaptive step: you specify a target accept rate (default: .35), and if the accept rate is currently higher the variance of the proposals is widened to explore more of the space; if the accept rate is currently lower the variance is narrowed to stay closer to the last accepted proposal. Technically, this breaks ergodicity of the Markov chain, but the consensus seems to be that this is not a serious problem. If it does concern you, you can set the base_adapt_fn in the apop_mcmc_settings group to a do-nothing function, or one that damps its adaptation as $n\to\infty$.
  • If you have a univariate model, apop_arms_draw may be a suitable simpler alternative.
  • Note the gibbs_chunks element of the apop_mcmc_settings group. If you set gibbs_chunks='a', all parameters are drawn as a set, and accepted/rejected as a set. The variances are adapted at an identical rate. If you set gibbs_chunks='i', then each scalar parameter is assigned its own proposal distribution, which is adapted at its own pace. With gibbs_chunks='b' (the default), then each of the vector, matrix, and weights of your model's parameters are drawn/accepted/adapted as a block (and so on to additional chunks if your model has ->more pages). This works well for complex models which naturally break down into subsets of parameters.
  • Each chunk counts as a step in the Markov chain. Therefore, if there are several chunks, you can expect chunks to repeat from step to step. If you want a draw after cycling through all chunks, try using apop_model_metropolis_draw, which has that behavior.
  • If the likelihood model has NULL parameters, I will allocate them. That means you can use one of the stock models that ship with Apophenia. If I need to run the model's prep routine to get the size of the parameters, then I will make a copy of the likelihood model, run prep, and then allocate parameters for that copy of a model.
  • On exit, the parameters element of your likelihood model has the last accepted parameter proposal.
  • If you set apop_opts.verbose=2 or greater, I will report the accept rate of the M-H sampler. It is a common rule of thumb to select a proposal so that this is between 20% and 50%. Set apop_opts.verbose=3 to see the stream of proposal points, their likelihoods, and the acceptance odds. You may want to set apop_opts.log_file=fopen("yourlog", "w") first.
int apop_model_metropolis_draw ( double *  out,
gsl_rng *  rng,
apop_model model 
)

The draw method for models estimated via apop_model_metropolis.

That method produces an apop_pmf, typically with a few thousand draws from the model in a batch. If you want to get a single next step from the Markov chain, use this.

A Markov chain works by making a new draw and then accepting or rejecting the draw. If the draw is rejected, the last value is reported as the next step in the chain. Users sometimes mitigate this repetition by making a batch of draws (say, ten at a time) and using only the last.

If you run this without first running apop_model_metropolis, I will run it for you, meaning that there will be an initial burn-in period before the first draw that can be reported to you. That run is done using model->data as input.

Parameters
outAn array of doubles, which will hold the draw, in the style of apop_draw.
rngA gsl_rng, already initialized, probably via apop_rng_alloc.
modelA model which was probably already run through apop_model_metropolis.
Returns
On return, out is filled with the next step in the Markov chain. The ->data element of the PMF model is extended to include the additional steps in the chain. If a proposal failed the model constraints, then return 1; else return 0. See the notes in the documentation for apop_model_metropolis.
  • After pulling the attached settings group, the parent model is ignored. One expects that base_model in the mcmc settings group == the parent model.
  • If your settings break the model parameters into several chunks, this function returns after stepping through all chunks.
apop_data * apop_model_numerical_covariance ( apop_data data,
apop_model model,
double  delta 
)

Produce the covariance matrix for the parameters of an estimated model via the derivative of the score function at the parameter. I.e., I find the second derivative via apop_model_hessian, and take the negation of the inverse.

I follow Efron and Hinkley in using the estimated information matrix—the value of the information matrix at the estimated value of the score—not the expected information matrix that is the integral over all possible data. See Pawitan 2001 (who cribbed a little off of Efron and Hinkley) or Klemens 2008 (who directly cribbed off of both) for further details.

Parameters
dataThe data by which your model was estimated
modelA model whose parameters have been estimated.
deltaThe differential by which to step for sampling changes. (default: 1e-3, but see below)
Returns
A covariance matrix for the data. Also, if the data does not have a "<Covariance>" page, I'll set it to the result as well [i.e., I won't overwrite an existing covariance page].
  • If you do not set delta as an input, I first look for an apop_mle_settings group attached to the input model, and check that for a delta element. If that is also missing, use the default of 1e-3.
  • This function uses the Designated initializers syntax for inputs.
void apop_model_print ( apop_model model,
FILE *  output_pipe 
)

Print the results of an estimation for a human to look over.

Parameters
modelThe model whose information should be displayed (No default. If NULL, print NULL)
output_pipeThe output stream. Default: stdout. If you'd like something else, use fopen. E.g.:
1 FILE *out =fopen("outfile.txt", "w"); //or "a" to append.
2 apop_model_print(the_model, out);
3 fclose(out); //optional in many cases.
  • The default prints the name, parameters, info, &c. but I check a vtable for alternate methods you define; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:
1 typedef void (*apop_model_print_type)(apop_model *params, FILE *out);
2 #define apop_model_print_hash(m1) ((m1)->log_likelihood ? (size_t)(m1)->log_likelihood : \
3  (m1)->p ? (size_t)(m1)->p*33 : \
4  (m1)->estimate ? (size_t)(m1)->estimate*33*33 : \
5  (m1)->draw ? (size_t)(m1)->draw*33*27 : \
6  (m1)->cdf ? (size_t)(m1)->cdf*27*27 : 27)

When building a special print method, all output should fprintf to the input FILE* handle. Apophenia's output routines also accept a file handle; e.g., if the file handle is named ap, then if the thismodel print method uses apop_data_print to print the parameters, it must do so via a form like apop_data_print(thismodel->parameters, .output_pipe=ap).

Your print method can use both by masking itself for a few lines:

1 void print_method(apop_model *in, FILE* ap){
2  void *temp = in->estimate;
3  in->estimate = NULL;
4  apop_model_print(in, ap);
5  in->estimate = temp;
6 
7  printf("Additional info:\n");
8  ...
9 }
  • Print methods are intended for human consumption and are subject to change.
  • This function uses the Designated initializers syntax for inputs.
apop_model * apop_model_to_pmf ( apop_model model,
apop_data binspec,
long int  draws,
int  bin_count 
)

Make random draws from an apop_model, and bin them using a binspec in the style of apop_data_to_bins. If you have a data set that used the same binspec, you now have synced histograms, which you can plot or sensibly test hypotheses about.

Parameters
binspecA description of the bins in which to place the draws; see apop_data_to_bins. (default: as in apop_data_to_bins.)
modelThe model to be drawn from. Because this function works via random draws, the model needs to have a draw method. (No default)
drawsThe number of random draws to make. (arbitrary default = 10,000)
bin_countIf no bin spec, the number of bins to use (default: as per apop_data_to_bins, $\sqrt{N}$)
Returns
An apop_pmf model, with a new binned data set attached (which you may have to apop_data_free(output_model->data) to prevent memory leaks). The weights on the data set are normalized to sum to one.
long double apop_multivariate_gamma ( double  a,
int  p 
)

The multivariate generalization of the Gamma distribution.

\[ \Gamma_p(a)= \pi^{p(p-1)/4}\prod_{j=1}^p \Gamma\left[ a+(1-j)/2\right]. \]

Because $\Gamma(x)$ is undefined for $x\in\{0, -1, -2, ...\}$, this function returns NAN when $a+(1-j)/2$ takes on one of those values.

See also apop_multivariate_lngamma, which is more numerically stable in most cases.

long double apop_multivariate_lngamma ( double  a,
int  p 
)

The log of the multivariate generalization of the Gamma; see also apop_multivariate_gamma.

int apop_name_add ( apop_name n,
char const *  add_me,
char  type 
)

Adds a name to the apop_name structure. Puts it at the end of the given list.

Parameters
nAn existing, allocated apop_name structure.
add_meA string. If NULL, do nothing; return -1.
type'r': add a row name
'c': add a matrix column name
't': add a text column name
'h': add a title (i.e., a header).
'v': add (or overwrite) the vector name
Returns
Returns the number of rows/cols/depvars after you have added the new one. But if add_me is NULL, return -1.
apop_name * apop_name_alloc ( void  )

Allocates a name structure

Returns
An allocated, empty name structure. In the very unlikely event that malloc fails, return NULL.

Because apop_data_alloc uses this to set up its output, you will rarely if ever need to call this function explicitly. You may want to use it if wrapping a gsl_matrix into an apop_data set. For example, to put a title on a vector:

1 apop_data *d = &(apop_data){.vector=your_vector, .names=apop_name_alloc()};
2 apop_name_add(d->names, "A column of numbers", 'v');
3 apop_data_print(d);
4 
5 ...
6 apop_name_free(d->names); //but d itself is auto-allocated; no need to free it.
apop_name * apop_name_copy ( apop_name in)

Copy one apop_name structure to another. That is, all data is duplicated.

Used internally by apop_data_copy, but sometimes useful by itself. For example, say that we have an apop_data struct named d and a gsl_matrix of the same dimensions named m; we could give m the labels from d for printing:

1 apop_data *wrapped = &(apop_data){.matrix=m, .names=apop_name_copy(d)};
2 apop_data_print(wrapped);
3 apop_name_free(wrapped->names); //wrapped itself is auto-allocated; do not free.
Parameters
inThe input names
Returns
An apop_name struct with copies of all input names.
int apop_name_find ( const apop_name n,
const char *  name,
const char  type 
)

Finds the position of an element in a list of names.

The function uses POSIX's strcasecmp, and so does case-insensitive search the way that function does.

Parameters
nthe apop_name object to search.
namethe name you seek; see above.
type'c' (=column), 'r' (=row), or 't' (=text). Default is 'c'.
Returns
The position of name. If 'c', then this may be -1, meaning the vector name. If not found, returns -2. On error, e.g. name==NULL, returns -2.
void apop_name_free ( apop_name free_me)

Free the memory used by an apop_name structure.

void apop_name_print ( apop_name n)

Prints the given list of names to stdout. Useful for debugging.

Parameters
nThe apop_name structure
void apop_name_stack ( apop_name n1,
apop_name nadd,
char  type1,
char  typeadd 
)

Append one list of names to another.

If the first list is empty, then this is a copy function.

Parameters
n1The first set of names (no default, must not be NULL)
naddThe second set of names, which will be appended after the first. (no default. If NULL, a no-op.)
type1Either 'c', 'r', 't', or 'v' stating whether you are merging the columns, rows, text, or vector. If 'v', then ignore typeadd and just overwrite the target vector name with the source name. (default: 'r')
typeaddEither 'c', 'r', 't', or 'v' stating whether you are merging the columns, rows, or text. If 'v', then overwrite the target with the source vector name. (default: type1)
gsl_vector * apop_numerical_gradient ( apop_data data,
apop_model model,
double  delta 
)

A wrapper around the GSL's one-dimensional gsl_deriv_central to find a numeric differential for each dimension of the input apop_model's log likelihood (or p if log_likelihood is NULL).

Parameters
dataThe apop_data set to use for all evaluations.
modelThe apop_model, expressing the function whose derivative is sought. The gradient is taken via small changes along the model parameters.
deltaThe size of the differential. (default: 1e-3, but see below)
1 gsl_vector *gradient = apop_numerical_gradient(data, your_parametrized_model);
  • If you do not set delta as an input, I first look for an apop_mle_settings group attached to the input model, and check that for a delta element. If that is also missing, use the default of 1e-3.
  • This function uses the Designated initializers syntax for inputs.
double apop_p ( apop_data d,
apop_model m 
)

Find the probability of a data/parametrized model pair.

Parameters
dThe data
mThe parametrized model, which must have either a log_likelihood or a p method.
apop_data * apop_paired_t_test ( gsl_vector *  a,
gsl_vector *  b 
)

Answers the question: with what confidence can I say that the mean difference between the two columns is zero?

If apop_opts.verbose >=2, then display some information, like the mean/var/count for both vectors and the t statistic, to stderr.

Parameters
aA column of data
bA matched column of data
Returns
an apop_data set with the following elements: mean left - right: the difference in means; if positive, first vector has larger mean, and one-tailed test is testing $L > R$, else reverse if negative.
t statistic: used for the test
df: degrees of freedom
p value, 1 tail: the p-value for a one-tailed test that one vector mean is greater than the other.
confidence, 1 tail: 1- p value.
p value, 2 tail: the p-value for the two-tailed test that left mean = right mean.
confidence, 2 tail: 1-p value
See also
apop_t_test for an example, and for when the element-by-element difference between the vectors has no sensible interpretation.
apop_model * apop_parameter_model ( apop_data d,
apop_model m 
)

Get a model describing the distribution of the given parameter estimates.

For many models, the parameter estimates are well-known, such as the $t$-distribution of the parameters for OLS.

For models where the distribution of $\hat{p}$ is not known, if you give me data, I will return an apop_normal or apop_multivariate_normal model, using the parameter estimates as mean and apop_bootstrap_cov for the variances.

If you don't give me data, then I will assume that this is a stochastic model where re-running the model will produce different parameter estimates each time. In this case, I will run the model 1e4 times and return an apop_pmf model with the resulting parameter distributions.

Before calling this, I expect that you have already run apop_estimate to produce $\hat{p}$.

The apop_pm_settings structure dictates details of how the model is generated. For example, if you want only the distribution of the third parameter, and you know the distribution will be a PMF generated via random draws, then set settings and call the model via:

1 apop_model_group_add(your_model, apop_pm, .index =3, .draws=3e5);
2 apop_model *dist = apop_parameter_model(your_data, your_model);

Some useful parts of apop_pm_settings:

  • index gives the position of the parameter (in apop_data_pack order) in which you are interested. Thus, if this is zero or more, then you will get a univariate output distribution describing a single parameter. If index == -1, then I will give you the multivariate distribution across all parameters. The default is zero (i.e. the univariate distribution of the zeroth parameter).
  • draws If there is no closed-form solution and bootstrap is inappropriate, then the last resort is a large number of random draws of the model, summarized into a PMF. Default: 1,000 draws.
  • rng If the method requires random draws, then use this. If you provide NULL and one is needed, I provide one for you via apop_rng_get_thread.

The default is via resampling as above, but special-case calculations for certain models are held in a vtable; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:

1 typedef apop_model* (*apop_parameter_model_type)(apop_data *, apop_model *);
2 #define apop_parameter_model_hash(m1) ((size_t)((m1).log_likelihood ? (m1).log_likelihood : (m1).p)*33 + (m1).estimate ? (size_t)(m1).estimate: 27)
apop_data * apop_predict ( apop_data d,
apop_model m 
)

A prediction supplies E(a missing value | original data, already-estimated parameters, and other supplied data elements ).

For a regression, one would first estimate the parameters of the model, then supply a row of predictors X. The value of the dependent variable $y$ is unknown, so the system would predict that value.

For a univariate model (i.e. a model in one-dimensional data space), there is only one variable to omit and fill in, so the prediction problem reduces to the expected value: E(a missing value | original data, already-estimated parameters). [In some models, this may not be the expected value, but is a best value for the missing item using some other meaning of `best'.]

In other cases, prediction is the missing data problem: for three-dimensional data, you may supply the input (34, NaN, 12), and the parameterized model provides the most likely value of the middle parameter given the parameters and known data.

  • If you give me a NULL data set, I will assume you want all values filled in, for most models with the expected value.
  • If you give me data with NaNs, I will take those as the points to be predicted given the provided data.

If the model has no predict method, the default is to use the apop_ml_impute function to do the work. That function does a maximum-likelihood search for the best parameters.

Returns
If you gave me a non-NULL data set, I will return that, with the NaNs filled in. If NULL input, I will allocate an apop_data set and fill it with the expected values.

There may be a second page (i.e., an apop_data set attached to the ->more pointer of the main) listing confidence and standard error information. See your specific model documentation for details.

  • Special-case calculations for certain models are held in a vtable; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:
1 typedef apop_data * (*apop_predict_type)(apop_data *d, apop_model *params);
2 #define apop_predict_hash(m1) ((size_t)((m1).log_likelihood ? (m1).log_likelihood : (m1).p)*33 + (m1).estimate ? (size_t)(m1).estimate: 27)
void apop_prep ( apop_data d,
apop_model m 
)

Allocate and initialize the parameters, info, and other requisite parts of an apop_model.

Some models have associated prep routines that also attach settings groups to the model, and set up additional special-case functions in vtables.

  • The input model is modified in place.
  • If called repeatedly, subsequent calls to apop_prep are no-ops. Thus, a model can not be re-prepped using a new data set or other conditions.
  • The default prep is to simply call apop_model_clear. If the input apop_model has a prep method, then that gets called instead.
int apop_prep_output ( char const *  output_name,
FILE **  output_pipe,
char *  output_type,
char *  output_append 
)

If you're reading this, it is probably because you were referred by another function that uses this internally. You should never call this function directly, but do read this documentation.

There are four settings that affect how output happens, which can be set when you call the function that sent you to this documentation, e.g:

1 apop_data_print(your_data, .output_type ='f', .output_append = 'w');
Parameters
output_nameThe name of the output file, if any. For a database, the table to write.
output_pipeIf you have already opened a file and have a FILE* on hand, use this instead of giving the file name.
output_type'p' = pipe, 'f'= file, 'd' = database
output_append'a' = append (default), 'w' = write over.

At the end, output_name, output_pipe, and output_type are all set. Notably, the local output_pipe will have the correct location for the calling function to fprintf to.

  • See the Legible output section for more discussion.
  • The default is output to stdout. For example,
    1 apop_data_print(your_data);
    2 //is equivalent to
    3 apop_data_print(your_data, .output_type='p', .output_pipe=stdout);
  • Tip: if writing to the database, you can get a major speed boost by wrapping the call in a begin/commit wrapper:
1 apop_query("begin;");
2 apop_data_print(your_data, .output_name="dbtab", .output_type='d');
3 apop_query("commit;");
apop_data * apop_rake ( char const *  margin_table,
char *const *  var_list,
int  var_ct,
char *const *  contrasts,
int  contrast_ct,
char const *  structural_zeros,
int  max_iterations,
double  tolerance,
char const *  count_col,
char const *  init_table,
char const *  init_count_col,
double  nudge 
)

Fit a log-linear model via iterative proportional fitting, aka raking.

Raking has many uses. The Modeling with Data blog presents a series of discussions of uses of raking, including some worked examples.

Or see Wikipedia for an overview of Log linear models, aka Poisson regressions. One approach toward log-linear modeling is a regression form; let there be four categories, A, B, C, and D, from which we can produce a model positing, for example, that cell count is a function of a form like $g_1(A) + g_2(BC) + g_3(CD)$. In this case, we would assign a separate coefficient to every possible value of A, every possible value of (B, C), and every value of (C, D). Raking is the technique that searches for that large set of parameters.

The combinations of categories that are considered to be relevant are called contrasts, after ANOVA terminology of the 1940s.

The other constraint on the search are structural zeros, which are values that you know can never be non-zero, due to field-specific facts about the variables. For example, U.S. Social Security payments are available only to those age 65 or older, so "age <65 and gets_soc_security=1" is a structural zero.

Because there is one parameter for every combination, there may be millions of parameters to estimate, so the search to find the most likely value requires some attention to technique. For over half a century, the consensus method for searching has been raking, which iteratively draws each category closer to the mean in a somewhat simple manner (this was first developed circa 1940 and had to be feasible by hand), but which is guaranteed to eventually arrive at the maximum likelihood estimate for all cells.

Another complication is that the table is invariably sparse. One can easily construct tables with millions of cells, but the corresponding data set may have only a few thousand observations.

This function uses the database to resolve the sparseness problem. It constructs a query requesting all combinations of categories that could possibly be non-zero after raking, given all of the above constraints. Then, raking is done using only that subset. This means that the work is done on a number of cells proportional to the number of data points, not to the full cross of all categories. Set apop_opts.verbose to 2 or greater to show the query on stderr.

  • One could use raking to generate `fully synthetic' data: start with observation-level data in a margin table. Begin the raking with a starting data set of all-ones. Then rake until the all-ones set transforms into something that conforms to the margins and (if any) structural zeros. You now have a data set which matches the marginal totals but does not use any other information from the observation-level data. If you do not specify an .init_table, then an all-ones default table will be used.
Parameters
margin_tableThe name of the table in the database to use for calculating the margins. The table should have one observation per row. (No default)
var_listThe full list of variables to search. A list of strings, e.g., (char *[]){"var1", "var2", ..., "var15"}
var_ctThe count of the full list of variables to search.
contrastsThe contrasts describing your model. Like the var_list input, this is a list of strings, e.g., (char *[]){"var1", "var7", "var13"}. Each contrast is a pipe-delimited list of variable names. (No default)
contrast_ctThe number of contrasts in the list of contrasts. (No default)
structural_zerosa SQL clause indicating combinations that can never take a nonzero value. This will go into a where clause, so anything you could put there is OK, e.g. "age <65 and gets_soc_security=1 or age <15 and married=1". Your margin data is not checked for structural zeros. (default: no structural zeros)
max_iterationsNumber of rounds of raking at which the algorithm halts. (default: 1000)
toleranceI calculate the change for each cell from round to round; if the largest cell change is smaller than this, I stop. (default: 1e-5)
count_colThis column gives the count of how many observations are represented by each row. If NULL, each row represents one person. (default: NULL)
init_tableThe default is to initially set all table elements to one and then rake from there. This is effectively the `fully synthetic' approach, which uses only the information in the margins and derives the data set closest to the all-ones data set that is consistent with the margins. Care is taken to maintain sparsity in this case. If you specify an init_table, then I will get the initial cell counts from it. (default: the fully-synthetic approach, using a starting point of an all-ones grid.)
init_count_colThe column in init_table with the cell counts.
nudgeThere is a common hack of adding a small value to every zero entry, because a zero entry will always scale to zero, while a small value could eventually scale to anything. Recall that this function works on sparse sets, so I first filter out those cells that could possibly have a nonzero value given the observations, then I add nudge to any zero cells within that subset.
Returns
An apop_data set where every row is a single combination of variable values and the weights vector gives the most likely value for each cell.
Exceptions
out->error='i'Input was somehow wrong.
out->error='c'Raking did not converge, reached max. iteration count.
  • Set apop_opts.verbose=3 to see the intermediate tables at the end of each round of raking.
  • If you want all cells to have nonzero value, then you can do that via pre-processing:
    1 apop_query("update data_table set count_col = 1e-3 where count_col = 0");
  • This function is thread-safe. To make this happen, temp database tables are named using a number built with omp_get_thread_num.
  • This function uses the Designated initializers syntax for inputs.
int apop_regex ( const char *  string,
const char *  regex,
apop_data **  substrings,
const char  use_case 
)

Extract subsets from a string via regular expressions.

This function takes a regular expression and repeatedly applies it to an input string. It returns the count of matches, and optionally returns the matches themselves organized into the text grid of an apop_data set.

  • There are three common flavors of regular expression: Basic, Extended, and Perl-compatible (BRE, ERE, PCRE). I use EREs, as per the specs of your C library, which should match POSIX's ERE specification.

For example, "p.val" will match "P value", "p.value", "p values" (and even "tempeval", so be careful).

If you give a non-NULL address in which to place a table of paren-delimited substrings, I'll return them as a row in the text element of the returned apop_data set. I'll return all the matches, filling the first row with substrings from the first application of your regex, then filling the next row with another set of matches (if any), and so on to the end of the string. Useful when parsing a list of items, for example.

Parameters
stringThe string to search (no default)
regexThe regular expression (no default)
substringsParens in the regex indicate that I should return matching substrings. Give me the address of an apop_data* set, and I will allocate and fill the text portion with matches. Default= NULL, meaning do not return substrings (even if parens exist in the regex). If no match, return an empty apop_data set, so output->textsize[0]==0.
use_caseShould I be case sensitive, 'y' or 'n'? (default = 'n', which is not the POSIX default.)
Returns
Count of matches found. 0 == no match. substrings may be allocated and filled if needed.
  • If apop_opts.stop_on_warning='n' returns -1 on error (e.g., regex NULL or didn't compile).
  • If string==NULL, I return 0—no match—and if substrings is provided, set it to NULL.
  • Here is the test function. Notice that the substring-pulling function call passes &subs, not plain subs.
#include <apop.h>
int main(){
char string1[] = "Hello. I am a string.";
assert(apop_regex(string1, "hell"));
apop_data *subs;
apop_regex(string1, "(e).*I.*(xxx)*(am)", .substrings = &subs);
//apop_data_show(subs);
assert(!strcmp(subs->text[0][0], "e"));
assert(!strlen(subs->text[0][1])); //The non-match to (xxx)* has a zero-length blank
assert(!strcmp(subs->text[0][2], "am"));
//Split a comma-delimited list, throwing out white space.
//Notice that the regex includes only one instance of a non-comma blob
//ending in a non-space followed by a comma, but the function keeps
//applying it until the end of string.
char string2[] = " one, two , three ,four";
apop_regex(string2, " *([^,]*[^ ]) *(,|$) *", &subs);
assert(!strcmp(*subs->text[0], "one"));
assert(!strcmp(*subs->text[1], "two"));
assert(!strcmp(*subs->text[2], "three"));
assert(!strcmp(*subs->text[3], "four"));
//Get a parenthetical. For EREs, \( \) match plain parens in the text.
char string3[] = " one (but secretly, two)";
apop_regex(string3, "(\\([^)]*\\))", &subs);
assert(!strcmp(*subs->text[0], "(but secretly, two)"));
//NULL input string ==> no-op.
int match_count = apop_regex(NULL, " *([^,]*[^ ]) *(,|$) *", &subs);
assert(!match_count);
assert(!subs);
}
  • Each set of matches will be one row of the output data. E.g., given the regex ([A-Za-z])([0-9]), the column zero of outdata will hold letters, and column one will hold numbers. Use apop_data_transpose to reverse this so that the letters are in outdata->text[0] and numbers in outdata->text[1].
gsl_rng * apop_rng_alloc ( int  seed)

Initialize a gsl_rng.

Uses the Tausworthe routine.

Parameters
seedThe seed. No need to get funny with it: 0, 1, and 2 will produce wholly different streams.
Returns
The RNG ready for your use.
  • If you are confident that your code is debugged and would like a new stream of values every time your program runs (provided your runs are more than a second apart), seed with the time:
#include <apop.h>
#include <time.h>
int main(){
apop_opts.rng_seed = time(NULL);
apop_data_print(
apop_model_draws(
apop_model_set_parameters(apop_normal, 0, 1),
.count=10,
)
);
}
double apop_rng_GHgB3 ( gsl_rng *  r,
double *  a 
)

RNG from a Generalized Hypergeometric type B3.

Devroye uses this as the base for many of his distribution-generators, including the Waring.

  • If one of the inputs is <=0, error; return NaN and print a warning.
void apop_score ( apop_data d,
gsl_vector *  out,
apop_model m 
)

Find the vector of first derivatives (aka the gradient) of the log likelihood of a data/parametrized model pair.

On input, the model m must already be sufficiently prepped that the log likelihood can be evaluated; see p, log_likelihood for details.

On output, the gsl_vector input to the function will be filled with the gradients (or NaNs on errors). If the model parameters have a more complex shape than a simple vector, then the vector will be in apop_data_pack order; use apop_data_unpack to reformat to the preferred shape.

Parameters
dThe apop_data set at which the score is being evaluated.
outThe score to be returned. I expect you to have allocated this already.
mThe parametrized model, which must have either a log_likelihood or a p method.
1 typedef void (*apop_score_type)(apop_data *d, gsl_vector *gradient, apop_model *m);
2 #define apop_score_hash(m1) ((size_t)((m1).log_likelihood ? (m1).log_likelihood : (m1).p))
apop_data * apop_t_test ( gsl_vector *  a,
gsl_vector *  b 
)

Answers the question: with what confidence can I say that the means of these two columns of data are different?

If apop_opts.verbose is >=1, then display some information to stdout, like the mean/var/count for both vectors and the t statistic.

Parameters
aone column of data
banother column of data
Returns
an apop_data set with the following elements: mean left - right: the difference in means; if positive, first vector has larger mean, and one-tailed test is testing $L > R$, else reverse if negative.
t statistic: used for the test
df: degrees of freedom
p value, 1 tail: the p-value for a one-tailed test that one vector mean is greater than the other.
confidence, 1 tail: 1- p value.
p value, 2 tail: the p-value for the two-tailed test that left mean = right mean.
confidence, 2 tail: 1-p value

Example usage:

1 gsl_vector *L = apop_query_to_vector("select * from data where sex='M'");
2 gsl_vector *R = apop_query_to_vector("select * from data where sex='F'");
3 apop_data *test_out = apop_t_test(L, R);
4 printf("Reject the null hypothesis of no difference between M and F with %g%% confidence\n", apop_data_get(test_out, .rowname="confidence, 2 tail"));
See also
apop_paired_t_test, which answers the question: with what confidence can I say that the mean difference between the two columns is zero?
int apop_table_exists ( char const *  name,
char  remove 
)

Check for the existence of a table, and maybe delete it.

Recreating a table which already exists can cause errors, so it is good practice to check for existence first. Also, this is the stylish way to delete a table, since just calling "drop table" will give you an error if the table doesn't exist.

Parameters
namethe table name (no default)
remove'd' ==>delete table so it can be recreated in main.
'n' ==>no action. Return result so program can continue. (default)
Returns
0 = table does not exist
1 = table was found, and if remove=='d', has been deleted
-1 = processing error
  • In the SQLite engine, this function considers table views to be tables.
  • This function uses the Designated initializers syntax for inputs.
double apop_test ( double  statistic,
char *  distribution,
double  p1,
double  p2,
char  tail 
)

This is a convenience function to do the lookup of a given statistic along a given distribution. You give me a statistic, its (hypothesized) distribution, and whether to use the upper tail, lower tail, or both. I will return the odds of a Type I error given the model—in statistician jargon, the $p$-value. [Type I error: odds of rejecting the null hypothesis when it is true.]

For example,

1 apop_test(1.3);

will return the density of the standard Normal distribution that is more than 1.3 from zero. If this function returns a small value, we can be confident that the statistic is significant. Or,

1 apop_test(1.3, "t", 10, .tail='u');

will give the appropriate odds for an upper-tailed test using the $t$-distribution with 10 degrees of freedom (e.g., a $t$-test of the null hypothesis that the statistic is less than or equal to zero).

Several more distributions are supported; see below.

  • For a two-tailed test (the default), this returns the density outside the range. I'll only do this for symmetric distributions.
  • For an upper-tail test ('u'), this returns the density above the cutoff
  • For a lower-tail test ('l'), this returns the density below the cutoff
Parameters
statisticThe scalar value to be tested.
distributionThe name of the distribution; see below.
p1The first parameter for the distribution; see below.
p2The second parameter for the distribution; see below.
tail'u' = upper tail; 'l' = lower tail; anything else = two-tailed. (default = two-tailed)
Returns
The odds of a Type I error given the model (the $p$-value).

Here are the distributions you can use and their parameters.

"normal" or "gaussian"

  • p1= $\mu$, p2= $\sigma$
  • default (0, 1)

"lognormal"

  • p1= $\mu$, p2= $\sigma$
  • default (0, 1)
  • Remember, $\mu$ and $\sigma$ refer to the Normal one would get after exponentiation
  • One-tailed tests only

"uniform"

  • p1=lower edge, p2=upper edge
  • default (0, 1)
  • two-tailed tests are run relative to the center, (p1+p2)/2.

"t"

  • p1=df
  • no default

"chi squared", "chi", "chisq":

  • p1=df
  • no default
  • One-tailed tests only; default='u' ( $p$-value for typical cases)

"f"

  • p1=df1, p2=df2
  • no default
  • One-tailed tests only
apop_data * apop_test_anova_independence ( apop_data d)

Run a Chi-squared test on an ANOVA table, i.e., an NxN table with the null hypothesis that all cells are equally likely.

Parameters
dThe input data, which is a crosstab of various elements. They don't have to sum to one.
Returns
An apop_data set including elements named "chi squared statistic", "df", and "p value". Retrieve via, e.g., apop_data_get(out, .rowname="p value").
See also
apop_test_fisher_exact
apop_data * apop_test_fisher_exact ( apop_data intab)

Run the Fisher exact test on an input contingency table.

Returns
An apop_data set with two rows:
"probability of table": Probability of the observed table for fixed marginal totals.
"p value": Table p-value. The probability of a more extreme table, where `extreme' is in a probabilistic sense.
  • If there are processing errors, these values will be NaN.
Exceptions
out->error=='p'Processing error in the test.

For example:

#include <apop.h>
int main() {
/* This test is thanks to Nick Eriksson, who sent it to me in the form of a bug report. */
apop_data * testdata = apop_data_falloc((2, 3),
30, 50, 45,
34, 12, 17 );
apop_data * t2 = apop_test_fisher_exact(testdata);
assert(fabs(apop_data_get(t2,.rowname="p value") - 0.0001761) < 1e-6);
}
apop_data * apop_test_kolmogorov ( apop_model m1,
apop_model m2 
)

Run the Kolmogorov-Smirnov test to determine whether two distributions are identical.

Parameters
m1A sorted PMF model. I.e., a model estimated via something like apop_model *m1 = apop_estimate(apop_data_sort(input_data), apop_pmf);
m2Another apop_model. If it is a PMF, then I will use a two-sample test, which is different from the one-sample test used if this is not a PMF.
Returns
An apop_data set including the $p$-value from the Kolmogorov-Smirnov test that the two distributions are equal.
Exceptions
out->error='m'Model error: m1 is not an apop_pmf. I verify this by checking whether m1->cdf == apop_pmf->cdf.
  • If you are using an apop_pmf model, the data set(s) must be sorted before you set up the model, as per the example below. See apop_data_sort and the discussion of CDFs in the apop_pmf documentation. If you don't do this, the test will almost certainly reject the null hypothesis that m1 and m2 are identical. A future version of Apophenia may implement a mechanism to allow this function to test for sorted data, but it currently can't.

Here is an example, which tests whether a set of draws from a Normal(0, 1) matches a sequence of Normal distributions with increasing mean.

#include <apop.h>
//This program finds the p-value of a K-S test between
//500 draws from a N(0, 1) and a N(x, 1), where x grows from 0 to 1.
apop_model * model_to_pmfs(apop_model *m1, int size){
apop_data *outd1 = apop_model_draws(m1, size);
return apop_estimate(apop_data_sort(outd1), apop_pmf);
}
int main(){
apop_model *n1 = apop_model_set_parameters(apop_normal, 0, 1);
apop_model *pmf1 = model_to_pmfs(n1, 5e2);
apop_data *ktest;
//first, there should be zero divergence between a PMF and itself:
apop_model *pmf2 = apop_model_copy(pmf1);
ktest = apop_test_kolmogorov(pmf1, pmf2);
double pval = apop_data_get(ktest, .rowname="p value, 2 tail");
assert(pval > .999);
//as the mean m drifts, the pval for a comparison
//between a N(0, 1) and N(m, 1) gets smaller.
printf("mean\tpval\n");
double prior_pval = 18;
for(double i=0; i<= .6; i+=0.2){
apop_model *n11 = apop_model_set_parameters(apop_normal, i, 1);
ktest = apop_test_kolmogorov(pmf1, n11);
apop_data_print(ktest, NULL);
double pval = apop_data_get(ktest, .rowname="p value, 2 tail");
assert(pval < prior_pval);
printf("%g\t%g\n", i, pval);
prior_pval = pval;
}
}
apop_data * apop_text_alloc ( apop_data in,
const size_t  row,
const size_t  col 
)

This allocates or resizes the text element of an apop_data set.

If the text element already exists, then this is effectively a realloc function, reshaping to the size you specify.

Parameters
inAn apop_data set. It's OK to send in NULL, in which case an apop_data set with NULL matrix and vector elements is returned.
rowthe number of rows of text.
colthe number of columns of text.
Returns
A pointer to the relevant apop_data set. If the input was not NULL, then this is a repeat of the input pointer.
Exceptions
out->error=='a'Allocation error.
void apop_text_free ( char ***  freeme,
int  rows,
int  cols 
)

Free a matrix of chars* (i.e., a char***). This is what apop_data_free uses internally to deallocate the text element of an apop_data set. You may never need to use it directly.

Sample usage:

1 apop_text_free(yourdata->text, yourdata->textsize[0], yourdata->textsize[1]);
char * apop_text_paste ( apop_data const *  strings,
char *  between,
char *  before,
char *  after,
char *  between_cols,
apop_fn_riip  prune,
void *  prune_parameter 
)

Join together the text grid of an apop_data set into a single string.

For example, say that we have a data set with some text: row 0 has "a0", "b0", "c0"; row 1 has "a1", "b1", "c1"; and so on. We would like to produce

1 insert into tab values ('a0', 'b0', 'c0');
2 insert into tab values ('a1', 'b1', 'c1');
3 ...

This could be sent to an SQL engine to copy the data to a database (but this is just an example for demonstration—use apop_data_print to write to a database table).

To construct this single string from the text grid, we would need to add:

  • before the text, Insert into tab values ('.
  • between each element on a row: ', '
  • between rows: '); \ninsert into tab values('
  • at the tail end: ');'

Thus, do the conversion via:

1 char *insert_string = apop_text_paste(indata,
2  .before="Insert into tab values ('",
3  .between="'); \ninsert into tab values ('",
4  .between_cols="', '",
5  .after="');'"
6 );

Parameters
stringsAn apop_data set with a grid of text to be combined into a single string
betweenThe text to put in between the rows of the table, such as ", ". (Default is a single space: " ")
beforeThe text to put at the head of the string. For the query example, this would be .before="select ". (Default: NULL)
afterThe text to put at the tail of the string. For the query example, .after=" from data_table". (Default: NULL)
between_colsThe text to insert between columns of text. See below for an example (Default is set to equal .between)
pruneIf you don't want to use the entire text set, you can provide a function to indicate which elements should be pruned out. Some examples:
1 //Just use column 3
2 int is_not_col_3(apop_data *indata, int row, int col, void *ignore){
3  return col!=3;
4 }
5 
6 //Jump over blanks as if they don't exist.
7 int is_blank(apop_data *indata, int row, int col, void *ignore){
8  return strlen(indata->text[row][col])==0;
9 }
prune_parameterA void pointer to pass to your prune function.
Returns
A single string with the elements of the strings table joined as per your specification. Allocated by the function, to be freed by you if desired.
  • If the table of strings is NULL or has no text, the output string will have only the .before and .after parts with nothing in between.
  • if apop_opts.verbose >=3, then print the pasted text to stderr.
  • It is sometimes useful to use Apop_r and Apop_rs to get a view of only one or a few rows in conjunction with this function.

This sample snippet generates the SQL for a query using a list of column names (where the query begins with select , ends with from datatab, and has commas in between each element), re-processes the same list to produce the head of an HTML table, then produces the body of the table with the query result.

#include <apop.h>
int main(){
apop_query("create table datatab(name, age, sex);"
"insert into datatab values ('Alex', 23, 'm');"
"insert into datatab values ('Alex', 32, 'f');"
"insert into datatab values ('Michael', 41, 'f');"
"insert into datatab values ('Michael', 14, 'm');");
apop_data *cols = apop_text_alloc(NULL, 3, 1);
apop_text_set(cols, 0, 0, "name");
apop_text_set(cols, 1, 0, "age");
apop_text_set(cols, 2, 0, "sex");
char *query= apop_text_paste(cols, .before="select ", .between=", ");
apop_data *d = apop_query_to_text("%s from datatab", query);
char *html_head = apop_text_paste(cols, .before="<table><tr><td>",
.between="</td><td>", .after="</tr>\n<tr><td>");
char *html_table = apop_text_paste(d, .before=html_head, .after="</td></tr></table>\n",
.between="</tr>\n<tr><td>", .between_cols="</td><td>");
FILE *outfile = fopen("yourdata.html", "w");
fprintf(outfile, "%s", html_table);
fclose(outfile);
}

int apop_text_set ( apop_data in,
const size_t  row,
const size_t  col,
const char *  fmt,
  ... 
)

Add a string to the text element of an apop_data set. If you send me a NULL string, I will write the value of apop_opts.nan_string in the given slot. If there is already something in that slot, that string is freed, preventing memory leaks.

Parameters
inThe apop_data set, that already has an allocated text element.
rowThe row
colThe column
fmtThe text to write.
...You can use a printf-style fmt and follow it with the usual variables to fill in.
Returns
0=OK, -1=error (probably out-of-bounds)
  • UTF-8 or ASCII text is correctly handled.
  • Apophenia follows a general rule of not reallocating behind your back: if your text matrix is currently of size (3,3) and you try to put an item in slot (4,4), then I display an error rather than reallocating the text matrix.
  • The string added is a copy (via asprintf), not a pointer to the input(s).
  • If there had been a string at the grid point you are writing to, the old one is freed to prevent leaks. Remember this if you had other pointers aliasing that string.
  • If an element is NULL, write apop_opts.nan_string at that point. You may prefer to use "" to express a blank.
  • apop_text_alloc will reallocate to a new size if you need. For example, this code will fill the diagonals of the text array with a message, resizing as it goes:
1 apop_data *list = (something already allocated.);
2 for (int n=0; n < 10; n++){
3  apop_text_alloc(list, n+1, n+1);
4  apop_text_set(list, n, n, "This is cell (%i, %i)", n, n);
5 }
apop_data * apop_text_to_data ( char const *  text_file,
int  has_row_names,
int  has_col_names,
int const *  field_ends,
char const *  delimiters 
)

Read a delimited or fixed-width text file into the matrix element of an apop_data set.

See Input text file formatting.

See also apop_text_to_db, which handles text data, and may otherwise be a preferable approach to data management.

Parameters
text_file= "-" The name of the text file to be read in. If "-" (the default), use stdin.
has_row_namesDo the lines of data have row names? 'y' =yes; 'n' =no (default: 'n')
has_col_namesIs the top line a list of column names? See Input text file formatting for notes on dimension (default: 'y')
field_endsIf fields have a fixed size, give the end of each field, e.g. .field_ends=(int[]){3, 8, 11}. (default: NULL, indicating not fixed width)
delimitersA string listing the characters that delimit fields. (default: "|,\t")
Returns
Returns an apop_data set.
Exceptions
out->error=='a'allocation error
out->error=='t'text-reading error

example: See apop_ols.

int apop_text_to_db ( char const *  text_file,
char *  tabname,
int  has_row_names,
int  has_col_names,
char **  field_names,
int const *  field_ends,
apop_data field_params,
char *  table_params,
char const *  delimiters,
char  if_table_exists 
)

Read a delimited or fixed-width text file into a database table. See Input text file formatting.

For purely numeric data, you may be able to bypass the database by using apop_text_to_data.

See the apop_ols page for an example that uses this function to read in sample data (also listed on that page).

Apophenia ships with an apop_text_to_db command-line utility, which is a wrapper for this function.

Especially if you are using a pre-2007 version of SQLite, there may be a speedup to putting this function in a begin/commit wrapper:

1 apop_query("begin;");
2 apop_data_print(dataset, .output_name="dbtab", .output_type='d');
3 apop_query("commit;");
Parameters
text_fileThe name of the text file to be read in. If "-", then read from STDIN. (default: "-")
tabnameThe name to give the table in the database (default: text_file after the last slash and up to the next dot. E.g., text_file=="../data/pant_lengths.csv" gives tabname=="pant_lengths")
has_row_namesDo the lines of data have row names? (default: 0)
has_col_namesIs the top line a list of column names? (default: 1)
field_namesThe list of field names, which will be the columns for the table. If has_col_names==1, read the names from the file (and just set this to NULL). If has_col_names == 1 && field_names !=NULL, I'll use the field names. (default: NULL)
field_endsIf fields have a fixed size, give the end of each field, e.g. .field_ends=(int[]){3, 8, 11}. (default: NULL, indicating not fixed width)
field_paramsThere is an implicit create table in setting up the database. If you want to add a type, constraint, or key, put that here. The relevant part of the input apop_data set is the text grid, which should be $N \times 2$. The first item in each row (your_params->text[n][0], for each $n$) is a regular expression to match against the variable names; the second item (your_params->text[n][1]) is the type, constraint, and/or key (i.e., what comes after the name in the create query). Not all variables need be mentioned; the default type if nothing matches is numeric. I go in order until I find a regex that matches the given field, so if you don't like the default, then set the last row to have name .*, which is a regex guaranteed to match anything that wasn't matched by an earlier row, and then set the associated type to your preferred default. See apop_regex on details of matching. (default: NULL)
table_paramsThere is an implicit create table in setting up the database. If you want to add a table constraint or key, such as not null primary key (age, sex), put that here.
delimitersA string listing the characters that delimit fields. default = "|,\t"
if_table_existsWhat should I do if the table exists?
'n' Do nothing; exit this function. (default)
'd' Retain the table but delete all data; refill with the new data (i.e., call "delete * from your_table").
'o' Overwrite the table from scratch; deleting the previous table entirely.
'a' Append new data to the existing table.
Returns
Returns the number of rows on success, -1 on error.
apop_data * apop_text_unique_elements ( const apop_data d,
size_t  col 
)

Give me a column of text, and I'll give you a sorted list of the unique elements. This is basically running select distinct * from datacolumn, but without the aid of the database.

Parameters
dAn apop_data set with a text component
colThe text column you want me to use.
Returns
An apop_data set with a single sorted column of text, where each unique text input appears once.
See also
apop_vector_unique_elements
apop_model * apop_update ( apop_data data,
apop_model prior,
apop_model likelihood,
gsl_rng *  rng 
)

Take in a prior and likelihood distribution, and output a posterior distribution.

  • This function first checks a table of conjugate distributions for the pair you sent in. If the models are listed on the table, then the function returns a corresponding closed-form model with updated parameters.
  • If the models aren't in the table of conjugate distributions, and the prior distribution has a p or log_likelihood element, then use apop_model_metropolis to generate the posterior. If you expect MCMC to run, you may add an apop_mcmc_settings group to your prior to control the details of the search. See also the apop_model_metropolis documentation.
  • If the prior does not have a p or log_likelihood but does have a draw element, then make draws from the prior and weight them by the p given by the likelihood distribution. This is not a rejection sampling method, so the burnin is ignored.
Parameters
dataThe input data, that will be used by the likelihood function (default = NULL.)
priorThe prior apop_model. If the system needs to estimate the posterior via MCMC, this needs to have a log_likelihood or p method. (No default, must not be NULL.)
likelihoodThe likelihood apop_model. If the system needs to estimate the posterior via MCMC, this needs to have a log_likelihood or p method (ll preferred). (No default, must not be NULL.)
rngA gsl_rng, already initialized (e.g., via apop_rng_alloc). (default: an RNG from apop_rng_get_thread)
Returns
an apop_model struct representing the posterior, with updated parameters.
  • In all cases, the output is an apop_model that can be used as the input to this function, so you can chain Bayesian updating procedures.
  • Here are the conjugate distributions currently defined:
Prior Likelihood Notes
Beta Binomial
Beta Bernoulli
Exponential Gamma Gamma likelihood represents the distribution of $\lambda^{-1}$, not plain $\lambda$
Normal Normal Assumes prior with fixed $\sigma$; updates distribution for $\mu$
Gamma Poisson Uses sum and size of the data

Here is a test function that compares the output via conjugate table and via Metropolis-Hastings sampling:

#include <apop.h>
//For the test suite.
void distances(gsl_vector *v1, gsl_vector *v2, double tol){
double error = apop_vector_distance(v1, v2, .metric='m');
double updated_size = apop_vector_sum(v1);
Apop_stopif(error/updated_size > tol, exit(1), 0, "The error is %g, which is too big.", error/updated_size);
}
int main(){
double binom_start = 0.6;
double beta_start_a = 0.3;
double beta_start_b = 0.5;
double n = 4000;
//First, the easy estimation using the conjugate distribution table.
apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
apop_model *updated = apop_update(.prior= beta, .likelihood=bin);
//Now estimate via MCMC.
//Requires a one-parameter binomial, with n fixed,
//and a data set of n data points with the right p.
apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
apop_data *bin_draws = apop_data_falloc((1,2), n*(1-binom_start), n*binom_start);
bin = apop_model_fix_params(bcopy);
Apop_settings_add_group(beta, apop_mcmc, .burnin=.2, .periods=1e5);
apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);
apop_model *out_beta = apop_estimate(out_h->data, apop_beta);
//Finally, we can compare the conjugate and Gibbs results:
distances(updated->parameters->vector, out_beta->parameters->vector, 0.01);
//The apop_update function used apop_model_metropolis to generate
//a batch of draws, so the draw method for out_h is apop_model_metropolis_draw.
//So, here we make more draws using metropolis, and compare the beta
//distribution that fits to those draws to the beta distribution output above.
int draws = 1.3e5;
apop_data *d = apop_model_draws(out_h, draws);
apop_model *drawn = apop_estimate(d, apop_beta);
distances(updated->parameters->vector, drawn->parameters->vector, 0.02);
}
  • The conjugate table is stored using a vtable; see Registering new methods in vtables for details. If you are writing a new vtable entry, the typedef new functions must conform to and the hash used for lookups are:
1 typedef apop_model *(*apop_update_type)(apop_data *, apop_model , apop_model);
2 #define apop_update_hash(m1, m2) ((size_t)(m1).draw + (size_t)((m2).log_likelihood ? (m2).log_likelihood : (m2).p)*33)
void apop_vector_apply ( gsl_vector *  v,
void(*)(double *)  fn 
)

Apply a function to every element of a vector. The function that you input takes in a double* and may modify the input value in place. This function will send a pointer to each element of your vector to your function.

Parameters
vThe input vector
fnA function of the form void fn(double *in)
int apop_vector_bounded ( const gsl_vector *  in,
long double  max 
)

Test that all elements of a vector are within bounds, so you can preempt a procedure that is about to break on infinite or too-large values.

Parameters
inA gsl_vector
maxAn upper and lower bound to the elements of the vector. (default: INFINITY)
Returns
1 if everything is bounded: not Inf, -Inf, or NaN, and $-\max < x < \max$;
0 otherwise.
  • A NULL vector has no unbounded elements, so NULL input returns 1. You get a warning if apop_opts.verbose >=2.
  • This function uses the Designated initializers syntax for inputs.
gsl_vector * apop_vector_copy ( const gsl_vector *  in)

Copy one gsl_vector to another. That is, all data is duplicated. Unlike gsl_vector_memcpy, this function allocates and returns the destination, so you can use it like this:

1 gsl_vector *a_copy = apop_vector_copy(original);
Parameters
inThe input vector
Returns
A structure that this function will allocate and fill. If gsl_vector_alloc fails, returns NULL and prints a warning.
double apop_vector_correlation ( const gsl_vector *  ina,
const gsl_vector *  inb,
const gsl_vector *  weights 
)

Returns the correlation coefficient of two vectors: $ {\hbox{cov}(a,b)\over \sqrt{\hbox{var}(a)} \sqrt{\hbox{var}(b)}}.$

An example

1 gsl_matrix *m = apop_text_to_data("indata")->matrix;
2 printf("The correlation coefficient between rows two "
3  "and three is %g.\n", apop_vector_correlation(Apop_mrv(m, 2), Apop_mrv(m, 3)));
Parameters
ina,inbTwo vectors of equal length (no default, must not be NULL)
weightsReplicate weights for the observations. (default: equal weights for all observations)
double apop_vector_cov ( const gsl_vector *  v1,
const gsl_vector *  v2,
const gsl_vector *  weights 
)

Find the sample covariance of a pair of vectors, with an optional weighting. This only makes sense if the weightings are identical, so the function takes only one weighting vector for both.

Parameters
v1,v2The data vectors (no default; must not be NULL)
weightsThe weight vector. (default equal weights for all elements)
Returns
The sample covariance
double apop_vector_distance ( const gsl_vector *  ina,
const gsl_vector *  inb,
const char  metric,
const double  norm 
)

Returns the distance between two vectors, where distance is defined based on the third (optional) parameter:

  • 'e' (the default): scalar distance (standard Euclidean metric) between two vectors. $\sqrt{\sum_i{(a_i - b_i)^2}},$ where $i$ iterates over dimensions.
  • 'm' Returns the Manhattan metric distance between two vectors: $\sum_i{|a_i - b_i|},$ where $i$ iterates over dimensions.
  • 'd' The discrete norm: if $a = b$, return zero, else return one.
  • 's' The sup norm: find the dimension where $|a_i - b_i|$ is largest, return the distance along that one dimension.
  • 'l' or 'L' The $L_p$ norm, $\left(\sum_i{|a_i - b_i|^p}\right)^{1/p}$. The value of $p$ is set by the fourth (optional) argument.
Parameters
inaFirst vector (No default, must not be NULL)
inbSecond vector (Default = zero)
metricThe type of metric, as above.
normIf you are using an $L_p$ norm, this is $p$. Must be strictly greater than zero. (default = 2)
  • The defaults are such that
    1 apop_vector_distance(v);
    2 apop_vector_distance(v, .metric = 's');
    3 apop_vector_distance(v, .metric = 'm');
    gives you the standard Euclidean length of v, its longest element, and its sum.
  • This function uses the Designated initializers syntax for inputs.
#include <apop.h>
/* Test distance calculations using a 3-4-5 triangle */
int main(){
gsl_vector *v1 = gsl_vector_alloc(2);
gsl_vector *v2 = gsl_vector_alloc(2);
apop_vector_fill(v1, 2, 2);
apop_vector_fill(v2, 5, 6);
assert(apop_vector_distance(v1, v1, 'd') == 0);
assert(apop_vector_distance(v1, v2, 'd') == 1);
assert(apop_vector_distance(v1, .metric='m') == 4);
assert(apop_vector_distance(v2, .metric='s') == 6);
assert(apop_vector_distance(v1,v2) == 5.); //the hypotenuse of the 3-4-5 triangle
assert(apop_vector_distance(v1,v2, 'm') == 7.);
assert(apop_vector_distance(v1,v2, 'L', 2) == 5.); //L_2 norm == Euclidean
}
long double apop_vector_entropy ( gsl_vector *  in)

Given a vector representing a probability distribution of observations, calculate the entropy, $\sum_i -\ln(v_i)v_i$.

  • You may input a vector giving frequencies (normalized to sum to one) or counts (arbitrary sum).
  • The entropy of a data set depends only on the frequency with which elements are observed, not the value of the elements themselves. The apop_data_pmf_compress function will reduce an input apop_data set to one weighted line per observation, and the weights would determine the entropy:
1 apop_data *data = apop_text_to_data("indata");
2 apop_data_pmf_compress(data);
3 data_entropy = apop_vector_entropy(data->weights);
  • The entropy is calculated using natural logs. To convert to base 2, divide by $\ln(2)$; see the example.
  • The entropy of an empty data set (NULL or a total weight of zero) is zero. Print a warning when given NULL input and apop_opts.verbose >=1.
  • If the input vector has negative elements, return NaN; print a warning when apop_opts.verbose >= 0.

Sample code:

#include <apop.h>
#define Diff(left, right, eps) Apop_stopif(fabs((left)-(right))>(eps), abort(), 0, "%g is too different from %g (abitrary limit=%g).", (double)(left), (double)(right), eps)
long double entropy_base_2(gsl_vector *x) {
return apop_vector_entropy(x)/log(2);
}
int main(){
apop_model *flip = apop_model_set_parameters(apop_bernoulli, .5);
//zero data => entropy zero
gsl_vector *v = gsl_vector_calloc(1);
assert(apop_vector_entropy(v) == 0);
//negative data => NaN
gsl_vector_set(v, 0, -1);
int v1 = apop_opts.verbose;
apop_opts.verbose = -1;
assert(isnan(apop_vector_entropy(v)));
apop_opts.verbose = v1;
//N equiprobable bins => entropy = log(N)
v = apop_vector_realloc(v, 100);
gsl_vector_set_all(v, 1./100);
Diff(log(100), apop_vector_entropy(v), 1e-5);
//Normalization is optional. You may send a vector of counts.
gsl_vector_set_all(v, 1);
Diff(log(100), apop_vector_entropy(v), 1e-5);
//flip two coins.
apop_data *coin_flips = apop_model_draws(flip, .count=10000);
apop_data *c2 = apop_model_draws(flip, .count=10000);
apop_data_stack(c2, coin_flips, 'c', .inplace='y');
//entropy of one coin flip in base2 == 1
Diff(entropy_base_2(coin_flips->weights), 1, 1e-3);
//entropy of two coin flips in base2 == 2
Diff(entropy_base_2(c2->weights), 2, 1e-3);
//flip three coins, via model cross products
Diff(entropy_base_2(apop_data_pmf_compress(apop_model_draws(apop_model_cross(flip, flip, flip), .count=10000))->weights), 3, 1e-3);
apop_data_free(coin_flips);
gsl_vector_free(v);
}
void apop_vector_exp ( gsl_vector *  v)

Replace every vector element $v_i$ with exp $(v_i)$.

  • If the input vector is NULL, do nothing.
double apop_vector_kurtosis ( const gsl_vector *  in)

Returns the sample fourth central moment of the data in the given vector. Corrections are made to produce an unbiased result as per Appendix M (PDF) of Modeling with data.

  • This is an estimate of the fourth central moment without normalization. The kurtosis of a ${\cal N}(0,1)$ is $3 \sigma^4$, not three, one, or zero.
    See also
    apop_vector_kurtosis_pop
double apop_vector_kurtosis_pop ( gsl_vector const *  v,
gsl_vector const *  weights 
)

Returns the population fourth central moment [ $\sum_i (x_i - \mu)^4/n$ ] of the data in the given vector, with an optional weighting.

Parameters
vThe data vector
weightsThe weight vector. If NULL, assume equal weights.
Returns
The weighted kurtosis.
  • Some people like to normalize the fourth central moment by dividing by variance squared, or by subtracting three; those things are not done here, so you'll have to do them separately if desired.
  • This function uses the Designated initializers syntax for inputs.
    See also
    apop_vector_kurtosis for the unbiased sample version.
void apop_vector_log ( gsl_vector *  v)

Replace every vector element $v_i$ with ln $(v_i)$.

  • If the input vector is NULL, do nothing.
void apop_vector_log10 ( gsl_vector *  v)

Replace every vector element $v_i$ with log $_{10}(v_i)$.

  • If the input vector is NULL, do nothing.
gsl_vector * apop_vector_map ( const gsl_vector *  v,
double(*)(double)  fn 
)

Map a function onto every element of a vector. This function will send each element to the function you provide, and will output a gsl_vector holding your function's output for each row.

Parameters
vThe input vector
fnA function of the form double fn(double in)
Returns
A gsl_vector (allocated by this function) with the corresponding value for each row.
double apop_vector_map_sum ( const gsl_vector *  in,
double(*)(double)  fn 
)

Returns the sum of the output of apop_vector_map. For example, apop_vector_map_sum(v, isnan) returns the count of elements of v that are NaN.

double apop_vector_mean ( gsl_vector const *  v,
gsl_vector const *  weights 
)

Find the mean, weighted or unweighted.

Parameters
vThe data vector
weightsThe weight vector. Default: assume equal weights.
Returns
The weighted mean
gsl_vector * apop_vector_moving_average ( gsl_vector *  v,
size_t  bandwidth 
)

Return a new vector that is the moving average of the input vector.

Parameters
vThe input vector, unsmoothed
bandwidthAn integer $\geq 1$ giving the number of elements to be averaged to produce one number.
Returns
A smoothed vector of size v->size - (bandwidth/2)*2.
void apop_vector_normalize ( gsl_vector *  in,
gsl_vector **  out,
const char  normalization_type 
)

This function will normalize a vector, either such that it has mean zero and variance one, or ranges between zero and one, or sums to one.

Parameters
inA gsl_vector with the un-normalized data. NULL input gives NULL output. (No default)
outIf normalizing in place, NULL. If not, the address of a gsl_vector*. Do not allocate. (default = NULL.)
normalization_type'p': normalized vector will sum to one. E.g., start with a set of observations in bins, end with the percentage of observations in each bin. (the default)
'r': normalized vector will range between zero and one. Replace each X with (X-min) / (max - min).
's': normalized vector will have mean zero and (sample) variance one. Replace each X with $(X-\mu) / \sigma$, where $\sigma$ is the sample standard deviation.
'm': normalize to mean zero: Replace each X with $(X-\mu)$

Example

double * apop_vector_percentiles ( gsl_vector *  data,
char  rounding 
)

Returns an array of size 101, where returned_vector[95] gives the value of the 95th percentile, for example. Returned_vector[100] is always the maximum value, and returned_vector[0] is always the min (regardless of rounding rule).

Parameters
dataA gsl_vector with the data. (No default, must not be NULL.)
roundingEither 'u', 'd', or 'a'. Unless your data is exactly a multiple of 101, some percentiles will be ambiguous. If 'u', then round up (use the next highest value); if 'd', round down to the next lowest value; if 'a', take the mean of the two nearest points. (Default = 'd'.)
  • If the rounding method is 'u' or 'a', then you can say "5% or more of the sample is below returned_vector[5]"; if 'd' or 'a', then you can say "5% or more of the sample is above returned_vector[5]".
  • You may eventually want to free() the array returned by this function.
  • This function uses the Designated initializers syntax for inputs.
void apop_vector_print ( gsl_vector *  data,
Output_declares   
)

Print a vector to the screen, a file, a pipe, or the database.

gsl_vector * apop_vector_realloc ( gsl_vector *  v,
size_t  newheight 
)

This function will resize a gsl_vector to a new length.

Data in the vector will be retained. If the new height is smaller than the old, then data at the end of the vector will be cropped away (in a non–memory-leaking manner). If the new height is larger than the old, then new cells will be filled with garbage; it is your responsibility to zero out or otherwise fill them before use.

  • A large number of reallocs can take a noticeable amount of time. You are thus encouraged to make an effort to determine the size of your data and do one allocation, rather than writing for loops that resize a vector at every increment.
  • The gsl_vector is a versatile struct that can represent subvectors, matrix columns and other cuts from parent data. Resizing a portion of a parent matrix makes no sense, so return NULL and print an error if asked to resize a view.
Parameters
vThe already-allocated vector to resize. If you give me NULL, this is equivalent to gsl_vector_alloc
newheightThe height you'd like the vector to be.
Returns
v, now resized
double apop_vector_skew ( const gsl_vector *  in)

Returns an unbiased estimate of the sample skew of the data in the given vector.

double apop_vector_skew_pop ( gsl_vector const *  v,
gsl_vector const *  weights 
)

Returns the population skew ( $\sum_i (x_i - \mu)^3/n$ ) of the data in the given vector. Observations may be weighted.

Parameters
vThe data vector
weightsThe weight vector. Default: equal weights for all observations.
Returns
The weighted skew.
  • Some people like to normalize the skew by dividing by (variance) $^{3/2}$; that's not done here, so you'll have to do so separately if need be.
  • Apophenia tries to be smart about reading the weights. If weights sum to one, then the system uses w->size as the number of elements, and returns the usual sum over $n-1$. If weights > 1, then the system uses the total weights as $n$. Thus, you can use the weights as standard weightings or to represent elements that appear repeatedly.
gsl_vector * apop_vector_stack ( gsl_vector *  v1,
gsl_vector const *  v2,
char  inplace 
)

Put the first vector on top of the second vector.

Parameters
v1the upper vector (default=NULL, in which case this copies v2)
v2the second vector (default=NULL, in which case nothing is added)
inplaceIf 'y', use apop_vector_realloc to modify v1 in place; see the caveats on that function. Otherwise, allocate a new vector, leaving v1 undisturbed. (default='n')
Returns
the stacked data, either in a new vector or a pointer to v1.
long double apop_vector_sum ( const gsl_vector *  in)

Returns the sum of the data in the given vector.

gsl_matrix * apop_vector_to_matrix ( const gsl_vector *  in,
char  row_col 
)

This function copies the data in a vector to a new one-column (or one-row) matrix and returns the newly-allocated and filled matrix.

For the reverse, try apop_data_pack.

Parameters
ina gsl_vector (No default. If NULL, I return NULL, with a warning if apop_opts.verbose >=1 )
row_colIf 'r', then this will be a row (1 x N) instead of the default, a column (N x 1). (default: 'c')
Returns
a newly-allocated gsl_matrix with one column (or row).
  • If you send in a NULL vector, you get a NULL pointer in return. I warn you of this if apop_opts.verbose >=2.
  • If gsl_matrix_alloc fails you get a NULL pointer in return.
  • This function uses the Designated initializers syntax for inputs.
gsl_vector * apop_vector_unique_elements ( const gsl_vector *  v)

Give me a vector of numbers, and I'll give you a sorted list of the unique elements. This is basically running select distinct datacol from data order by datacol, but without the aid of the database.

Parameters
va vector of items
Returns
a sorted vector of the distinct elements that appear in the input.
  • NaNs (if any) appear at the end of the sort order.
See also
apop_text_unique_elements
double apop_vector_var ( gsl_vector const *  v,
gsl_vector const *  weights 
)

Find the sample variance of a vector, weighted or unweighted.

Parameters
vThe data vector
weightsThe weight vector. If NULL (the default), assume equal weights.
Returns
The weighted sample variance.
  • This uses (n-1) in the denominator of the sum; i.e., it corrects for the bias introduced by using $\bar x$ instead of $\mu$.
  • Multiply the output by (n-1)/n if you need population variance.
  • Apophenia tries to be smart about reading the weights. If weights sum to one, then the system uses w->size as the number of elements, and returns the usual sum over $n-1$. If weights > 1, then the system uses the total weights as $n$. Thus, you can use the weights as standard weightings or to represent elements that appear repeatedly.
  • This function uses the Designated initializers syntax for inputs.
    See also
    apop_vector_var_m for the case where you already have the vector's mean.
double apop_vector_var_m ( const gsl_vector *  in,
const double  mean 
)

Returns the variance of the data in the given vector, given that you've already calculated the mean.

Parameters
inthe vector in question
meanthe mean, which you've already calculated using apop_vector_mean.
See also
apop_vector_var

Variable Documentation

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.