Data Structures
struct	apop_arms_settings

struct	apop_cdf_settings

struct	apop_composition_settings

struct	apop_coordinate_transform_settings

struct	apop_cross_settings

struct	apop_data

struct	apop_dconstrain_settings

struct	apop_kernel_density_settings

struct	apop_lm_settings

struct	apop_loess_settings

struct	apop_mcmc_proposal_s

struct	apop_mcmc_settings

struct	apop_mixture_settings

struct	apop_mle_settings

struct	apop_model

struct	apop_name

struct	apop_opts_type

struct	apop_parts_wanted_settings

struct	apop_pm_settings

struct	apop_pmf_settings

struct	apop_settings_type

Macros
#define	apop_ANOVA

#define	apop_ANOVA

#define	apop_ANOVA

#define	apop_ANOVA

#define	Apop_c(d, col)

#define	Apop_c(d, col)

#define	Apop_c(d, col)

#define	Apop_c(d, col)

#define	Apop_col_t(d, colname, outd)

#define	Apop_col_t(d, colname, outd)

#define	Apop_col_t(d, colname, outd)

#define	Apop_col_t(d, colname, outd)

#define	Apop_col_tv(m, col, v)

#define	Apop_col_tv(m, col, v)

#define	Apop_col_tv(m, col, v)

#define	Apop_col_tv(m, col, v)

#define	Apop_cs(d, colnum, len)

#define	Apop_cs(d, colnum, len)

#define	Apop_cs(d, colnum, len)

#define	Apop_cs(d, colnum, len)

#define	Apop_cv(data_to_view, col)

#define	Apop_cv(data_to_view, col)

#define	Apop_cv(data_to_view, col)

#define	Apop_cv(data_to_view, col)

#define	apop_data_add_names(dataset, type, ...)

#define	apop_data_add_names(dataset, type, ...)

#define	apop_data_add_names(dataset, type, ...)

#define	apop_data_add_names(dataset, type, ...)

#define	apop_data_falloc(sizes, ...)

#define	apop_data_falloc(sizes, ...)

#define	apop_data_falloc(sizes, ...)

#define	apop_data_falloc(sizes, ...)

#define	apop_data_fill(adfin, ...)

#define	apop_data_fill(adfin, ...)

#define	apop_data_fill(adfin, ...)

#define	apop_data_fill(adfin, ...)

#define	apop_data_free(freeme)

#define	apop_data_free(freeme)

#define	apop_data_free(freeme)

#define	apop_data_free(freeme)

#define	apop_data_prune_columns(in, ...)

#define	apop_data_prune_columns(in, ...)

#define	apop_data_prune_columns(in, ...)

#define	apop_data_prune_columns(in, ...)

#define	apop_errorlevel

#define	apop_errorlevel

#define	apop_errorlevel

#define	apop_errorlevel

#define	apop_estimate_r_squared(in)

#define	apop_estimate_r_squared(in)

#define	apop_estimate_r_squared(in)

#define	apop_estimate_r_squared(in)

#define	apop_F_distribution

#define	apop_F_distribution

#define	apop_F_distribution

#define	apop_F_distribution

#define	apop_F_test

#define	apop_F_test

#define	apop_F_test

#define	apop_F_test

#define	apop_gaussian

#define	apop_gaussian

#define	apop_gaussian

#define	apop_gaussian

#define	apop_IV

#define	apop_IV

#define	apop_IV

#define	apop_IV

#define	Apop_mcv(matrix_to_view, col)

#define	Apop_mcv(matrix_to_view, col)

#define	Apop_mcv(matrix_to_view, col)

#define	Apop_mcv(matrix_to_view, col)

#define	apop_mean

#define	apop_mean

#define	apop_mean

#define	apop_mean

#define	apop_model_coordinate_transform(...)

#define	apop_model_coordinate_transform(...)

#define	apop_model_coordinate_transform(...)

#define	apop_model_coordinate_transform(...)

#define	apop_model_copy_set(model, type, ...)

#define	apop_model_copy_set(model, type, ...)

#define	apop_model_copy_set(model, type, ...)

#define	apop_model_copy_set(model, type, ...)

#define	apop_model_cross(...)

#define	apop_model_cross(...)

#define	apop_model_cross(...)

#define	apop_model_cross(...)

#define	apop_model_dcompose(...)

#define	apop_model_dcompose(...)

#define	apop_model_dcompose(...)

#define	apop_model_dcompose(...)

#define	apop_model_dconstrain(...)

#define	apop_model_dconstrain(...)

#define	apop_model_dconstrain(...)

#define	apop_model_dconstrain(...)

#define	apop_model_mixture(...)

#define	apop_model_mixture(...)

#define	apop_model_mixture(...)

#define	apop_model_mixture(...)

#define	apop_model_set_parameters(in, ...)

#define	apop_model_set_parameters(in, ...)

#define	apop_model_set_parameters(in, ...)

#define	apop_model_set_parameters(in, ...)

#define	Apop_model_set_settings(model, ...)

#define	Apop_model_set_settings(model, ...)

#define	Apop_model_set_settings(model, ...)

#define	Apop_model_set_settings(model, ...)

#define	apop_model_set_settings

#define	apop_model_set_settings

#define	apop_model_set_settings

#define	apop_model_set_settings

#define	Apop_mrv(matrix_to_view, row)

#define	Apop_mrv(matrix_to_view, row)

#define	Apop_mrv(matrix_to_view, row)

#define	Apop_mrv(matrix_to_view, row)

#define	Apop_notify(verbosity, ...)

#define	Apop_notify(verbosity, ...)

#define	Apop_notify(verbosity, ...)

#define	Apop_notify(verbosity, ...)

#define	apop_OLS

#define	apop_OLS

#define	apop_OLS

#define	apop_OLS

#define	apop_PMF

#define	apop_PMF

#define	apop_PMF

#define	apop_PMF

#define	Apop_r(d, rownum)

#define	Apop_r(d, rownum)

#define	Apop_r(d, rownum)

#define	Apop_r(d, rownum)

#define	apop_rng_get_thread(thread_in)

#define	apop_rng_get_thread(thread_in)

#define	apop_rng_get_thread(thread_in)

#define	apop_rng_get_thread(thread_in)

#define	Apop_row_t(d, rowname, outd)

#define	Apop_row_t(d, rowname, outd)

#define	Apop_row_t(d, rowname, outd)

#define	Apop_row_t(d, rowname, outd)

#define	Apop_row_tv(m, row, v)

#define	Apop_row_tv(m, row, v)

#define	Apop_row_tv(m, row, v)

#define	Apop_row_tv(m, row, v)

#define	Apop_rs(d, rownum, len)

#define	Apop_rs(d, rownum, len)

#define	Apop_rs(d, rownum, len)

#define	Apop_rs(d, rownum, len)

#define	Apop_rv(data_to_view, row)

#define	Apop_rv(data_to_view, row)

#define	Apop_rv(data_to_view, row)

#define	Apop_rv(data_to_view, row)

#define	Apop_settings_add_group(model, type, ...)

#define	Apop_settings_add_group(model, type, ...)

#define	Apop_settings_add_group(model, type, ...)

#define	Apop_settings_add_group(model, type, ...)

#define	Apop_settings_copy(name, ...)

#define	Apop_settings_copy(name, ...)

#define	Apop_settings_copy(name, ...)

#define	Apop_settings_copy(name, ...)

#define	Apop_settings_declarations(ysg)

#define	Apop_settings_declarations(ysg)

#define	Apop_settings_declarations(ysg)

#define	Apop_settings_declarations(ysg)

#define	Apop_settings_free(name, ...)

#define	Apop_settings_free(name, ...)

#define	Apop_settings_free(name, ...)

#define	Apop_settings_free(name, ...)

#define	Apop_settings_get(model, type, setting)

#define	Apop_settings_get(model, type, setting)

#define	Apop_settings_get(model, type, setting)

#define	Apop_settings_get(model, type, setting)

#define	Apop_settings_get_group(m, type)

#define	Apop_settings_get_group(m, type)

#define	Apop_settings_get_group(m, type)

#define	Apop_settings_get_group(m, type)

#define	Apop_settings_init(name, ...)

#define	Apop_settings_init(name, ...)

#define	Apop_settings_init(name, ...)

#define	Apop_settings_init(name, ...)

#define	Apop_settings_rm_group(m, type)

#define	Apop_settings_rm_group(m, type)

#define	Apop_settings_rm_group(m, type)

#define	Apop_settings_rm_group(m, type)

#define	Apop_settings_set(model, type, setting, data)

#define	Apop_settings_set(model, type, setting, data)

#define	Apop_settings_set(model, type, setting, data)

#define	Apop_settings_set(model, type, setting, data)

#define	Apop_stopif(test, onfail, level, ...)

#define	Apop_stopif(test, onfail, level, ...)

#define	Apop_stopif(test, onfail, level, ...)

#define	Apop_stopif(test, onfail, level, ...)

#define	Apop_subm(matrix_to_view, srow, scol, nrows, ncols)

#define	Apop_subm(matrix_to_view, srow, scol, nrows, ncols)

#define	Apop_subm(matrix_to_view, srow, scol, nrows, ncols)

#define	Apop_subm(matrix_to_view, srow, scol, nrows, ncols)

#define	apop_sum

#define	apop_sum

#define	apop_sum

#define	apop_sum

#define	apop_test_ANOVA_independence(d)

#define	apop_test_ANOVA_independence(d)

#define	apop_test_ANOVA_independence(d)

#define	apop_test_ANOVA_independence(d)

#define	apop_text_fill(dataset, ...)

#define	apop_text_fill(dataset, ...)

#define	apop_text_fill(dataset, ...)

#define	apop_text_fill(dataset, ...)

#define	apop_var

#define	apop_var

#define	apop_var

#define	apop_var

#define	apop_vector_fill(avfin, ...)

#define	apop_vector_fill(avfin, ...)

#define	apop_vector_fill(avfin, ...)

#define	apop_vector_fill(avfin, ...)

Functions
apop_data *	apop_anova (char table, char data, char grouping1, char grouping2)

int	apop_arms_draw (double *out, gsl_rng *r, apop_model *m)

gsl_vector *	apop_array_to_vector (double *in, int size)

apop_model *	apop_beta_from_mean_var (double m, double v)

apop_data *	apop_bootstrap_cov (apop_data data, apop_model model, gsl_rng rng, int iterations, char keep_boots, char ignore_nans, apop_data *boot_store)

double	apop_cdf (apop_data *d, apop_model *m)

void	apop_crosstab_to_db (apop_data in, char tabname, char row_col_name, char col_col_name, char *data_col_name)

void	apop_data_add_named_elmt (apop_data d, char name, double val)

void	apop_data_add_names_base (apop_data d, const char type, char const *names)

apop_data *	apop_data_add_page (apop_data dataset, apop_data newpage, const char *title)

apop_data *	apop_data_alloc (const size_t size1, const size_t size2, const int size3)

apop_data *	apop_data_calloc (const size_t size1, const size_t size2, const int size3)

apop_data *	apop_data_copy (const apop_data *in)

apop_data *	apop_data_correlation (const apop_data *in)

apop_data *	apop_data_covariance (const apop_data *in)

apop_data *	apop_data_fill_base (apop_data *in, double[])

char	apop_data_free_base (apop_data *freeme)

double	apop_data_get (const apop_data data, size_t row, int col, const char rowname, const char colname, const char page)

apop_data *	apop_data_get_factor_names (apop_data *data, int col, char type)

apop_data *	apop_data_get_page (const apop_data data, const char title, const char match)

apop_data *	apop_data_listwise_delete (apop_data *d, char inplace)

void	apop_data_memcpy (apop_data *out, const apop_data *in)

gsl_vector *	apop_data_pack (const apop_data in, gsl_vector out, char more_pages, char use_info_pages)

apop_data *	apop_data_pmf_compress (apop_data *in)

void	apop_data_print (const apop_data *data, Output_declares)

void	apop_data_print (const apop_data data, char const output_name, FILE *output_pipe, char output_type, char output_append)

apop_data *	apop_data_prune_columns_base (apop_data d, char *colnames)

double *	apop_data_ptr (apop_data data, int row, int col, const char rowname, const char colname, const char page)

apop_data *	apop_data_rank_compress (apop_data *in, int min_bins)

apop_data *	apop_data_rank_expand (apop_data *in)

void	apop_data_rm_columns (apop_data d, int drop)

apop_data *	apop_data_rm_page (apop_data data, const char title, const char free_p)

apop_data *	apop_data_rm_rows (apop_data in, int drop, int(do_drop)(apop_data , void ), void drop_parameter)

int	apop_data_set (apop_data data, size_t row, int col, const double val, const char rowname, const char colname, const char page)

void	apop_data_show (const apop_data *data)

apop_data *	apop_data_sort (apop_data data, apop_data sort_order, char asc, char inplace, double *col_order)

apop_data **	apop_data_split (apop_data *in, int splitpoint, char r_or_c)

apop_data *	apop_data_stack (apop_data m1, apop_data m2, char posn, char inplace)

apop_data *	apop_data_summarize (apop_data *data)

apop_data *	apop_data_to_bins (apop_data const indata, apop_data const binspec, int bin_count, char close_top_bin)

int	apop_data_to_db (const apop_data set, const char tabname, char)

apop_data *	apop_data_to_dummies (apop_data *d, int col, char type, int keep_first, char append, char remove)

apop_data *	apop_data_to_factors (apop_data *data, char intype, int incol, int outcol)

apop_data *	apop_data_transpose (apop_data *in, char transpose_text, char inplace)

void	apop_data_unpack (const gsl_vector in, apop_data d, char use_info_pages)

int	apop_db_close (char vacuum)

int	apop_db_open (char const *filename)

apop_data *	apop_db_to_crosstab (char const tabname, char const row, char const col, char const data, char is_aggregate)

double	apop_det_and_inv (const gsl_matrix in, gsl_matrix *out, int calc_det, int calc_inv)

apop_data *	apop_dot (const apop_data d1, const apop_data d2, char form1, char form2)

int	apop_draw (double *out, gsl_rng *r, apop_model *m)

apop_model *	apop_estimate (apop_data *d, apop_model *m)

apop_data *	apop_estimate_coefficient_of_determination (apop_model *)

void	apop_estimate_parameter_tests (apop_model *est)

apop_model *	apop_estimate_restart (apop_model e, apop_model copy, char *starting_pt, double boundary)

apop_data *	apop_f_test (apop_model est, apop_data contrast)

long double	apop_generalized_harmonic (int N, double s)

apop_data *	apop_histograms_test_goodness_of_fit (apop_model h0, apop_model h1)

apop_data *	apop_jackknife_cov (apop_data data, apop_model model)

long double	apop_kl_divergence (apop_model from, apop_model to, int draw_ct, gsl_rng *rng)

long double	apop_linear_constraint (gsl_vector beta, apop_data constraint, double margin)

double	apop_log_likelihood (apop_data *d, apop_model *m)

apop_data *	apop_map (apop_data in, apop_fn_d fn_d, apop_fn_v fn_v, apop_fn_r fn_r, apop_fn_dp fn_dp, apop_fn_vp fn_vp, apop_fn_rp fn_rp, apop_fn_dpi fn_dpi, apop_fn_vpi fn_vpi, apop_fn_rpi fn_rpi, apop_fn_di fn_di, apop_fn_vi fn_vi, apop_fn_ri fn_ri, void param, int inplace, char part, int all_pages)

apop_data *	apop_map (apop_data in, double(fn_d)(double), double(fn_v)(gsl_vector ), double(fn_r)(apop_data ), double(fn_dp)(double, void ), double(fn_vp)(gsl_vector , void ), double(fn_rp)(apop_data , void ), double(fn_dpi)(double, void , int), double(fn_vpi)(gsl_vector , void , int), double(fn_rpi)(apop_data , void , int), double(fn_di)(double, int), double(fn_vi)(gsl_vector , int), double(fn_ri)(apop_data , int), void param, int inplace, char part, int all_pages)

double	apop_map_sum (apop_data in, apop_fn_d fn_d, apop_fn_v fn_v, apop_fn_r fn_r, apop_fn_dp fn_dp, apop_fn_vp fn_vp, apop_fn_rp fn_rp, apop_fn_dpi fn_dpi, apop_fn_vpi fn_vpi, apop_fn_rpi fn_rpi, apop_fn_di fn_di, apop_fn_vi fn_vi, apop_fn_ri fn_ri, void param, char part, int all_pages)

double	apop_map_sum (apop_data in, double(fn_d)(double), double(fn_v)(gsl_vector ), double(fn_r)(apop_data ), double(fn_dp)(double, void ), double(fn_vp)(gsl_vector , void ), double(fn_rp)(apop_data , void ), double(fn_dpi)(double, void , int), double(fn_vpi)(gsl_vector , void , int), double(fn_rpi)(apop_data , void , int), double(fn_di)(double, int), double(fn_vi)(gsl_vector , int), double(fn_ri)(apop_data , int), void param, char part, int all_pages)

void	apop_matrix_apply (gsl_matrix *m, void(*fn)(gsl_vector *))

void	apop_matrix_apply_all (gsl_matrix *in, void(*fn)(double *))

gsl_matrix *	apop_matrix_copy (const gsl_matrix *in)

double	apop_matrix_determinant (const gsl_matrix *in)

gsl_matrix *	apop_matrix_inverse (const gsl_matrix *in)

int	apop_matrix_is_positive_semidefinite (gsl_matrix *m, char semi)

gsl_vector *	apop_matrix_map (const gsl_matrix *m, double(*fn)(gsl_vector *))

gsl_matrix *	apop_matrix_map_all (const gsl_matrix *in, double(*fn)(double))

double	apop_matrix_map_all_sum (const gsl_matrix *in, double(*fn)(double))

double	apop_matrix_map_sum (const gsl_matrix *in, double(*fn)(gsl_vector *))

double	apop_matrix_mean (const gsl_matrix *data)

void	apop_matrix_mean_and_var (const gsl_matrix *data, double *mean, double *var)

apop_data *	apop_matrix_pca (gsl_matrix *data, int const dimensions_we_want)

void	apop_matrix_print (const gsl_matrix *data, Output_declares)

void	apop_matrix_print (const gsl_matrix data, char const output_name, FILE *output_pipe, char output_type, char output_append)

gsl_matrix *	apop_matrix_realloc (gsl_matrix *m, size_t newheight, size_t newwidth)

void	apop_matrix_show (const gsl_matrix *data)

gsl_matrix *	apop_matrix_stack (gsl_matrix m1, gsl_matrix const m2, char posn, char inplace)

long double	apop_matrix_sum (const gsl_matrix *m)

double	apop_matrix_to_positive_semidefinite (gsl_matrix *m)

void	apop_maximum_likelihood (apop_data data, apop_model dist)

apop_model *	apop_ml_impute (apop_data d, apop_model meanvar)

apop_model *	apop_model_clear (apop_data data, apop_model model)

apop_model *	apop_model_copy (apop_model *in)

apop_model *	apop_model_cross_base (apop_model *mlist[])

apop_data *	apop_model_draws (apop_model model, int count, apop_data draws)

long double	apop_model_entropy (apop_model *in, int draws)

apop_model *	apop_model_fix_params (apop_model *model_in)

apop_model *	apop_model_fix_params_get_base (apop_model *model_in)

void	apop_model_free (apop_model *free_me)

apop_data *	apop_model_hessian (apop_data data, apop_model model, double delta)

apop_model *	apop_model_metropolis (apop_data d, gsl_rng rng, apop_model *m)

int	apop_model_metropolis_draw (double out, gsl_rng rng, apop_model *model)

apop_model *	apop_model_mixture_base (apop_model **inlist)

apop_data *	apop_model_numerical_covariance (apop_data data, apop_model model, double delta)

void	apop_model_print (apop_model *model, FILE *output_pipe)

apop_model *	apop_model_set_parameters_base (apop_model *in, double ap[])

void	apop_model_show (apop_model *print_me)

apop_model *	apop_model_to_pmf (apop_model model, apop_data binspec, long int draws, int bin_count)

long double	apop_multivariate_gamma (double a, int p)

long double	apop_multivariate_lngamma (double a, int p)

int	apop_name_add (apop_name n, char const add_me, char type)

apop_name *	apop_name_alloc (void)

apop_name *	apop_name_copy (apop_name *in)

int	apop_name_find (const apop_name n, const char findme, const char type)

void	apop_name_free (apop_name *free_me)

void	apop_name_print (apop_name *n)

void	apop_name_stack (apop_name n1, apop_name nadd, char type1, char typeadd)

gsl_vector *	apop_numerical_gradient (apop_data data, apop_model model, double delta)

double	apop_p (apop_data *d, apop_model *m)

apop_data *	apop_paired_t_test (gsl_vector a, gsl_vector b)

apop_model *	apop_parameter_model (apop_data d, apop_model m)

apop_data *	apop_predict (apop_data d, apop_model m)

void	apop_prep (apop_data d, apop_model m)

int	apop_prep_output (char const output_name, FILE output_pipe, char output_type, char *output_append)

int	apop_query (const char *q,...)

apop_data *	apop_query_to_data (const char *fmt,...)

double	apop_query_to_float (const char *fmt,...)

apop_data *	apop_query_to_mixed_data (const char typelist, const char fmt,...)

apop_data *	apop_query_to_text (const char *fmt,...)

gsl_vector *	apop_query_to_vector (const char *fmt,...)

apop_data *	apop_rake (char const margin_table, char const var_list, int var_ct, char const contrasts, int contrast_ct, char const structural_zeros, int max_iterations, double tolerance, char const count_col, char const init_table, char const *init_count_col, double nudge)

int	apop_regex (const char string, const char regex, apop_data **substrings, const char use_case)

gsl_rng *	apop_rng_alloc (int seed)

gsl_rng *	apop_rng_get_thread_base (int thread)

double	apop_rng_GHgB3 (gsl_rng r, double a)

void	apop_score (apop_data *d, gsl_vector *out, apop_model *m)

int	apop_system (const char *fmt,...)

apop_data *	apop_t_test (gsl_vector a, gsl_vector b)

int	apop_table_exists (char const *name, char remove)

double	apop_test (double statistic, char *distribution, double p1, double p2, char tail)

apop_data *	apop_test_anova_independence (apop_data *d)

apop_data *	apop_test_fisher_exact (apop_data *intab)

apop_data *	apop_test_kolmogorov (apop_model m1, apop_model m2)

apop_data *	apop_text_alloc (apop_data *in, const size_t row, const size_t col)

apop_data *	apop_text_fill_base (apop_data data, char text[])

void	apop_text_free (char ***freeme, int rows, int cols)

char *	apop_text_paste (apop_data const strings, char between, char before, char after, char between_cols, int(prune)(apop_data , int, int, void ), void *prune_parameter)

int	apop_text_set (apop_data in, const size_t row, const size_t col, const char fmt,...)

apop_data *	apop_text_to_data (char const text_file, int has_row_names, int has_col_names, int const field_ends, char const *delimiters)

int	apop_text_to_db (char const text_file, char tabname, int has_row_names, int has_col_names, char *field_names, int const field_ends, apop_data field_params, char table_params, char const *delimiters, char if_table_exists)

apop_data *	apop_text_unique_elements (const apop_data *d, size_t col)

apop_model *	apop_update (apop_data data, apop_model prior, apop_model likelihood, gsl_rng rng)

void	apop_vector_apply (gsl_vector *v, void(*fn)(double *))

int	apop_vector_bounded (const gsl_vector *in, long double max)

gsl_vector *	apop_vector_copy (const gsl_vector *in)

double	apop_vector_correlation (const gsl_vector ina, const gsl_vector inb, const gsl_vector *weights)

double	apop_vector_cov (gsl_vector const v1, gsl_vector const v2, gsl_vector const *weights)

double	apop_vector_distance (const gsl_vector ina, const gsl_vector inb, const char metric, const double norm)

long double	apop_vector_entropy (gsl_vector *in)

void	apop_vector_exp (gsl_vector *v)

gsl_vector *	apop_vector_fill_base (gsl_vector *in, double[])

double	apop_vector_kurtosis (const gsl_vector *in)

double	apop_vector_kurtosis_pop (gsl_vector const v, gsl_vector const weights)

void	apop_vector_log (gsl_vector *v)

void	apop_vector_log10 (gsl_vector *v)

gsl_vector *	apop_vector_map (const gsl_vector *v, double(*fn)(double))

double	apop_vector_map_sum (const gsl_vector *in, double(*fn)(double))

double	apop_vector_mean (gsl_vector const v, gsl_vector const weights)

gsl_vector *	apop_vector_moving_average (gsl_vector *, size_t)

void	apop_vector_normalize (gsl_vector in, gsl_vector *out, const char normalization_type)

double *	apop_vector_percentiles (gsl_vector *data, char rounding)

void	apop_vector_print (gsl_vector *data, Output_declares)

void	apop_vector_print (gsl_vector data, char const output_name, FILE *output_pipe, char output_type, char output_append)

gsl_vector *	apop_vector_realloc (gsl_vector *v, size_t newheight)

void	apop_vector_show (const gsl_vector *data)

double	apop_vector_skew (const gsl_vector *in)

double	apop_vector_skew_pop (gsl_vector const v, gsl_vector const weights)

gsl_vector *	apop_vector_stack (gsl_vector v1, gsl_vector const v2, char inplace)

long double	apop_vector_sum (const gsl_vector *in)

gsl_matrix *	apop_vector_to_matrix (const gsl_vector *in, char row_col)

gsl_vector *	apop_vector_unique_elements (const gsl_vector *v)

double	apop_vector_var (gsl_vector const v, gsl_vector const weights)

double	apop_vector_var_m (const gsl_vector *in, const double mean)

Variables
apop_model *	apop_bernoulli

apop_model *	apop_bernoulli

apop_model *	apop_bernoulli

apop_model *	apop_bernoulli

apop_model *	apop_beta

apop_model *	apop_beta

apop_model *	apop_beta

apop_model *	apop_beta

apop_model *	apop_binomial

apop_model *	apop_binomial

apop_model *	apop_binomial

apop_model *	apop_binomial

apop_model *	apop_chi_squared

apop_model *	apop_chi_squared

apop_model *	apop_chi_squared

apop_model *	apop_chi_squared

apop_model *	apop_composition

apop_model *	apop_composition

apop_model *	apop_composition

apop_model *	apop_composition

apop_model *	apop_coordinate_transform

apop_model *	apop_coordinate_transform

apop_model *	apop_coordinate_transform

apop_model *	apop_coordinate_transform

apop_model *	apop_cross

apop_model *	apop_cross

apop_model *	apop_cross

apop_model *	apop_cross

apop_model *	apop_dconstrain

apop_model *	apop_dconstrain

apop_model *	apop_dconstrain

apop_model *	apop_dconstrain

apop_model *	apop_dirichlet

apop_model *	apop_dirichlet

apop_model *	apop_dirichlet

apop_model *	apop_dirichlet

apop_model *	apop_exponential

apop_model *	apop_exponential

apop_model *	apop_exponential

apop_model *	apop_exponential

apop_model *	apop_f_distribution

apop_model *	apop_f_distribution

apop_model *	apop_f_distribution

apop_model *	apop_f_distribution

apop_model *	apop_gamma

apop_model *	apop_gamma

apop_model *	apop_gamma

apop_model *	apop_gamma

apop_model *	apop_improper_uniform

apop_model *	apop_improper_uniform

apop_model *	apop_improper_uniform

apop_model *	apop_improper_uniform

apop_model *	apop_iv

apop_model *	apop_iv

apop_model *	apop_iv

apop_model *	apop_iv

apop_model *	apop_kernel_density

apop_model *	apop_kernel_density

apop_model *	apop_kernel_density

apop_model *	apop_kernel_density

apop_model *	apop_loess

apop_model *	apop_loess

apop_model *	apop_loess

apop_model *	apop_loess

apop_model *	apop_logit

apop_model *	apop_logit

apop_model *	apop_logit

apop_model *	apop_logit

apop_model *	apop_lognormal

apop_model *	apop_lognormal

apop_model *	apop_lognormal

apop_model *	apop_lognormal

apop_model *	apop_mixture

apop_model *	apop_mixture

apop_model *	apop_mixture

apop_model *	apop_mixture

apop_model *	apop_multinomial

apop_model *	apop_multinomial

apop_model *	apop_multinomial

apop_model *	apop_multinomial

apop_model *	apop_multivariate_normal

apop_model *	apop_multivariate_normal

apop_model *	apop_multivariate_normal

apop_model *	apop_multivariate_normal

apop_model *	apop_normal

apop_model *	apop_normal

apop_model *	apop_normal

apop_model *	apop_normal

apop_model *	apop_ols

apop_model *	apop_ols

apop_model *	apop_ols

apop_model *	apop_ols

apop_opts_type	apop_opts

apop_opts_type	apop_opts

apop_opts_type	apop_opts

apop_opts_type	apop_opts

apop_opts_type	apop_opts

apop_opts_type	apop_opts

apop_model *	apop_pmf

apop_model *	apop_pmf

apop_model *	apop_pmf

apop_model *	apop_pmf

apop_model *	apop_poisson

apop_model *	apop_poisson

apop_model *	apop_poisson

apop_model *	apop_poisson

apop_model *	apop_probit

apop_model *	apop_probit

apop_model *	apop_probit

apop_model *	apop_probit

apop_model *	apop_t_distribution

apop_model *	apop_t_distribution

apop_model *	apop_t_distribution

apop_model *	apop_t_distribution

apop_model *	apop_uniform

apop_model *	apop_uniform

apop_model *	apop_uniform

apop_model *	apop_uniform

apop_model *	apop_wls

apop_model *	apop_wls

apop_model *	apop_wls

apop_model *	apop_wls

apop_model *	apop_yule

apop_model *	apop_yule

apop_model *	apop_yule

apop_model *	apop_yule

apop_model *	apop_zipf

apop_model *	apop_zipf

apop_model *	apop_zipf

apop_model *	apop_zipf

Detailed Description

Macro Definition Documentation

#define Apop_c	(	d,
		col
	)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also: Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_c	(	d,
		col
	)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also: Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_c	(	d,
		col
	)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also: Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_c	(	d,
		col
	)

A macro to generate a temporary one-column view of apop_data set d, pulling out only column col. The macro evaluates to a pointer to this temporary view, which you can use as you would any apop_data set.

See also: Apop_cs, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_col_t	(	d,
		colname,
		outd
	)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv

#define Apop_col_t	(	d,
		colname,
		outd
	)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv

#define Apop_col_t	(	d,
		colname,
		outd
	)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv

#define Apop_col_t	(	d,
		colname,
		outd
	)

After this call, outd will hold a view of the apop_data set d. The view will consist only of a gsl_vector view of the column of the apop_data set d with name colname. Unlike Apop_c, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_tv, Apop_mcv

#define Apop_col_tv	(	m,
		col,
		v
	)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv

#define Apop_col_tv	(	m,
		col,
		v
	)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv

#define Apop_col_tv	(	m,
		col,
		v
	)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv

#define Apop_col_tv	(	m,
		col,
		v
	)

After this call, v will hold a gsl_vector view of the apop_data set m. The view will consist only of the column with name col. Unlike Apop_cv, the second argument is a column name, which is looked up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_cs, Apop_c, Apop_cv, Apop_col_t, Apop_mcv

#define Apop_cs	(	d,
		colnum,
		len
	)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cs	(	d,
		colnum,
		len
	)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cs	(	d,
		colnum,
		len
	)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cs	(	d,
		colnum,
		len
	)

A macro to generate a temporary view of apop_data set d including only certain columns, beginning at column colnum and having length len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_c, Apop_cv, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cv	(	data_to_view,
		col
	)

A macro to generate a temporary one-column view of the matrix in an apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

 gsl_vector *v = Apop_cv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size2; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cv	(	data_to_view,
		col
	)

A macro to generate a temporary one-column view of the matrix in an apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

 gsl_vector *v = Apop_cv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size2; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cv	(	data_to_view,
		col
	)

A macro to generate a temporary one-column view of the matrix in an apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

 gsl_vector *v = Apop_cv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size2; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv

#define Apop_cv	(	data_to_view,
		col
	)

A macro to generate a temporary one-column view of the matrix in an apop_data set data_to_view, pulling out only column col. The view is a gsl_vector.

As usual, column -1 is the vector element of the apop_data set.

 gsl_vector *v = Apop_cv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size2; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_cv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_cs, Apop_c, Apop_col_tv, Apop_col_t, Apop_mcv

#define apop_data_add_names	(	dataset,
		type,
		...
	)

Add a list of names to a data set.

Use this with a list of names that you type in yourself, like
1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
Notice the lack of curly braces around the list.

You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
1 char *colnames[] = {"age", "sex", "height", NULL};
2 apop_data_add_names_base(mydata, 'c', colnames);
But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.

See also: apop_name_add, although apop_data_add_names will be more useful in most cases.

#define apop_data_add_names	(	dataset,
		type,
		...
	)

Add a list of names to a data set.

Use this with a list of names that you type in yourself, like
1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
Notice the lack of curly braces around the list.

You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
1 char *colnames[] = {"age", "sex", "height", NULL};
2 apop_data_add_names_base(mydata, 'c', colnames);
But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.

See also: apop_name_add, although apop_data_add_names will be more useful in most cases.

#define apop_data_add_names	(	dataset,
		type,
		...
	)

Add a list of names to a data set.

Use this with a list of names that you type in yourself, like
1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
Notice the lack of curly braces around the list.

You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
1 char *colnames[] = {"age", "sex", "height", NULL};
2 apop_data_add_names_base(mydata, 'c', colnames);
But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.

See also: apop_name_add, although apop_data_add_names will be more useful in most cases.

#define apop_data_add_names	(	dataset,
		type,
		...
	)

Add a list of names to a data set.

Use this with a list of names that you type in yourself, like
1 apop_data_add_names(mydata, 'c', "age", "sex", "height");
Notice the lack of curly braces around the list.

You may have an array of names, probably autogenerated, that you would like to add. In this case, make certain that the last element of the array is NULL, and call the base function:
1 char *colnames[] = {"age", "sex", "height", NULL};
2 apop_data_add_names_base(mydata, 'c', colnames);
But if you forget the NULL marker, this has good odds of segfaulting. You may prefer to use a for loop that inserts each name in turn using apop_name_add.

See also: apop_name_add, although apop_data_add_names will be more useful in most cases.

#define apop_data_free ( freeme )

Free an apop_data structure.

As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....

#define apop_data_free ( freeme )

Free an apop_data structure.

As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....

#define apop_data_free ( freeme )

Free an apop_data structure.

As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....

#define apop_data_free ( freeme )

Free an apop_data structure.

As with free(), it is safe to send in a NULL pointer (in which case the function does nothing).
If the more pointer is not NULL, I will free the pointed-to data set first. If you don't want to free data sets down the chain, set more=NULL before calling this.
This is actually a macro (that calls apop_data_free_base). It sets freeme to NULL when it's done, because there's nothing safe you can do with the freed location, and you can later safely test conditions like if (data) ....

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define apop_gaussian

Alias for the apop_normal distribution, qv.

#define Apop_mcv	(	matrix_to_view,
		col
	)

Get a vector view of a single column of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
col	An integer giving the column to be viewed.

Returns: A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
 printf("The correlation coefficient between columns two "
        "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 1), Apop_mcv(m, 2)));

See also: Apop_r, Apop_cv

#define Apop_mcv	(	matrix_to_view,
		col
	)

Get a vector view of a single column of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
col	An integer giving the column to be viewed.

Returns: A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
 printf("The correlation coefficient between columns two "
        "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 1), Apop_mcv(m, 2)));

See also: Apop_r, Apop_cv

#define Apop_mcv	(	matrix_to_view,
		col
	)

Get a vector view of a single column of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
col	An integer giving the column to be viewed.

Returns: A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
 printf("The correlation coefficient between columns two "
        "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 1), Apop_mcv(m, 2)));

See also: Apop_r, Apop_cv

#define Apop_mcv	(	matrix_to_view,
		col
	)

Get a vector view of a single column of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
col	An integer giving the column to be viewed.

Returns: A gsl_vector view of the given column. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

 gsl_matrix *m = apop_query_to_data("select col1, col2, col3 from data")->matrix;
 printf("The correlation coefficient between columns two "
        "and three is %g.\n", apop_vector_correlation(Apop_mcv(m, 1), Apop_mcv(m, 2)));

See also: Apop_r, Apop_cv

#define apop_model_copy_set	(	model,
		type,
		...
	)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns: A pointer to the newly-prepped model.

#define apop_model_copy_set	(	model,
		type,
		...
	)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns: A pointer to the newly-prepped model.

#define apop_model_copy_set	(	model,
		type,
		...
	)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns: A pointer to the newly-prepped model.

#define apop_model_copy_set	(	model,
		type,
		...
	)

Copy a model and add a settings group. Useful for models that require a settings group to function. See Apop_settings_add_group.

Returns: A pointer to the newly-prepped model.

#define apop_model_cross ( ... )

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.

Exceptions

error=='n' First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.

Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
All checks are done with assert().
*/
void cross_normals(){
    double mu = 2;
    double sigma = .1;
    //n1 is a Normal with parameters (mu, sigma); n2 is a copy of n1.
    apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_model *n2 = apop_model_copy(n1);
    apop_model *two_independent_normals = apop_model_cross(n1, n2);
    //
    //We don't use it, but the cross product of three is just as easy:
    apop_model *n3 = apop_model_copy(n1);
    apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
    //Draw 10,000 observations from the two-model cross.
    apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
    //The unparameterized cross product:
    apop_model *two_n = apop_model_cross(
                    apop_model_copy(apop_normal),
                    apop_model_copy(apop_normal)
                    );
    //Estimate the unparameterized cross from the draws, and print the result.
    apop_model *estimated_norms = apop_estimate(draws, two_n);
    apop_model_print(estimated_norms);
    //Pull each component model's parameter set out of the apop_cross settings group.
    apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
    apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
    //Element 0 of each parameter set is compared to mu, element 1 to sigma,
    //each with a tolerance of 2e-3.
    assert(fabs(apop_data_get(estp1, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp2, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
    assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
/* Bind together a Poisson and a Normal.
Cross a Poisson (parameter 3) with a Normal (parameters -5, 1), sanity-check draws
from the cross, then re-estimate the parameters from those draws using two-page input.
Finally, cross three models and compare summary statistics of its draws to the inputs.
All checks are done with assert().
*/
void norm_cross_poisson(){
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_cross(m1, m2);
    int len = 1e5;
    apop_data *draws = apop_model_draws(mm, len);
    //For every drawn row: element 0 must be integer-valued and element 1 must be negative.
    for (int i=0; i< len; i++){
        Apop_row_v(draws, i, onev);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }
    /*The rest of the test script recovers the parameters.
    Input data to an apop_cross model can take two formats. In cross_normals, the
    draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
    will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
    on a second page appended to the first. Then, set the .splitpage element of the
    apop_cross settings group to the name of the second page.
    */
    apop_data *comeback = apop_data_alloc();
    //Page one: a copy of column 0 of the draws, stored as the vector element.
    comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
    //Page two, named "p2": a copy of column 1 of the draws.
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
    //set up the un-parameterized crossed model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_cross(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_cross, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, estme);
    //test that the parameters are as promised.
    //NOTE(review): lowercase apop_settings_get here, while cross_normals uses
    //Apop_settings_get -- confirm that both spellings are provided by apop.h.
    apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
    apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
    //Compare the estimates to the inputs (3; -5 and 1), each with a tolerance of 5e-1.
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
    //You can cross as many models as you'd like.
    apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
    apop_model *mmm = apop_model_cross(m1, m2, m3);
    //Summarize 1e5 draws from the three-way cross; rows 0, 1, 2 of the summary are
    //checked against the parameters given for m1, m2, m3 respectively.
    apop_data *sum = apop_data_summarize(apop_model_draws(mmm, 1e5));
    assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
    //The medians of rows 0 and 2 are compared exactly.
    assert(apop_data_get(sum, .row=0, .colname="median") == 3);
    assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
//Run both examples in turn; each one checks its own results with assert().
int main(){
    cross_normals();
    norm_cross_poisson();
}

#define apop_model_cross ( ... )

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.

Exceptions

error=='n' First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.

Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
All checks are done with assert().
*/
void cross_normals(){
    double mu = 2;
    double sigma = .1;
    //n1 is a Normal with parameters (mu, sigma); n2 is a copy of n1.
    apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_model *n2 = apop_model_copy(n1);
    apop_model *two_independent_normals = apop_model_cross(n1, n2);
    //
    //We don't use it, but the cross product of three is just as easy:
    apop_model *n3 = apop_model_copy(n1);
    apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
    //Draw 10,000 observations from the two-model cross.
    apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
    //The unparameterized cross product:
    apop_model *two_n = apop_model_cross(
                    apop_model_copy(apop_normal),
                    apop_model_copy(apop_normal)
                    );
    //Estimate the unparameterized cross from the draws, and print the result.
    apop_model *estimated_norms = apop_estimate(draws, two_n);
    apop_model_print(estimated_norms);
    //Pull each component model's parameter set out of the apop_cross settings group.
    apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
    apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
    //Element 0 of each parameter set is compared to mu, element 1 to sigma,
    //each with a tolerance of 2e-3.
    assert(fabs(apop_data_get(estp1, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp2, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
    assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
/* Bind together a Poisson and a Normal.
Cross a Poisson (parameter 3) with a Normal (parameters -5, 1), sanity-check draws
from the cross, then re-estimate the parameters from those draws using two-page input.
Finally, cross three models and compare summary statistics of its draws to the inputs.
All checks are done with assert().
*/
void norm_cross_poisson(){
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_cross(m1, m2);
    int len = 1e5;
    apop_data *draws = apop_model_draws(mm, len);
    //For every drawn row: element 0 must be integer-valued and element 1 must be negative.
    for (int i=0; i< len; i++){
        Apop_row_v(draws, i, onev);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }
    /*The rest of the test script recovers the parameters.
    Input data to an apop_cross model can take two formats. In cross_normals, the
    draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
    will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
    on a second page appended to the first. Then, set the .splitpage element of the
    apop_cross settings group to the name of the second page.
    */
    apop_data *comeback = apop_data_alloc();
    //Page one: a copy of column 0 of the draws, stored as the vector element.
    comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
    //Page two, named "p2": a copy of column 1 of the draws.
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
    //set up the un-parameterized crossed model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_cross(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_cross, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, estme);
    //test that the parameters are as promised.
    //NOTE(review): lowercase apop_settings_get here, while cross_normals uses
    //Apop_settings_get -- confirm that both spellings are provided by apop.h.
    apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
    apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
    //Compare the estimates to the inputs (3; -5 and 1), each with a tolerance of 5e-1.
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
    //You can cross as many models as you'd like.
    apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
    apop_model *mmm = apop_model_cross(m1, m2, m3);
    //Summarize 1e5 draws from the three-way cross; rows 0, 1, 2 of the summary are
    //checked against the parameters given for m1, m2, m3 respectively.
    apop_data *sum = apop_data_summarize(apop_model_draws(mmm, 1e5));
    assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
    //The medians of rows 0 and 2 are compared exactly.
    assert(apop_data_get(sum, .row=0, .colname="median") == 3);
    assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
//Run both examples in turn; each one checks its own results with assert().
int main(){
    cross_normals();
    norm_cross_poisson();
}

#define apop_model_cross ( ... )

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.

Exceptions

error=='n' First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.

Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
All checks are done with assert().
*/
void cross_normals(){
    double mu = 2;
    double sigma = .1;
    //n1 is a Normal with parameters (mu, sigma); n2 is a copy of n1.
    apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_model *n2 = apop_model_copy(n1);
    apop_model *two_independent_normals = apop_model_cross(n1, n2);
    //
    //We don't use it, but the cross product of three is just as easy:
    apop_model *n3 = apop_model_copy(n1);
    apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
    //Draw 10,000 observations from the two-model cross.
    apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
    //The unparameterized cross product:
    apop_model *two_n = apop_model_cross(
                    apop_model_copy(apop_normal),
                    apop_model_copy(apop_normal)
                    );
    //Estimate the unparameterized cross from the draws, and print the result.
    apop_model *estimated_norms = apop_estimate(draws, two_n);
    apop_model_print(estimated_norms);
    //Pull each component model's parameter set out of the apop_cross settings group.
    apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
    apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
    //Element 0 of each parameter set is compared to mu, element 1 to sigma,
    //each with a tolerance of 2e-3.
    assert(fabs(apop_data_get(estp1, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp2, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
    assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
/* Bind together a Poisson and a Normal.
Cross a Poisson (parameter 3) with a Normal (parameters -5, 1), sanity-check draws
from the cross, then re-estimate the parameters from those draws using two-page input.
Finally, cross three models and compare summary statistics of its draws to the inputs.
All checks are done with assert().
*/
void norm_cross_poisson(){
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_cross(m1, m2);
    int len = 1e5;
    apop_data *draws = apop_model_draws(mm, len);
    //For every drawn row: element 0 must be integer-valued and element 1 must be negative.
    for (int i=0; i< len; i++){
        Apop_row_v(draws, i, onev);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }
    /*The rest of the test script recovers the parameters.
    Input data to an apop_cross model can take two formats. In cross_normals, the
    draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
    will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
    on a second page appended to the first. Then, set the .splitpage element of the
    apop_cross settings group to the name of the second page.
    */
    apop_data *comeback = apop_data_alloc();
    //Page one: a copy of column 0 of the draws, stored as the vector element.
    comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
    //Page two, named "p2": a copy of column 1 of the draws.
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
    //set up the un-parameterized crossed model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_cross(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_cross, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, estme);
    //test that the parameters are as promised.
    //NOTE(review): lowercase apop_settings_get here, while cross_normals uses
    //Apop_settings_get -- confirm that both spellings are provided by apop.h.
    apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
    apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
    //Compare the estimates to the inputs (3; -5 and 1), each with a tolerance of 5e-1.
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
    //You can cross as many models as you'd like.
    apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
    apop_model *mmm = apop_model_cross(m1, m2, m3);
    //Summarize 1e5 draws from the three-way cross; rows 0, 1, 2 of the summary are
    //checked against the parameters given for m1, m2, m3 respectively.
    apop_data *sum = apop_data_summarize(apop_model_draws(mmm, 1e5));
    assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
    //The medians of rows 0 and 2 are compared exactly.
    assert(apop_data_get(sum, .row=0, .colname="median") == 3);
    assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
//Run both examples in turn; each one checks its own results with assert().
int main(){
    cross_normals();
    norm_cross_poisson();
}

#define apop_model_cross ( ... )

Generate a model consisting of the cross product of several independent models. The output apop_model is a copy of apop_cross; see that model's documentation for details.

If you input only one model, return a copy of that model; print a warning iff apop_opts.verbose >= 2.

Exceptions

error=='n' First model input is NULL.

Examples:

#include <apop.h>
/* In this initial example, build a cross product of two Normal(2,.1) distributions.
Make 10,000 draws from it.

Then, build a cross product of two unparameterized Normals and estimate the parameters
of the combined model; check that they match the (2, .1) we started with.
All checks are done with assert().
*/
void cross_normals(){
    double mu = 2;
    double sigma = .1;
    //n1 is a Normal with parameters (mu, sigma); n2 is a copy of n1.
    apop_model *n1 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_model *n2 = apop_model_copy(n1);
    apop_model *two_independent_normals = apop_model_cross(n1, n2);
    //
    //We don't use it, but the cross product of three is just as easy:
    apop_model *n3 = apop_model_copy(n1);
    apop_model *three_independent_normals = apop_model_cross(n1, n2, n3);
    //Draw 10,000 observations from the two-model cross.
    apop_data *draws = apop_model_draws(two_independent_normals, .count=10000);
    //The unparameterized cross product:
    apop_model *two_n = apop_model_cross(
                    apop_model_copy(apop_normal),
                    apop_model_copy(apop_normal)
                    );
    //Estimate the unparameterized cross from the draws, and print the result.
    apop_model *estimated_norms = apop_estimate(draws, two_n);
    apop_model_print(estimated_norms);
    //Pull each component model's parameter set out of the apop_cross settings group.
    apop_data *estp1 = Apop_settings_get(estimated_norms, apop_cross, model1)->parameters;
    apop_data *estp2 = Apop_settings_get(estimated_norms, apop_cross, model2)->parameters;
    //Element 0 of each parameter set is compared to mu, element 1 to sigma,
    //each with a tolerance of 2e-3.
    assert(fabs(apop_data_get(estp1, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp2, 0) - mu)    < 2e-3);
    assert(fabs(apop_data_get(estp1, 1) - sigma) < 2e-3);
    assert(fabs(apop_data_get(estp2, 1) - sigma) < 2e-3);
}
/* Bind together a Poisson and a Normal.
Cross a Poisson (parameter 3) with a Normal (parameters -5, 1), sanity-check draws
from the cross, then re-estimate the parameters from those draws using two-page input.
Finally, cross three models and compare summary statistics of its draws to the inputs.
All checks are done with assert().
*/
void norm_cross_poisson(){
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_cross(m1, m2);
    int len = 1e5;
    apop_data *draws = apop_model_draws(mm, len);
    //For every drawn row: element 0 must be integer-valued and element 1 must be negative.
    for (int i=0; i< len; i++){
        Apop_row_v(draws, i, onev);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }
    /*The rest of the test script recovers the parameters.
    Input data to an apop_cross model can take two formats. In cross_normals, the
    draws are in a single matrix. Here, the data for the Poisson (col 0 of the draws)
    will be put in an apop_data set, and the data for the Normal (col 1 of the draws)
    on a second page appended to the first. Then, set the .splitpage element of the
    apop_cross settings group to the name of the second page.
    */
    apop_data *comeback = apop_data_alloc();
    //Page one: a copy of column 0 of the draws, stored as the vector element.
    comeback->vector = apop_vector_copy(Apop_cv(draws, 0));
    //Page two, named "p2": a copy of column 1 of the draws.
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    comeback->more->vector = apop_vector_copy(Apop_cv(draws, 1));
    //set up the un-parameterized crossed model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_cross(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_cross, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, estme);
    //test that the parameters are as promised.
    //NOTE(review): lowercase apop_settings_get here, while cross_normals uses
    //Apop_settings_get -- confirm that both spellings are provided by apop.h.
    apop_model *m1back = apop_settings_get(ested, apop_cross, model1);
    apop_model *m2back = apop_settings_get(ested, apop_cross, model2);
    //Compare the estimates to the inputs (3; -5 and 1), each with a tolerance of 5e-1.
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 5e-1);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 5e-1);
    //You can cross as many models as you'd like.
    apop_model *m3 = apop_model_set_parameters(apop_poisson, 8);
    apop_model *mmm = apop_model_cross(m1, m2, m3);
    //Summarize 1e5 draws from the three-way cross; rows 0, 1, 2 of the summary are
    //checked against the parameters given for m1, m2, m3 respectively.
    apop_data *sum = apop_data_summarize(apop_model_draws(mmm, 1e5));
    assert(fabs(apop_data_get(sum, .row=0, .colname="mean") - 3) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=1, .colname="mean") - -5) < 2e-2);
    assert(fabs(apop_data_get(sum, .row=2, .colname="mean") - 8) < 4e-2);
    //The medians of rows 0 and 2 are compared exactly.
    assert(apop_data_get(sum, .row=0, .colname="median") == 3);
    assert(apop_data_get(sum, .row=2, .colname="median") == 8);
}
//Run both examples in turn; each one checks its own results with assert().
int main(){
    cross_normals();
    norm_cross_poisson();
}

#define apop_model_mixture ( ... )

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters

...	A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.

#define apop_model_mixture ( ... )

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters

...	A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.

#define apop_model_mixture ( ... )

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters

...	A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.

#define apop_model_mixture ( ... )

Produce a model as a linear combination of other models. See the documentation for the apop_mixture model.

Parameters

...	A list of models, either all parameterized or all unparameterized. See examples in the apop_mixture documentation.

#define Apop_model_set_settings	(	model,
		...
	)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
                     .base_data=d,
                     .set_fn = set_uniform_edges,
                     .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_model_set_settings	(	model,
		...
	)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
                     .base_data=d,
                     .set_fn = set_uniform_edges,
                     .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_model_set_settings	(	model,
		...
	)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
                     .base_data=d,
                     .set_fn = set_uniform_edges,
                     .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_model_set_settings	(	model,
		...
	)

This is the complement to apop_model_set_parameters, for those models that are set up by adding a settings group, rather than filling in a list of parameters.

For example, the apop_kernel_density model is built by adding an apop_kernel_density_settings group. From the example on the apop_kernel_density page:

 apop_model *k2 = apop_model_set_settings(apop_kernel_density,
                     .base_data=d,
                     .set_fn = set_uniform_edges,
                     .kernel = apop_uniform);

The name of the model and the settings group to be built must match, which is the case for many model transformations, including apop_dconstrain and apop_cross. If the names do not match, use apop_model_copy_set.

#define Apop_mrv	(	matrix_to_view,
		row
	)

Get a vector view of a single row of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
row	An integer giving the row to be viewed.

Returns: A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also: Apop_r, Apop_rv

#define Apop_mrv	(	matrix_to_view,
		row
	)

Get a vector view of a single row of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
row	An integer giving the row to be viewed.

Returns: A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also: Apop_r, Apop_rv

#define Apop_mrv	(	matrix_to_view,
		row
	)

Get a vector view of a single row of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
row	An integer giving the row to be viewed.

Returns: A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also: Apop_r, Apop_rv

#define Apop_mrv	(	matrix_to_view,
		row
	)

Get a vector view of a single row of a gsl_matrix.

Parameters

matrix_to_view	A gsl_matrix.
row	An integer giving the row to be viewed.

Returns: A gsl_vector view of the given row. The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See apop_vector_correlation for an example of use.

See also: Apop_r, Apop_rv

#define Apop_notify	(	verbosity,
		...
	)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters

verbosity	At what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...	The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., `apop_notify(0, "Beta is currently %g", beta)`.

#define Apop_notify	(	verbosity,
		...
	)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters

verbosity	At what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...	The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., `apop_notify(0, "Beta is currently %g", beta)`.

#define Apop_notify	(	verbosity,
		...
	)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters

verbosity	At what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...	The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., `apop_notify(0, "Beta is currently %g", beta)`.

#define Apop_notify	(	verbosity,
		...
	)

Notify the user of errors, warnings, or debug info.

Writes to apop_opts.log_file, which is a FILE handle. The default is stderr, but use fopen to attach to a file.

Parameters

verbosity	At what verbosity level should the user be warned? E.g., if verbosity==2, then print iff apop_opts.verbose >= 2.
...	The message to write to the log (presuming the verbosity level is high enough). This can be a printf-style format with following arguments, e.g., `apop_notify(0, "Beta is currently %g", beta)`.

#define Apop_r	(	d,
		rownum
	)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

 //pull a single row
 apop_data *v = Apop_r(your_data, 7);
 
 //or loop through a sequence of one-row data sets.
 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Std Normal CDF up to observation %i is %g\n",
                        i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_r	(	d,
		rownum
	)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

 //pull a single row
 apop_data *v = Apop_r(your_data, 7);
 
 //or loop through a sequence of one-row data sets.
 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Std Normal CDF up to observation %i is %g\n",
                        i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_r	(	d,
		rownum
	)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

 //pull a single row
 apop_data *v = Apop_r(your_data, 7);
 
 //or loop through a sequence of one-row data sets.
 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Std Normal CDF up to observation %i is %g\n",
                        i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_r	(	d,
		rownum
	)

A macro to generate a temporary one-row view of apop_data set d, pulling out only row rownum. The view is also an apop_data set, with names and other decorations.

 //pull a single row
 apop_data *v = Apop_r(your_data, 7);
 
 //or loop through a sequence of one-row data sets.
 apop_model *std = apop_model_set_parameters(apop_normal, 0, 1);
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Std Normal CDF up to observation %i is %g\n",
                        i, apop_cdf(Apop_r(your_data, i), std));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_rs, Apop_row_v, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_row_t	(	d,
		rowname,
		outd
	)

After this call, outd will hold an apop_data view of an apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv

#define Apop_row_t	(	d,
		rowname,
		outd
	)

After this call, outd will hold an apop_data view of an apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv

#define Apop_row_t	(	d,
		rowname,
		outd
	)

After this call, outd will hold an apop_data view of an apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv

#define Apop_row_t	(	d,
		rowname,
		outd
	)

After this call, outd will hold an apop_data view of an apop_data set d. The view will consist only of the row with name rowname. Unlike Apop_r, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_tv, Apop_mrv

#define Apop_row_tv	(	m,
		row,
		v
	)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv

#define Apop_row_tv	(	m,
		row,
		v
	)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv

#define Apop_row_tv	(	m,
		row,
		v
	)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv

#define Apop_row_tv	(	m,
		row,
		v
	)

After this call, v will hold a gsl_vector view of an apop_data set m. The view will consist only of the row with name row. Unlike Apop_rv, the second argument is a row name, which I'll look up using apop_name_find, and the third is the name of the view to be generated.

See also: Apop_rs, Apop_r, Apop_rv, Apop_row_t, Apop_mrv

#define Apop_rs	(	d,
		rownum,
		len
	)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rs	(	d,
		rownum,
		len
	)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rs	(	d,
		rownum,
		len
	)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rs	(	d,
		rownum,
		len
	)

A macro to generate a temporary view of apop_data set d pulling only certain rows, beginning at row rownum and having height len.

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rv, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rv	(	data_to_view,
		row
	)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only the row numbered row. The view is a gsl_vector.

 gsl_vector *v = Apop_rv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rs, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rv	(	data_to_view,
		row
	)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only the row numbered row. The view is a gsl_vector.

 gsl_vector *v = Apop_rv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rs, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rv	(	data_to_view,
		row
	)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only the row numbered row. The view is a gsl_vector.

 gsl_vector *v = Apop_rv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rs, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_rv	(	data_to_view,
		row
	)

A macro to generate a temporary one-row view of the matrix in an apop_data set data_to_view, pulling out only the row numbered row. The view is a gsl_vector.

 gsl_vector *v = Apop_rv(your_data, i);
 
 for (int i=0; i< your_data->matrix->size1; i++)
     printf("Σ_%i = %g\n", i, apop_vector_sum(Apop_rv(your_data, i)));

The view is automatically allocated, and disappears as soon as the program leaves the scope in which it is declared.

See also: Apop_r, Apop_rs, Apop_row_tv, Apop_row_t, Apop_mrv

#define Apop_settings_add_group	(	model,
		type,
		...
	)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns: A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.

#define Apop_settings_add_group	(	model,
		type,
		...
	)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns: A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.

#define Apop_settings_add_group	(	model,
		type,
		...
	)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns: A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.

#define Apop_settings_add_group	(	model,
		type,
		...
	)

Add a settings group. The first two arguments (the model you are attaching to and the settings group name) are mandatory, and then you can use the Designated initializers syntax to specify default values (if any).

Returns: A pointer to the newly-prepped group.

See Settings groups, Optimization, or Apop_settings_set for examples.

If a settings group of the given type is already attached to the model, the previous version is removed. Use Apop_settings_get to check whether a group of the given type is already attached to a model, and Apop_settings_set to modify an existing group.

#define Apop_settings_copy	(	name,
		...
	)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_copy	(	name,
		...
	)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_copy	(	name,
		...
	)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_copy	(	name,
		...
	)

A convenience macro for declaring the copy function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_declarations ( ysg )

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_declarations ( ysg )

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_declarations ( ysg )

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_declarations ( ysg )

Put this in your header file to declare the init, copy, and free functions for ysg_settings. Of course, these functions will also have to be defined in a .c file using Apop_settings_init, Apop_settings_copy, and Apop_settings_free.

#define Apop_settings_free	(	name,
		...
	)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_free	(	name,
		...
	)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_free	(	name,
		...
	)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_free	(	name,
		...
	)

A convenience macro for declaring the delete function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_get	(	model,
		type,
		setting
	)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters

model	An apop_model.
type	A string giving the type of the settings group you are retrieving, without the `_settings` ending. E.g., for an apop_mle_settings group, use `apop_mle`.
setting	The struct element you want to retrieve.

#define Apop_settings_get	(	model,
		type,
		setting
	)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters

model	An apop_model.
type	A string giving the type of the settings group you are retrieving, without the `_settings` ending. E.g., for an apop_mle_settings group, use `apop_mle`.
setting	The struct element you want to retrieve.

#define Apop_settings_get	(	model,
		type,
		setting
	)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters

model	An apop_model.
type	A string giving the type of the settings group you are retrieving, without the `_settings` ending. E.g., for an apop_mle_settings group, use `apop_mle`.
setting	The struct element you want to retrieve.

#define Apop_settings_get	(	model,
		type,
		setting
	)

Retrieves a setting from a model. See Apop_settings_get_group to pull the entire group.

Parameters

model	An apop_model.
type	A string giving the type of the settings group you are retrieving, without the `_settings` ending. E.g., for an apop_mle_settings group, use `apop_mle`.
setting	The struct element you want to retrieve.

#define Apop_settings_get_group	(	m,
		type
	)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

 if (!apop_settings_get_group(m, "apop_ols")) ...

Parameters

m	An apop_model
type	A string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only `apop_mle`.

Returns: A void pointer to the desired struct (or NULL if not found).

#define Apop_settings_get_group	(	m,
		type
	)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

 if (!apop_settings_get_group(m, "apop_ols")) ...

Parameters

m	An apop_model
type	A string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only `apop_mle`.

Returns: A void pointer to the desired struct (or NULL if not found).

#define Apop_settings_get_group	(	m,
		type
	)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

 if (!apop_settings_get_group(m, "apop_ols")) ...

Parameters

m	An apop_model
type	A string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only `apop_mle`.

Returns: A void pointer to the desired struct (or NULL if not found).

#define Apop_settings_get_group	(	m,
		type
	)

Retrieves a settings group from a model. See Apop_settings_get to just pull a single item from within the settings group.

This macro returns NULL if a group of type type_settings isn't found attached to model m, so you can easily put it in a conditional like

 if (!apop_settings_get_group(m, "apop_ols")) ...

Parameters

m	An apop_model
type	A string giving the type of the settings group you are retrieving. E.g., for an apop_mle_settings group, use only `apop_mle`.

Returns: A void pointer to the desired struct (or NULL if not found).

#define Apop_settings_init	(	name,
		...
	)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_init	(	name,
		...
	)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_init	(	name,
		...
	)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_init	(	name,
		...
	)

A convenience macro for declaring the initialization function for a new settings group. See Writing new settings groups for details and an example.

#define Apop_settings_rm_group	(	m,
		type
	)

Removes a settings group from a model's list.

If the so-named group is not found, do nothing.

#define Apop_settings_rm_group	(	m,
		type
	)

Removes a settings group from a model's list.

If the so-named group is not found, do nothing.

#define Apop_settings_rm_group	(	m,
		type
	)

Removes a settings group from a model's list.

If the so-named group is not found, do nothing.

#define Apop_settings_rm_group	(	m,
		type
	)

Removes a settings group from a model's list.

If the so-named group is not found, do nothing.

#define Apop_settings_set	(	model,
		type,
		setting,
		data
	)

Modifies a single element of a settings group to the given value.

For example,

 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
 
 //Add an apop_mle_settings group to specify the search strategy
 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
                                            .step_size=3, .tolerance=1e-6);
 
 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
 Apop_settings_set(mix, apop_mixture, find_weights, 'y');  //Search for optimal mixture weights
 Apop_settings_set(mix, apop_mle, method, "NM simplex");   //Nelder-Mead simplex algorithm
 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.

If model==NULL, fails silently.
If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.

#define Apop_settings_set	(	model,
		type,
		setting,
		data
	)

Modifies a single element of a settings group to the given value.

For example,

 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
 
 //Add an apop_mle_settings group to specify the search strategy
 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
                                            .step_size=3, .tolerance=1e-6);
 
 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
 Apop_settings_set(mix, apop_mixture, find_weights, 'y');  //Search for optimal mixture weights
 Apop_settings_set(mix, apop_mle, method, "NM simplex");   //Nelder-Mead simplex algorithm
 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.

If model==NULL, fails silently.
If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.

#define Apop_settings_set	(	model,
		type,
		setting,
		data
	)

Modifies a single element of a settings group to the given value.

For example,

 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
 
 //Add an apop_mle_settings group to specify the search strategy
 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
                                            .step_size=3, .tolerance=1e-6);
 
 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
 Apop_settings_set(mix, apop_mixture, find_weights, 'y');  //Search for optimal mixture weights
 Apop_settings_set(mix, apop_mle, method, "NM simplex");   //Nelder-Mead simplex algorithm
 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.

If model==NULL, fails silently.
If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.

#define Apop_settings_set	(	model,
		type,
		setting,
		data
	)

Modifies a single element of a settings group to the given value.

For example,

 //set up a mixture of two Normals. This function initializes an apop_mixture_settings group
 apop_model *mix = apop_model_mixture(apop_model_copy(apop_normal), apop_model_copy(apop_normal));
 
 //Add an apop_mle_settings group to specify the search strategy
 Apop_settings_add_group(mix, apop_mle, .starting_pt=(double[]){.5, .5, 50, 5, 80, 5},
                                            .step_size=3, .tolerance=1e-6);
 
 //The mix model now has apop_mle and apop_mixture settings groups attached. Modify them:
 Apop_settings_set(mix, apop_mixture, find_weights, 'y');  //Search for optimal mixture weights
 Apop_settings_set(mix, apop_mle, method, "NM simplex");   //Nelder-Mead simplex algorithm
 apop_model *optimal_mix = apop_estimate(input_data, mix); //Everything is set up, so do the search.

If model==NULL, fails silently.
If model!=NULL but the given settings group is not found attached to the model, set model->error='s'.

#define Apop_stopif	(	test,
		onfail,
		level,
		...
	)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters

test	The expression that, if true, triggers the action.
onfail	If the assertion fails, do this. E.g., `out->error='x'; return GSL_NAN`. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be `goto outro`, plus an appropriately-labeled section at the end of your function.
level	Print the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...	The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

 //the typical case, stopping function execution:
 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
 
 //Mark a flag, go to a cleanup step
 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
 
 //Print a diagnostic iff apop_opts.verbose>=1 and continue
 Apop_stopif(x < 0,  , 1, "warning: x is %g.", x);

If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.

#define Apop_stopif	(	test,
		onfail,
		level,
		...
	)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters

test	The expression that, if true, triggers the action.
onfail	If the assertion fails, do this. E.g., `out->error='x'; return GSL_NAN`. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be `goto outro`, plus an appropriately-labeled section at the end of your function.
level	Print the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...	The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

 //the typical case, stopping function execution:
 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
 
 //Mark a flag, go to a cleanup step
 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
 
 //Print a diagnostic iff <tt>apop_opts.verbose>=1</tt> and continue
 Apop_stopif(x < 0,  , 1, "warning: x is %g.", x);

If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.

#define Apop_stopif	(	test,
		onfail,
		level,
		...
	)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters

test	The expression that, if true, triggers the action.
onfail	If the assertion fails, do this. E.g., `out->error='x'; return GSL_NAN`. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be `goto outro`, plus an appropriately-labeled section at the end of your function.
level	Print the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...	The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

 //the typical case, stopping function execution:
 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
 
 //Mark a flag, go to a cleanup step
 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
 
 //Print a diagnostic iff <tt>apop_opts.verbose>=1</tt> and continue
 Apop_stopif(x < 0,  , 1, "warning: x is %g.", x);

If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.

#define Apop_stopif	(	test,
		onfail,
		level,
		...
	)

Execute an action and print a message to the current FILE handle held by apop_opts.log_file (default: stderr).

Parameters

test	The expression that, if true, triggers the action.
onfail	If the assertion fails, do this. E.g., `out->error='x'; return GSL_NAN`. Notice that it is OK to include several lines of semicolon-separated code here, but if you have a lot to do, the most readable option may be `goto outro`, plus an appropriately-labeled section at the end of your function.
level	Print the warning message only if apop_opts.verbose is greater than or equal to this. Zero usually works, but for minor infractions use one, or for more verbose debugging output use 2.
...	The error message in printf form, plus any arguments to be inserted into the printf string. I'll provide the function name and a carriage return.

Some examples:

 //the typical case, stopping function execution:
 Apop_stopif(isnan(x), return NAN, 0, "x is NAN; failing");
 
 //Mark a flag, go to a cleanup step
 Apop_stopif(x < 0, needs_cleanup=1; goto cleanup, 0, "x is %g; cleaning up and exiting.", x);
 
 //Print a diagnostic iff <tt>apop_opts.verbose>=1</tt> and continue
 Apop_stopif(x < 0,  , 1, "warning: x is %g.", x);

If apop_opts.stop_on_warning is nonzero and not 'v', then a failed test halts via abort(), even if the apop_opts.verbose level is set so that the warning message doesn't print to screen. Use this when running via debugger.
If apop_opts.stop_on_warning is 'v', then a failed test halts via abort() iff the verbosity level is high enough to print the error.

#define Apop_subm	(	matrix_to_view,
		srow,
		scol,
		nrows,
		ncols
	)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters

matrix_to_view	The root matrix
srow	the first row (in the root matrix) of the top of the submatrix
scol	the first column (in the root matrix) of the left edge of the submatrix
nrows	number of rows in the submatrix
ncols	number of columns in the submatrix

Returns: An automatically-allocated view of type gsl_matrix.

#define Apop_subm	(	matrix_to_view,
		srow,
		scol,
		nrows,
		ncols
	)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters

matrix_to_view	The root matrix
srow	the first row (in the root matrix) of the top of the submatrix
scol	the first column (in the root matrix) of the left edge of the submatrix
nrows	number of rows in the submatrix
ncols	number of columns in the submatrix

Returns: An automatically-allocated view of type gsl_matrix.

#define Apop_subm	(	matrix_to_view,
		srow,
		scol,
		nrows,
		ncols
	)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters

matrix_to_view	The root matrix
srow	the first row (in the root matrix) of the top of the submatrix
scol	the first column (in the root matrix) of the left edge of the submatrix
nrows	number of rows in the submatrix
ncols	number of columns in the submatrix

Returns: An automatically-allocated view of type gsl_matrix.

#define Apop_subm	(	matrix_to_view,
		srow,
		scol,
		nrows,
		ncols
	)

Generate a view of a submatrix within a gsl_matrix. Like Apop_r, et al., the view is an automatically-allocated variable that is lost once the program flow leaves the scope in which it is declared.

Parameters

matrix_to_view	The root matrix
srow	the first row (in the root matrix) of the top of the submatrix
scol	the first column (in the root matrix) of the left edge of the submatrix
nrows	number of rows in the submatrix
ncols	number of columns in the submatrix

Returns: An automatically-allocated view of type gsl_matrix.

Function Documentation

apop_data * apop_anova	(	char *	table,
		char *	data,
		char *	grouping1,
		char *	grouping2
	)

This function produces a traditional one- or two-way ANOVA table. It works from data in an SQL table, using queries of a form like select data from table group by grouping1, grouping2.

Parameters

table	The table to be queried. Anything that can go in an SQL `from` clause is OK, so this can be a plain table name or a temp table specification like `(select ... )`, with parens.
data	The name of the column holding the count or other such data
grouping1	The name of the first column by which to group data
grouping2	If this is `NULL`, then the function will return a one-way ANOVA. Otherwise, the name of the second column by which to group data in a two-way ANOVA.

int apop_arms_draw	(	double *	out,
		gsl_rng *	r,
		apop_model *	m
	)

Adaptive rejection Metropolis sampling, to make random draws from a univariate distribution.

The author, Wally Gilks, explains on http://www.amsta.leeds.ac.uk/~wally.gilks/adaptive.rejection/web_page/Welcome.html , that ``ARS works by constructing an envelope function of the log of the target density, which is then used in rejection sampling (see, for example, Ripley, 1987). Whenever a point is rejected by ARS, the envelope is updated to correspond more closely to the true log density, thereby reducing the chance of rejecting subsequent points. Fewer ARS rejection steps implies fewer point-evaluations of the log density.''

It accepts only functions with univariate inputs. I.e., it will put a single value into a 1x1 apop_data set, and then evaluate the log likelihood at that point. For multivariate situations, see apop_model_metropolis.

It is currently the default for the apop_draw function given a univariate model, so you can just call that if you prefer.

There are a great number of parameters, in the apop_arms_settings structure. The structure also holds a history of the points tested to date. That means that the system will be more accurate as more draws are made. It also means that if the parameters change, or you use apop_model_copy, you should call Apop_settings_rm_group(your_model, apop_arms) to clear the model of points that are not valid for a different situation.

gsl_vector * apop_array_to_vector	(	double *	in,
		int	size
	)

Copies a one-dimensional array to a gsl_vector. The input array is undisturbed.

Parameters

in	An array of `double`s. (No default. Must not be `NULL`);
size	How long `in` is. If this is zero or omitted, I'll guess using the `sizeof(in)/sizeof(in[0])` trick, which will work for most arrays allocated using `double []` and won't work for those allocated using `double *`. (default = auto-guess)

Returns: A gsl_vector, allocated and filled with a copy of (not a pointer to) the input data.

If you send in a NULL array, you get a NULL pointer in return. I warn you of this if apop_opts.verbose >=1 .

This function uses the Designated initializers syntax for inputs.
See also
apop_data_falloc

apop_model * apop_beta_from_mean_var	(	double	m,
		double	v
	)

The Beta distribution is useful for modeling because it is bounded between zero and one, and can be either unimodal (if the variance is low) or bimodal (if the variance is high), and can have either a slant toward the bottom or top of the range (depending on the mean).

The distribution has two parameters, typically named $\alpha$ and $\beta$ , which can be difficult to interpret. However, there is a one-to-one mapping between (alpha, beta) pairs and (mean, variance) pairs. Since we have good intuition about the meaning of means and variances, this function takes in a mean and variance, calculates alpha and beta behind the scenes, and returns the appropriate Beta distribution.

Parameters

m	The mean the Beta distribution should have. Notice that m is in [0,1].
v	The variance which the Beta distribution should have. It is in (0, 1/12), where (1/12) is the variance of a Uniform(0,1) distribution. Funny things happen with variance near 1/12 and mean far from 1/2.

Returns: Returns an apop_model produced by copying the apop_beta model and setting its parameters appropriately.

Exceptions

out->error=='r' Range error: mean is not within [0, 1].

apop_data * apop_bootstrap_cov	(	apop_data *	data,
		apop_model *	model,
		gsl_rng *	rng,
		int	iterations,
		char	keep_boots,
		char	ignore_nans,
		apop_data **	boot_store
	)

Give me a data set and a model, and I'll give you the bootstrapped covariance matrix of the parameter estimates.

Parameters

data	The data set. An `apop_data` set where each row is a single data point. (No default)
model	An apop_model, whose `estimate` method will be used here. (No default)
iterations	How many bootstrap draws should I make? (default: 1,000)
rng	An RNG that you have initialized, probably with `apop_rng_alloc`. (Default: an RNG from apop_rng_get_thread)
boot_store	If not `NULL`, put the list of drawn parameter values here, with one parameter set per row. Sample use: `apop_data *boots; apop_bootstrap_cov(data, model, .boot_store=&boots); apop_data_print(boots);` The rows are packed via apop_data_pack, so use apop_data_unpack if needed. (Default: `NULL`)
ignore_nans	If `'y'` and any of the elements in the estimation return `NaN`, then I will throw out that draw and try again. If `'n'`, then I will write that set of statistics to the list, `NaN` and all. I keep count of throw-aways; if there are more than `iterations` elements thrown out, then I throw an error and return with estimates using data I have so far. That is, I assume that `NaNs` are rare edge cases; if they are as common as good data, you might want to rethink how you are using the bootstrap mechanism. (Default: 'n')

Returns: An apop_data set whose matrix element is the estimated covariance matrix of the parameters.

Exceptions

out->error=='n'	`NULL` input data.
out->error=='N'	Too many NaNs.

This function uses the Designated initializers syntax for inputs.

This example is a sort of demonstration of the Central Limit Theorem. The model is a simulation, where each call to the estimation routine produces the mean/std dev of a set of draws from a Uniform Distribution. Because the simulation takes no inputs, apop_bootstrap_cov simply re-runs the simulation and calculates a sequence of mean/std dev pairs, and reports the covariance of that generated data set.

#include <apop.h>
// Find the μ/σ  of a set of 20 draws from a Uniform(-1, 1)
void sim_step(apop_data *none, apop_model *m){
    int sub_draws = 20;
    static apop_model *unif;
    if (!unif) unif = apop_model_set_parameters(apop_uniform, -1, 1);
    apop_data *draws= apop_model_draws(unif, sub_draws);
    apop_data_set(m->parameters, 0, .val=apop_mean(Apop_cv(draws, 0)));
    apop_data_set(m->parameters, 1, .val=sqrt(apop_var(Apop_cv(draws, 0))));
    apop_data_add_names(m->parameters, 'r', "μ", "σ");
    apop_data_free(draws);
}
apop_model *clt_sim = &(apop_model){.name="CLT simulation", .vsize=2, .estimate=sim_step};
int main(){
    apop_data *boots;
    apop_data * boot_cov = apop_bootstrap_cov(NULL, clt_sim, .iterations=1000, .boot_store=&boots);
    apop_data_print(boot_cov);
    apop_data *means = Apop_c(boots, 0);
    printf("\nStats via Normal model:\n");
    apop_data *np = apop_estimate(means, apop_normal)->parameters;
    np->more = NULL; //rm covariance of statistics.
    apop_data_print(np);
    //σ from the Normal should == sqrt(cov(μ_boot))
    assert(fabs(sqrt(apop_data_get(boot_cov,0,0)) - apop_data_get(np, 1)) < 1e-4);
}

See also: apop_jackknife_cov

double apop_cdf	(	apop_data *	d,
		apop_model *	m
	)

Input a one-row data point/vector and a model; returns the area of the model's PDF beneath the given point.

By default, make random draws from the PDF and return the percentage of those draws beneath or equal to the given point. Many models have closed-form solutions that make no use of random draws.

See also apop_cdf_settings, which is the structure used to store draws already made (which means the second, third, ... calls to this function will take much less time than the first), the gsl_rng, and the number of draws to be made. These are handled without your involvement, but if you would like to change the number of draws from the default, add this group before calling apop_cdf :

1 Apop_model_add_group(your_model, apop_cdf, .draws=1e5, .rng=my_rng);

2 double cdf_value = apop_cdf(your_data_point, your_model);

Only the first row of the input apop_data set is used. Note that if you need to view row 20 of a data set as a one-row data set, use Apop_r.

Here are many examples using common, mostly symmetric distributions.

#include <apop.h>
int main(){
    //Set up an apop_data set with only one number.
    //Most of these functions will only look at the first data point encountered.
    apop_data *onept = apop_data_falloc((1), 23);
    apop_model *norm = apop_model_set_parameters(apop_normal, 23, 138.8);
    double val = apop_cdf(onept, norm);
    assert(fabs(val - 0.5) < 1e-4);
    double tolerance = 1e-8;
    //Macroizing the sample routine above:
    #define model_val_cdf(model, value, cdf_result) {   \
        apop_data_set(onept, .val=(value));             \
        assert(fabs((apop_cdf(onept, model))-(cdf_result))< tolerance);   \
    }
    apop_model *uni = apop_model_set_parameters(apop_uniform, 20, 26);
    model_val_cdf(uni, 0, 0);
    model_val_cdf(uni, 20, 0);
    model_val_cdf(uni, 21, 1./6);
    model_val_cdf(uni, 23, 0.5);
    model_val_cdf(uni, 25, 5./6);
    model_val_cdf(uni, 26, 1);
    model_val_cdf(uni, 260, 1);
    //Improper uniform always returns 1/2.
    model_val_cdf(apop_improper_uniform, 0, 0.5);
    model_val_cdf(apop_improper_uniform, 228, 0.5);
    model_val_cdf(apop_improper_uniform, INFINITY, 0.5);
    apop_model *binom = apop_model_set_parameters(apop_binomial, 2001, 0.5);
    model_val_cdf(binom, 0, 0);
    model_val_cdf(binom, 1000, .5);
    model_val_cdf(binom, 2000, 1);
    apop_model *bernie = apop_model_set_parameters(apop_bernoulli, 0.75);
    //p(0)=.25; p(1)=.75; that determines the CDF.
    //Notice that the CDF's integral is over a closed interval.
    model_val_cdf(bernie, -1, 0);
    model_val_cdf(bernie, 0, 0.25);
    model_val_cdf(bernie, 0.1, 0.25);
    model_val_cdf(bernie, .99, 0.25);
    model_val_cdf(bernie, 1, 1);
    model_val_cdf(bernie, INFINITY, 1);
    //alpha=beta -> symmetry
    apop_model *beta = apop_model_set_parameters(apop_beta, 2, 2);
    model_val_cdf(beta, -INFINITY, 0);
    model_val_cdf(beta, 0.5, 0.5);
    model_val_cdf(beta, INFINITY, 1);
    //This beta distribution -> uniform
    apop_model *beta_uni = apop_model_set_parameters(apop_beta, 1, 1);
    model_val_cdf(beta_uni, 0, 0);
    model_val_cdf(beta_uni, 1./6, 1./6);
    model_val_cdf(beta_uni, 0.5, 0.5);
    model_val_cdf(beta_uni, 1, 1);
    beta_uni->cdf = NULL; //With no closed-form CDF; make random draws to estimate the CDF.
    Apop_model_add_group(beta_uni, apop_cdf, .draws=1e6); //extra draws to improve accuracy, but we have to lower our tolerance anyway.
    tolerance=1e-3;
    model_val_cdf(beta_uni, 0, 0);
    model_val_cdf(beta_uni, 1./6, 1./6);
    model_val_cdf(beta_uni, 0.5, 0.5);
    model_val_cdf(beta_uni, 1, 1);
    //sum of three symmetric distributions: still symmetric.
    apop_model *sum_of_three = apop_model_mixture(beta, apop_improper_uniform, beta_uni);
    model_val_cdf(sum_of_three, 0.5, 0.5);
    apop_data *threepts = apop_data_falloc((3,1), -1, 0, 1);
    apop_model *kernels = apop_estimate(threepts, apop_kernel_density);
    model_val_cdf(kernels, -5, 0);
    model_val_cdf(kernels, 0, 0.5);
    model_val_cdf(kernels, 10, 1);
}

void apop_crosstab_to_db	(	apop_data *	in,
		char *	tabname,
		char *	row_col_name,
		char *	col_col_name,
		char *	data_col_name
	)

See apop_db_to_crosstab for the storyline; this is the complement, which takes a crosstab and writes its values to the database.

For example, I would take

	c0	c1
r0	2	3
r1	0	4

and do the following writes to the database:

 insert into your_table values ('r0', 'c0', 2);
 insert into your_table values ('r0', 'c1', 3);
 insert into your_table values ('r1', 'c0', 0);
 insert into your_table values ('r1', 'c1', 4);

If your data set does not have names (or not enough names), I will use the scheme above, filling in names of the form r0, r1, ... c0, c1, .... Text columns get their own names, t0, t1.

This function handles only the matrix and text.

void apop_data_add_named_elmt	(	apop_data *	d,
		char *	name,
		double	val
	)

A convenience function to add a named element to a data set. Many of Apophenia's testing procedures use this to easily produce a column of named parameters. It is public as a convenience.

Parameters

d	The apop_data structure. Must not be `NULL`, but may be blank (as per allocation via `apop_data_alloc()`).
name	The name to add
val	the value to add to the set.

I use the position of the last non-empty row name to know where to put the value. If there are two names in the data set, then I will put the new name in the third name slot and the data in the third slot in the vector. If you use this function from start to finish in building your list, then you'll be fine.
If the vector is too short (or NULL), I will call apop_vector_realloc internally to make space.
This fits well with the defaults for apop_data_get. An example:

 apop_data *list = apop_data_alloc();
 apop_data_add_named_elmt(list, "height", 165);
 apop_data_add_named_elmt(list, "weight", 60);
 
 double height = apop_data_get(list, .rowname="height");
 
 //or
 #define Lookup(dataset, key) apop_data_get(dataset, .rowname=#key)
 height = Lookup(list, height);

apop_data * apop_data_add_page	(	apop_data *	dataset,
		apop_data *	newpage,
		const char *	title
	)

Add a page to an apop_data set. It gets a name so you can find it later.

Parameters

dataset	The input data set, to which a page will be added.
newpage	The page to append
title	The name of the new page.

Returns: The new page. I post a warning if I am appending a NULL page or appending to a NULL data set and apop_opts.verbose >=1 .

See Pages for further notes.

apop_data * apop_data_alloc	(	const size_t	size1,
		const size_t	size2,
		const int	size3
	)

Allocate an apop_data structure.

The typical case is three arguments, like apop_data_alloc(2,3,4): vector size, matrix rows, matrix cols. If the first argument is zero, you get a NULL vector.
Two arguments, apop_data_alloc(2,3), would allocate just a matrix, leaving the vector NULL.
One argument, apop_data_alloc(2), would allocate just a vector, leaving the matrix NULL.
Zero arguments, apop_data_alloc(), will produce a basically blank set, with out->matrix and out->vector set to NULL.

For allocating the text part, see apop_text_alloc.

The weights vector is set to NULL. If you need it, allocate it via

1 d->weights = gsl_vector_alloc(row_ct);

Returns: The apop_data structure, allocated and ready to be populated with data.

Exceptions

out->error=='a' Allocation error. The matrix, vector, or names couldn't be malloced, which probably means that you requested a very large data set.

An apop_data struct, by itself, is about 72 bytes. If I can't allocate that much memory, I return NULL. But if even this much fails, your computer may be on fire and you should go put it out.

This function uses the Designated initializers syntax for inputs.

See also: apop_data_calloc

apop_data * apop_data_calloc	(	const size_t	size1,
		const size_t	size2,
		const int	size3
	)

Allocate an apop_data structure, to be filled with data; set everything in the allocated portion to zero. See apop_data_alloc for details.

Returns: The apop_data structure, allocated and zeroed out.

Exceptions

out->error=='a'	Allocation error; probably out of memory.

This function uses the Designated initializers syntax for inputs.

See also: apop_data_alloc

apop_data * apop_data_copy ( const apop_data * in )

Copy one apop_data structure to another. That is, all data is duplicated.

Basically a front-end for apop_data_memcpy for those who prefer this sort of syntax.

If the data set has a more pointer, that will be followed and subsequent pages copied as well.

Parameters

in	the input data

Returns: a structure that this function will allocate and fill. If input is NULL, then this will be NULL.

Exceptions

out.error='a'	Allocation error.
out.error='c'	Cyclic link: `D->more == D` (may be later in the chain, e.g., `D->more->more = D->more`) You'll have only a partial copy.
out.error='d'	Dimension error; should never happen.
out.error='p'	Missing part error; should never happen.

If the input data set has an error, then I will copy it anyway, including the error flag (which might be overwritten). I print a warning if the verbosity level is >=1.

apop_data * apop_data_correlation ( const apop_data * in )

Returns the matrix of correlation coefficients $(\sigma^2_{xy}/(\sigma_x\sigma_y))$ relating each column with each other.

Parameters

in	A data matrix: rows are observations, columns are variables. If you give me a weights vector, I'll use it.

Returns: The square matrix of correlation coefficients, with dimensions equal to the number of input columns.

Exceptions

out->error='a' Allocation error.

apop_data * apop_data_covariance ( const apop_data * in )

Returns the sample variance/covariance matrix relating each column of the matrix to each other column.

Parameters

in	An apop_data set. If the weights vector is set, I'll take it into account.

This is the sample covariance—dividing by $n-1$, not $n$. If you need the population variance, use
1 apop_data *popcov = apop_data_covariance(indata);
2 int size=indata->matrix->size1;
3 gsl_matrix_scale(popcov->matrix, size/(size-1.));

Returns: An apop_data set holding the variance/covariance matrix.

Exceptions

out->error='a' Allocation error.

char apop_data_free_base ( apop_data * freeme )

Free the elements of the given apop_data set and then the apop_data set itself. Intended to be used by apop_data_free, a macro that calls this to free elements, then sets the value to NULL.

apop_data_free is a macro that calls this function and, on success, sets the input pointer to NULL. For typical cases, that's slightly more useful than this function.

Exceptions

freeme.error='c' Circular linking is against the rules. If freeme->more == freeme, then I set freeme.error='c' and return. If you send in a structure like A -> B -> B, then both data sets A and B will be marked.

Returns: 0 on OK, 'c' on error.

double apop_data_get	(	const apop_data *	data,
		size_t	row,
		int	col,
		const char *	rowname,
		const char *	colname,
		const char *	page
	)

Returns the data element at the given point.

In case of error (probably that you asked for a data point out of bounds), returns NAN. See the set/get page for details and examples.

Parameters

data	The data set. Must not be `NULL`.
row	The row number of the desired element. If `rowname==NULL`, default is zero.
col	The column number of the desired element. -1 indicates the vector. If `colname==NULL`, default is zero if the `->matrix` element is not `NULL` and -1 if the `->matrix` element is `NULL` and the `->vector` element is not.
rowname	The row name of the desired element. If `NULL`, use the row number.
colname	The column name of the desired element. If `NULL`, use the column number.
page	The case-insensitive name of the page on which the element is found. If `NULL`, use first page.

Returns: The value at the given location.

apop_data * apop_data_get_factor_names	(	apop_data *	data,
		int	col,
		char	type
	)

Factor names are stored in an auxiliary table with a name like "<categories for your_var>". Producing this name is annoying (and prevents us from eventually making it human-language independent), so use this function to get the list of factor names.

Parameters

data	The data set. (No default, must not be `NULL`)
col	The column in the main data set whose name I'll use to check for the factor name list. Vector==-1. (default=0)
type	If you are referring to a text column, use 't'. (default='d')

Returns: A pointer to the page in the data set with the given factor names.

This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_get_page	(	const apop_data *	data,
		const char *	title,
		const char	match
	)

It's good form to get a page from your data set by name, because you may not know the order for the pages, and the stepping through makes for dull code anyway (apop_data *page = dataset; while (page->more) page= page->more;).

Parameters

data	The apop_data set to use. No default; if `NULL`, gives a warning if `apop_opts.verbose >=1` and returns `NULL`.
title	The name of the page to retrieve. Default=`"<Info>"`, which is the name of the page of additional estimation information returned by estimation routines (log likelihood, status, AIC, BIC, confidence intervals, ...).
match	If `'c'`, case-insensitive match (via `strcasecmp`); if `'e'`, exact match, if `'r'` regular expression substring search (via apop_regex). Default=`'c'`.

Returns: The page whose title matches what you gave me. If I don't find a match, return NULL.

This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_listwise_delete	(	apop_data *	d,
		char	inplace
	)

If there is an NaN anywhere in the row of data (including the matrix, the vector, the weights, and the text) then delete the row from the data set.

If every row has a NaN, then this returns NULL.
If apop_opts.nan_string is not NULL, then I will make case-insensitive comparisons to the text elements to check for bad data as well.
If inplace = 'y', then I'll free each element of the input data set and refill it with the pruned elements. I'll still take up (up to) twice the size of the data set in memory during the function. If every row has a NaN, then your apop_data set will end up with NULL vector, matrix, and so on. If inplace = 'n', then the original data set is left where it was, though internal elements may be moved.
I only look at the first page of data (i.e. the more element is ignored).
Listwise deletion is often not a statistically valid means of dealing with missing data. It is typically better to impute the data (preferably multiple times). See apop_ml_impute for a less-invalid means, or Tea for survey imputation for heavy-duty survey editing and imputation.
This function uses the Designated initializers syntax for inputs.

Parameters

d	The data, with NaNs
inplace	If `'y'`, clear out the pointer-to-apop_data that you sent in and refill with the pruned data. If `'n'`, leave the set alone and return a new data set. Default=`'n'`.

Returns: A (potentially shorter) copy of the data set, without NaNs. If inplace=='y', a pointer to the input, which was shortened in place. If the entire data set is cleared out, then this will be NULL.

See also: apop_data_rm_rows

void apop_data_memcpy	(	apop_data *	out,
		const apop_data *	in
	)

Copy one apop_data structure to another.

This function does not allocate the output structure or the vector, matrix, text, or weights elements—I assume you have already done this and got the dimensions right. I will assert that there is at least enough room in the destination for your data, and fail if the copy would write more elements than there are bins.

If you want space allocated or are unsure about dimensions, use apop_data_copy.
If both in and out have a more pointer, also copy subsequent page(s).
You can use the subsetting macros, Apop_r, Apop_rs, Apop_c, and so on, to copy within a data set:

 //Copy the contents of row i of mydata to row j.
 apop_data *fromrow = Apop_r(mydata, i);
 apop_data *torow = Apop_r(mydata, j);
 apop_data_memcpy(torow, fromrow);
 
 // or just
 apop_data_memcpy(Apop_r(mydata, j), Apop_r(mydata, i));

Parameters

out	A structure that this function will fill. Must be preallocated with the appropriate sizes.
in	The input data.

Exceptions

out.error='d'	Dimension error.
out.error='p'	Part missing; e.g., in->matrix exists but out->matrix doesn't.

gsl_vector * apop_data_pack	(	const apop_data *	in,
		gsl_vector *	out,
		char	more_pages,
		char	use_info_pages
	)

This function takes in an apop_data set and writes it as a single column of numbers, outputting a gsl_vector. It is valid to use the out_vector->data element as an array of doubles of size out_vector->size (i.e. its stride==1).

The complement is apop_data_unpack. I.e.,

1 apop_data_unpack(apop_data_pack(in_data), data_copy)

will return the original data set (stripped of text and names).

Parameters

in	an `apop_data` set. No default; if `NULL`, return `NULL`.
out	If this is not `NULL`, then put the output here. The dimensions must match exactly. If `NULL`, then allocate a new vector. Default = `NULL`.
more_pages	If `'y'`, then follow the `->more` pointer to fill subsequent pages; else fill only the first page. Informational pages will still be ignored, unless you set `.use_info_pages='y'` as well. Default = `'y'`.
use_info_pages	Pages in XML-style brackets, such as `<Covariance>` will be ignored unless you set `.use_info_pages='y'`. Be sure that this is set to the same thing when you both pack and unpack. Default: `'n'`.

Returns: A gsl_vector with the vector data (if any), then each row of data (if any), then the weights (if any), then the same for subsequent pages (if any && .more_pages=='y'). If out is not NULL, then this is out.

Exceptions

NULL	If you give me a vector as input, and its size is not correct, returns `NULL`.

This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_pmf_compress ( apop_data * in )

Say that you have added a long list of observations to a single apop_data set, meaning that each row has weight one. There are a huge number of duplicates, perhaps because there are a handful of types that keep repeating:

Vector value	Text name	Weights

12	Dozen	1
1	Single	1
2	Pair	1
2	Pair	1
1	Single	1
1	Single	1
2	Pair	1
2	Pair	1

Use this function to reduce this to a set of distinct values, with their weights adjusted accordingly:

Vector value	Text name	Weights

12	Dozen	1
1	Single	3
2	Pair	4

Parameters

in	An apop_data set that may have duplicate rows. As above, the data may be in text and/or numeric formats.

Returns: Your input is changed in place, via apop_data_rm_rows, so use apop_data_copy before calling this function if you need to retain the original format. For your convenience, this function returns a pointer to your original data, which has now been pruned. If there is a weights vector, I will add those weights together as duplicates are merged. If there is no weights vector, I will create one, which is initially set to one for all values, and then aggregated as above.

void apop_data_print	(	const apop_data *	data,
		Output_declares
	)

Print an apop_data set to a file, the database, or the screen, as determined by the .output_type.

See apop_prep_output for more on how printing settings are set.
See Legible output for more details and examples.
See About SQL, the syntax for querying databases for notes on writing an apop_data set to the database.
This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_prune_columns_base	(	apop_data *	d,
		char **	colnames
	)

Keep only the columns of a data set that you name. This is the function called internally by the apop_data_prune_columns macro. In most cases, you'll want to use that macro. An example of the two uses demonstrating the difference:

 apop_data_prune_columns(d, "mean", "median");
 
 char *list[] = {"mean", "median", NULL};
 apop_data_prune_columns_base(d, list);

Parameters

d	The data set to prune.
colnames	A NULL-terminated list of names to retain.

Returns: A pointer to the input data set, now pruned.

See also: apop_data_rm_columns

double * apop_data_ptr	(	apop_data *	data,
		int	row,
		int	col,
		const char *	rowname,
		const char *	colname,
		const char *	page
	)

Get a pointer to an element of an apop_data set.

If a NULL vector or matrix (as the case may be), or the row/column you requested is outside bounds, return NULL.
See the set/get page for details.

Parameters

data	The data set. Must not be `NULL`.
row	The row number of the desired element. If `rowname==NULL`, default is zero.
col	The column number of the desired element. -1 indicates the vector. If `colname==NULL`, default is zero.
rowname	The row name of the desired element. If `NULL`, use the row number.
colname	The column name of the desired element. If `NULL`, use the column number.
page	The case-insensitive name of the page on which the element is found. If `NULL`, use first page.

Returns: A pointer to the element.

apop_data * apop_data_rank_compress	(	apop_data *	in,
		int	min_bins
	)

One often finds data where the column indicates the value of the data point. There may be two columns, and a mark in the first indicates a miss while a mark in the second is a hit. Or say that we have the following list of observations:

1 2 3 3 2 1 1 2 1 1 2 1 1

Then we could write this as:

 0  1  2  3
 ----------
 0  7  4  2

because there are seven 1s observed, four 2s observed, and two 3s observed. We call this rank format, because 1 (or zero) is typically the most common, 2 is second most common, et cetera.

This function takes in a list of observations, and aggregates them into a single row in rank format.

For the complement, see apop_data_rank_expand.

See also apop_data_to_factors to convert real numbers or text into a matrix of categories.

Parameters

in	The input apop_data set. If `NULL`, return `NULL`.
min_bins	If this is omitted, the number of bins is simply the largest number found. So if there are bins {0, 1, 2} and your data set happens to consist of `0 0 1 1 0`, then I won't know to generate results with three bins where the last bin has a count of zero. Set `.min_bins=2` to ensure that bin is included.

/* Round-trip check: draw from a Zipf distribution, compress the draws into a single
row of rank counts, then expand those counts back into a list of individual entries.
After sorting both lists, the expanded list should match the original draws. */
#include <apop.h>
int main(){
    int length = 1e4;
    gsl_rng *r = apop_rng_alloc(2342);
    apop_model *a_zipf = apop_model_set_parameters(apop_zipf, 3.2);
    apop_data *draws = apop_data_alloc(length);
    //Fill the vector (column -1) one draw at a time.
    for (int i=0; i< length; i++){
        double *cell = apop_data_ptr(draws, i, -1);
        apop_draw(cell, r, a_zipf);
    }

    //Compress: the first row of the resulting matrix is suitable for plotting.
    //Call apop_data_show(by_rankings) here to inspect it.
    apop_data *by_rankings = apop_data_rank_compress(draws);
    //Every draw must be counted exactly once.
    assert(apop_matrix_sum(by_rankings->matrix) == length);

    //Expand again, and compare the two lists after sorting them the same way.
    apop_data *re_expanded = apop_data_rank_expand(by_rankings);
    gsl_sort_vector(draws->vector);
    gsl_sort_vector(re_expanded->vector);
    assert(apop_vector_distance(draws->vector, re_expanded->vector) < 1e-5);
}

This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_rank_expand ( apop_data * in )

The complement to this is apop_data_rank_compress; see that function's documentation for the story and an example.

This function takes in a data set where the zeroth column includes the count(s) of times that zero was observed, the first gives the count(s) of times that one was observed, et cetera. It outputs a data set whose vector element includes a list that has exactly the given frequency of zeros, ones, et cetera.

void apop_data_rm_columns	(	apop_data *	d,
		int *	drop
	)

Remove the columns of the apop_data set corresponding to a nonzero value in the drop vector.

The returned data structure looks like it was modified in place, but the data matrix and the names are duplicated before being pared down, so if your data is taking up more than half of your memory, this may not work.

Parameters

d	The apop_data structure to be pared down.
drop	An array of ints. If drop[7]==1, then column seven will be cut from the output. A reminder: `calloc(in->size2 , sizeof(int))` will fill your array with zeros on allocation, and `memset(drop, 1, in->size2 * sizeof(int))` will quickly fill an array of ints with nonzero values. See also: apop_data_rm_rows

apop_data * apop_data_rm_page	(	apop_data *	data,
		const char *	title,
		const char	free_p
	)

Remove the first page from an apop_data set that matches a given name.

Parameters

data	The input data set, from which a page will be removed. No default. If `NULL`, maybe print a warning (see below).
title	The case-insensitive name of the page to remove. Default: `"<Info>"`
free_p	If `'y'`, then apop_data_free the page. Default: `'y'`.

Returns: If not freed, a pointer to the apop_data page that I just pulled out. Thus, you can use this to pull a single page from a data set. I set that page's more pointer to NULL, to minimize any confusion about more-than-linear linked list topologies. If free_p=='y' (the default) or the page is not found, return NULL.

I don't check the first page, so there's no concern that the head of your list of pages will move. Again, the intent of the ->more pointer in the apop_data set is not to fully implement a linked list, but primarily to allow you to staple auxiliary information to a main data set.

If I don't find the page you want, I return NULL, and maybe print a warning; see below.

For the two above cases where a warning may be printed, if the page is to be returned and apop_opts.verbose >= 1 , print a warning. If the page is to be freed and apop_opts.verbose >= 2 , print a warning.

The remaining more pointers in the apop_data set are adjusted accordingly.

apop_data * apop_data_rm_rows	(	apop_data *	in,
		int *	drop,
		apop_fn_ir	do_drop,
		void *	drop_parameter
	)

Remove the rows set to one in the drop vector or for which the do_drop function returns one.

Parameters

in	the apop_data structure to be pared down
drop	a vector with as many elements as the max of the vector, matrix, or text parts of `in`, with a one marking those rows to be removed.
do_drop	A function that returns one for rows to drop and zero for rows to not drop. A sample function:

 int your_drop_function(apop_data *onerow, void *extra_param){
     return gsl_isnan(apop_data_get(onerow)) ||
         !strcmp(onerow->text[0][0], "Uninteresting data point");
 }

apop_data_rm_rows will use Apop_r to get a subview of the input data set of height one, and send that subview to this function (and since arguments typically default to zero, you don't have to write out things like apop_data_get `(onerow, .row=0, .col=0)`, which can help to keep things readable).
drop_parameter	If your `do_drop` function requires additional input, put it here and it will be passed through.

Returns: Returns a pointer to the input data set, now pruned.

If all the rows are to be removed, then you will wind up with the same apop_data set, with NULL vector, matrix, weight, and text. Therefore, you may wish to check for NULL elements after use. I remove rownames, but leave the other names, in case you want to add new data rows.
The typical use is to provide only a list or only a function. If both are NULL, I return without doing anything, and print a warning if apop_opts.verbose >=2. If you provide both, I will drop the row if either the vector has a one in that row's position, or if the function returns a nonzero value.
This function uses the Designated initializers syntax for inputs.
See also
apop_data_listwise_delete, apop_data_rm_columns

int apop_data_set	(	apop_data *	data,
		size_t	row,
		int	col,
		const double	val,
		const char *	colname,
		const char *	rowname,
		const char *	page
	)

Set a data element. See the set/get page for details and examples.

Returns: 0=OK, -1=error: couldn't find row/column name, or you asked for a location outside the vector/matrix bounds.

The error codes for out-of-bounds errors are thread-safe iff you have a C11-compliant compiler (thanks to the _Thread_local keyword) or a version of GCC with the __thread extension enabled.

Set weights via gsl_vector_set(your_data->weights, row, val);.
Set text elements via apop_text_set.

Parameters

data	The data set. Must not be `NULL`.
row	The row number of the desired element. If `rowname==NULL`, default is zero.
col	The column number of the desired element. -1 indicates the vector. If `colname==NULL`, default is zero.
rowname	The row name of the desired element. If `NULL`, use the row number.
colname	The column name of the desired element. If `NULL`, use the column number.
page	The case-insensitive name of the page on which the element is found. If `NULL`, use first page.
val	The value to give the point.

This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_sort	(	apop_data *	data,
		apop_data *	sort_order,
		char	asc,
		char	inplace,
		double *	col_order
	)

Sort an apop_data set on an arbitrary sequence of columns.

The sort_order set is a one-row data set that should look like the data set being sorted. The easiest way to generate it is to use Apop_r to pull one row of the table, then copy and fill it. For each column you want used in the sort, assign a ranking giving whether the column should be sorted first, second, .... Columns you don't want used in the sorting should be set to NAN. Ties are broken by the earlier element in the default order (see below).

E.g., to sort by the last column of a five-column matrix first, then the next-to-last column, then the next-to-next-to-last, then by the first text column, then by the second text column:

 apop_data *sort_order = apop_data_copy(Apop_r(data, 0));
 sort_order->vector = NULL; //so it will be skipped.
 Apop_data_fill(sort_order, NAN, NAN, 3, 2, 1);
 apop_text_set(sort_order, 0, 0, "4");
 apop_text_set(sort_order, 0, 1, "5");
 apop_data_sort(data, sort_order);

To determine which columns are sorted at which step, I use only comparisons, not the actual numeric values. For example, (1, 2, 3) and (-1.32, 0, 27) work identically. For text, I use atof to convert your text to a number, as in the example above that set text values of "4" and "5". A blank string, NaN numeric value, or NULL element in the apop_data set means that column will not be sorted.

Strings are sorted case-insensitively, using strcasecmp. [exercise for the reader: modify the source to use Glib's locale-correct string sorting.]

The setup generates a lexicographic sort using the columns you specify. If you would like a different sort order, such as Euclidean distance to the origin, you can generate a new column expressing your preferred metric, and then sort on that. See the example below.

Parameters

data	The data set to be sorted. If `NULL`, this function is a no-op that returns `NULL`.
sort_order	An apop_data set describing the order in which columns are used for sorting, as above. If `NULL`, then sort by the vector, then each matrix column, then text, then weights, then row names.
inplace	If 'n', make a copy, else sort in place. (default: 'y').
asc	If 'a', ascending; if 'd', descending. This is applied to all columns; column-by-column application is to do. (default: 'a').
col_order	For internal use only. In your call, it should be `NULL`; you can leave this off your function call entirely and the Designated initializers syntax will take care of it for you.

Returns: A pointer to the sorted data set. If inplace=='y' (the default), then this is the same as the input set.

A few examples:

/* Examples of apop_data_sort: the default column order, an explicit column order,
sorting a bare list of row names, and sorting by a derived metric in descending order.
When Testing is defined, the results are checked instead of printed. */
//DATADIR is where the sample input files are read from; it falls back to the
//current directory when Datadir is not defined.
#ifdef Datadir
#define DATADIR Datadir
#else
#define DATADIR "."
#endif
#include <apop.h>
#include <unistd.h>
#ifdef Testing
#include "sort_tests.c" //For Apophenia's test suite, some tedious checks that the sorts worked
#endif
//get_distance is for the sort-by-Euclidean distance example below.
double get_distance(gsl_vector *v) {return apop_vector_distance(v);}
int main(){
    //Load the CSV file into the database, then query it into a data set.
    apop_text_to_db( DATADIR "/" "amash_vote_analysis.csv" );
    apop_data *d = apop_query_to_mixed_data("mntmtm", "select 1,id,party,contribs/1000.0,vote,ideology from amash_vote_analysis " );
    //use the default order of columns for sorting
    //.inplace='n' requests a sorted copy, so d itself is left as queried.
    apop_data *sorted = apop_data_sort(d, .inplace='n');
#ifndef Testing
    apop_data_print(sorted);
#else
    check_sorting1(sorted);
#endif
    //set up a specific column order
    //perm starts as a copy of the first row of d; its cells are then overwritten with sort ranks.
    apop_data *perm = apop_data_copy(Apop_r(d, 0));
    perm->vector = NULL;
    apop_data_fill(perm, 5, 3, 4);
    apop_text_set(perm, 0, 0, "2");
    apop_text_set(perm, 0, 1, "1");
    //No .inplace argument here, so this call uses the default.
    apop_data_sort(d, perm);
#ifndef Testing
    apop_data_print(d);
#else
    check_sorting2(d);
#endif
    //sort a list of names
    apop_data *blank = apop_data_alloc();
    apop_data_add_names(blank, 'r', "C", "E", "A");
    apop_data_sort(blank);
    //The row names should now be in alphabetical order.
    assert(*blank->names->row[0] == 'A');
    assert(*blank->names->row[1] == 'C');
    assert(*blank->names->row[2] == 'E');
    //take each row of the matrix as a vector; store the Euclidean distance to the origin in the vector;
    //sort in descending order.
    apop_data *rowvectors = apop_text_to_data( DATADIR "/" "test_data" );
    apop_map(rowvectors, .fn_v=get_distance, .part='r', .inplace='y');
    apop_data *arow = apop_data_copy(Apop_r(rowvectors, 0));
    arow->matrix=NULL; //sort only by the distance vector
    apop_data_sort(rowvectors, arow, .asc='d');
#ifndef Testing
    apop_data_print(rowvectors);
#else
    //Walk the sorted vector and check that each entry is strictly smaller than the one before it.
    double prev = INFINITY;
    for (int i=0; i< rowvectors->vector->size; i++){
        double this = apop_data_get(rowvectors, i, -1);
        assert(this < prev);
        prev = this;
    }
#endif
}

This function uses the Designated initializers syntax for inputs.

apop_data ** apop_data_split	(	apop_data *	in,
		int	splitpoint,
		char	r_or_c
	)

Split one input apop_data structure into two.

For the opposite operation, see apop_data_stack.

Parameters

in	The apop_data structure to split
splitpoint	The index of what will be the first row/column of the second data set. E.g., if this is -1 and `r_or_c=='c'`, then the whole data set will be in the second data set; if this is the length of the matrix then the whole data set will be in the first data set. Another way to put it is that for values between zero and the matrix's size, `splitpoint` will equal the number of rows/columns in the first matrix.
r_or_c	If this is 'r' or 'R', then put some rows in the first data set and some in the second; if 'c' or 'C', split columns into first and second data sets.

Returns: An array of two apop_data sets. If one is empty then a NULL pointer will be returned in that position. For example, for a data set of 50 rows, apop_data **out = apop_data_split(data, 100, 'r') sets out[0] = apop_data_copy(data) and out[1] = NULL.

When splitting at a row, the text is also split.
The more pointer is ignored.
The apop_data->vector is taken to be the -1st element of the matrix.
Weights will be preserved. If splitting by rows, then the top and bottom parts of the weights vector will be assigned to the top and bottom parts of the main data set. If splitting by columns, identical copies of the weights vector will be assigned to both parts.
Data is copied, so you may want to call apop_data_free(in) after this.

apop_data * apop_data_stack	(	apop_data *	m1,
		apop_data *	m2,
		char	posn,
		char	inplace
	)

Put the first data set either on top of or to the left of the second data set.

For the opposite operation, see apop_data_split.

Parameters

m1	the upper/leftmost data set (default = `NULL`)
m2	the second data set (default = `NULL`)
posn	If 'r', stack rows of m1 above rows of m2; if 'c', stack columns of m1 to the left of m2's (default = 'r')
inplace	If `'y'`, use apop_matrix_realloc and apop_vector_realloc to modify `m1` in place. Otherwise, allocate a new apop_data set, leaving `m1` undisturbed. (default='n')

Returns: The stacked data, either in a new apop_data set or m1

Exceptions

out->error=='a'	Allocation error.
out->error=='d'	Dimension error; couldn't make a complete copy.

The function returns a new data set, meaning that until you apop_data_free() the original data sets, you will be taking up twice as much memory.
If m1 or m2 are NULL, returns a copy of the other element, and if both are NULL, returns NULL. If m2 is NULL and inplace is 'y', returns the original m1 pointer unmodified.
Text is handled as you'd expect: If 'r', one set of text is stacked on top of the other [number of columns must match]; if 'c', one set of text is set next to the other [number of rows must match].
more is ignored.
If stacking rows on rows, the output vector is the input vectors stacked accordingly. If stacking columns by columns, the output vector is just a copy of the vector of m1 and m2->vector doesn't appear in the output at all.
The same rules for dealing with the vector(s) hold for the vector(s) of weights.
Names are a copy of the names for m1, with the names for m2 appended to the row or column list, as appropriate.
This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_summarize ( apop_data * indata )

Put summary information about the columns of a table (mean, std dev, variance, min, median, max) in a table.

Parameters

indata The table to be summarized. An apop_data structure. May have a weights element.

Returns: An apop_data structure with one row for each column in the original table, and a column for each summary statistic.

Exceptions

out->error='a' Allocation error.

This function gives more columns than you probably want; use apop_data_prune_columns to pick the ones you want to see.

See apop_data_prune_columns for an example.

apop_data * apop_data_to_bins	(	apop_data const *	indata,
		apop_data const *	binspec,
		int	bin_count,
		char	close_top_bin
	)

Create a histogram from data by putting data into bins of fixed width. Your input apop_data set may be multidimensional, and may include both vector and matrix parts, and the bins output will have corresponding dimension.

Parameters

indata	The input data that will be binned, one observation per row. This is copied and the copy will be modified. (No default)
binspec	This is an apop_data set with the same number of columns as `indata`. If you want a fixed size for the bins, then the first row of the bin spec is the bin width for each column. This allows you to specify a width for each dimension, or specify the same size for all with something like:

 apop_data *binspec = apop_data_copy(Apop_r(indata, 0));
 gsl_matrix_set_all(binspec->matrix, 10); //bins of size 10 for all dim.s
 apop_data_to_bins(indata, binspec);

The presumption is that the first bin starts at zero in all cases. You can add a second row to the spec to give the offset for each dimension. (default: NULL)
bin_count	If you don't provide a bin spec, I'll provide this many evenly-sized bins to cover the data set. (Default: $\sqrt{N}$ )
close_top_bin	Normally, a bin covers the range from the point equal to its minimum to points strictly less than the minimum plus the width. if `'y'`, then the top bin includes points less than or equal to the upper bound. This solves the problem of displaying histograms where the top bin is just one point. (default: `'y'` if `binspec==NULL`, else `'n'`)

Returns: A pointer to an apop_data set with the same dimension as your input data. Each cell is an integer giving the bin number into which the cell falls.

If no binspec and no binlist, then a grid with offset equal to the min of the column, and bin size such that it takes $\sqrt{N}$ bins to cover the range to the max element.
The text segment is not binned. The more pointer, if any, is not followed.
If there are weights, they are copied to the output via apop_vector_copy.
Given NULL input, return NULL output. Print a warning if apop_opts.verbose >= 2.

Iff you didn't give me a binspec, then I attach one to the output set as a page named <binspec>. This means that you can snap a second data set to the same grid using

1 apop_data_to_bins(first_set, NULL);

2 apop_data_to_bins(second_set, apop_data_get_page(first_set, "<binspec>"));

The output has exactly as many rows as the input. Because many rows will be identical after binning, it may be fruitful to run it through apop_data_pmf_compress to produce a short list with one total weight per bin.

Here is a sample program highlighting apop_data_to_bins and apop_data_pmf_compress .

/* Sample program for apop_data_to_bins and apop_data_pmf_compress: build a small
data set with a vector and one text column, bin it, then compress the original
so that duplicate rows are merged and their weights summed. */
//_GNU_SOURCE must be defined before any header is included; it is here for asprintf.
#define _GNU_SOURCE
#include <apop.h>
//Print a separator line, then the data set. Wrapped in do/while(0) so that the
//macro expands to a single statement wherever it is used.
#define printdata(dataset)               \
    do {                                 \
        printf("\n-----------\n\n");     \
        apop_data_print(dataset);        \
    } while (0)
int main(){
    //Six observations: a numeric vector plus one column of text.
    apop_data *d = apop_text_alloc(apop_data_alloc(6), 6, 1);
    apop_data_fill(d,   1,   2,   3,   3,   1,   2);
    apop_text_fill(d,  "A", "A", "A", "A", "A", "B");
    asprintf(&d->names->title, "Original data set");
    printdata(d);
        //binned, where bin ends are equidistant but not necessarily in the data
    apop_data *binned = apop_data_to_bins(d);
    asprintf(&binned->names->title, "Post binning");
    printdata(binned);
    assert(fabs(//equal distance between bins
              (apop_data_get(binned, 1) - apop_data_get(binned, 0))
            - (apop_data_get(binned, 2) - apop_data_get(binned, 1))) < 1e-5);
        //compressed, where the data is as in the original, but weights 
        //are redone to accommodate repeated observations.
    apop_data_pmf_compress(d);
    asprintf(&d->names->title, "Post compression");
    printdata(d);
    //Six observations went in, so the merged weights must still total six.
    assert(apop_sum(d->weights)==6);
    apop_model *d_as_pmf = apop_estimate(d, apop_pmf);
    apop_data *firstrow = Apop_r(d, 0); //1A
    //(1, "A") appeared twice among the six observations, so its probability is 2/6.
    //The comparison must sit outside fabs(); inside it, fabs() would be applied to
    //the 0/1 result of the comparison and the check would pass for any value below 2/6.
    assert(fabs(apop_p(firstrow, d_as_pmf) - 2./6) < 1e-5);
}

This function uses the Designated initializers syntax for inputs.

apop_data * apop_data_to_dummies	(	apop_data *	d,
		int	col,
		char	type,
		int	keep_first,
		char	append,
		char	remove
	)

A utility to make a matrix of dummy variables. You give me a single vector that lists the category number for each item, and I'll produce a matrix with a single one in each row in the column specified.

After that, you have to decide what to do with the new matrix and the original data column.

You can manually join the dummy data set with your main data, e.g.:
1 apop_data *dummies = apop_data_to_dummies(main_regression_vars, .col=8, .type='t');
2 apop_data_stack(main_regression_vars, dummies, 'c', .inplace='y');

The .remove='y' option specifies that I should use apop_data_rm_columns to remove the column used to generate the dummies. Implemented only for type=='d'.

By specifying .append='y' or .append='e' I will run the above two lines for you. Your apop_data pointer will not change, but its matrix element will be reallocated (via apop_data_stack).

By specifying .append='i', I will place the matrix of dummies in place, immediately after the data column you had specified. You will probably use this with .remove='y' to replace the single column with the new set of dummy columns. Bear in mind that if there are two or more dummy columns, adding columns will change subsequent column numbers; use apop_name_find to find columns instead of giving an explicit column number.

If .append='i' and you asked for a text column, I will append to the end of the table, which is equivalent to append='e'.

Parameters

d	The data set with the column to be dummified (No default.)
col	The column number to be transformed; -1==vector (default = 0)
type	'd'==data column, 't'==text column. (default = 't')
keep_first	If `'n'`, return a matrix where each row has a one in the (column specified minus one). That is, the zeroth category is dropped, the first category has an entry in column zero, et cetera. If you don't know why this is useful, then this is what you need. If you know what you're doing and need something special, set this to `'y'` and the first category won't be dropped. (default = `'n'`)
append	If `'e'` or `'y'`, append the dummy grid to the end of the original data matrix. If `'i'`, insert in place, immediately after the original data column. (default = `'n'`)
remove	If `'y'`, remove the original data or text column. (default = `'n'`)

Returns: An apop_data set whose matrix element is the one-zero matrix of dummies. If you used .append, then this is the main matrix. Also, I add a page named "<categories for your_var>" giving a reference table of names and column numbers (where your_var is the appropriate column heading).

Exceptions

out->error=='a'	allocation error
out->error=='d'	dimension error

Use apop_data_get_factor_names to get the list of category names.
NaNs (if any) appear at the end of the sort order.
See Generating factors for further discussion.
See the documentation for apop_logit for a sample linear model using this function.
This function uses the Designated initializers syntax for inputs.

See also: apop_data_to_factors

apop_data * apop_data_to_factors	(	apop_data *	data,
		char	intype,
		int	incol,
		int	outcol
	)

Convert a column of text or numbers into a column of numeric factors, which you can use for a multinomial probit/logit, for example.

If you don't run this on your data first, apop_probit and apop_logit default to running it on the vector or (if no vector) zeroth column of the matrix of the input apop_data set, because those models need a list of the unique values of the dependent variable.

Parameters

data	The data set to be modified in place. (No default. If `NULL`, returns `NULL` and a warning)
intype	If `'t'`, then `incol` refers to text, if `'d'`, refers to the vector or matrix. (default = `'t'`)
incol	The column in the text that will be converted. -1 is the vector. (default = 0)
outcol	The column in the data set where the numeric factors will be written (-1 means the vector). (default = 0)

For example:

1 apop_data *d = apop_query_to_mixed_data("mmt", "select 0, year, color from data");

2 apop_data_to_factors(d);

Notice that the query pulled a column of zeros for the sake of saving room for the factors. It reads column zero of the text, and writes it to column zero of the matrix.

Another example:

 apop_data *d = apop_query_to_data("select type, year from data");
 apop_data_to_factors(d, .intype='d', .incol=0, .outcol=0);

Here, the type column is converted to sequential integer factors and those factors overwrite the original data. Since a reference table is added as a second page of the apop_data set, you can recover the original values as needed.

Returns: A table of the factors used in the code. This is an apop_data set with only one column of text. Also, I add a page named "<categories for your_var>" giving a reference table of names and column numbers (where your_var is the appropriate column heading). Use apop_data_get_factor_names to retrieve that table.

Exceptions

out->error=='a'	allocation error.
out->error=='d'	dimension error.

If the vector or matrix you wanted to write to is NULL, I will allocate it for you.
See Generating factors for further discussion.
See the documentation for apop_logit for a sample linear model using this function.
This function uses the Designated initializers syntax for inputs.

See also: apop_data_to_dummies

apop_data * apop_data_transpose	(	apop_data *	in,
		char	transpose_text,
		char	inplace
	)

Transpose the matrix and text elements of the input data set, including the row/column names.

The vector and weights elements of the input data set are completely ignored (but see also apop_vector_to_matrix, which can convert a vector to a 1 X N matrix.) If copying, these other elements won't be present; if .inplace='y', it is up to you to handle these not-transposed elements correctly.

Parameters

in	The input apop_data set. If `NULL`, I return `NULL`. (default: `NULL`)
transpose_text	If `'y'`, then also transpose the text element. (default: `'y'`)
inplace	If `'y'`, transpose the input in place; if `'n'`, produce a transposed copy, leaving the original untouched. Due to how `gsl_matrix_transpose_memcpy` works, a copy will still be made, then copied to the original location. (default: `'y'`)

Returns: If inplace=='n', a newly alloced apop_data set, with the appropriately transposed matrix and/or text. The vector and weights elements will be NULL. If transpose_text='n', then the text element of the output set will also be NULL.
if inplace=='y', a pointer to the original data set, with matrix and (if transpose_text='y', text) transposed and vector and weights left in place untouched.

Row names are written to column names of the output matrix, text, or both (whichever is not empty in the input).
If only the matrix or only the text have names, then the one set of names is written to the row names of the output.
If both matrix column names and text column names are present, text column names are lost.
if you have a gsl_matrix with no names or text, you may prefer to use gsl_matrix_transpose_memcpy.
This function uses the Designated initializers syntax for inputs.

void apop_data_unpack	(	const gsl_vector *	in,
		apop_data *	d,
		char	use_info_pages
	)

This is the complement to apop_data_pack, qv. It writes the gsl_vector produced by that function back to the apop_data set you provide. It overwrites the data in the vector and matrix elements and, if present, the weights (and that's it, so names or text are as before).

Parameters

in	A `gsl_vector` of the form produced by apop_data_pack. No default; must not be `NULL`.
d	The data set to be filled. Must be allocated to the correct size. No default; must not be `NULL`.
use_info_pages	Pages in XML-style brackets, such as `<Covariance>` will be ignored unless you set `.use_info_pages='y'`. Be sure that this is set to the same thing when you both pack and unpack. (Default: `'n'`).

If I get to the end of the first page of the apop_data set and have more entries in the vector to unpack, and the data to fill has a more element, then I will continue into subsequent pages.
This function uses the Designated initializers syntax for inputs.

int apop_db_close ( char vacuum )

Closes the database on disk. If you opened the database with apop_db_open(NULL), then this is basically optional.

Parameters

vacuum 'v': vacuum—do clean-up to minimize the size of the database on disk.
'q': Don't bother; just close the database. (default = 'q')

Returns

0 on OK, nonzero on error.

This function uses the Designated initializers syntax for inputs.

int apop_db_open ( char const * filename )

If you want to use a database on the hard drive instead of memory, then call this once and only once before using any other database utilities.

With SQLite, if you want a disposable database which you won't use after the program ends, don't bother with this function.

The trade-offs between an on-disk database and an in-memory db are as one would expect: memory is faster, but the database is destroyed when the program exits.

MySQL users: either set the environment variable APOP_DB_ENGINE=mysql or set apop_opts.db_engine = 'm'.

The Apophenia package assumes you are only using a single database at a time. You can use the SQL attach function to load other databases, or see this blog post for further suggestions and sample code.

When you are done doing your database manipulations, call apop_db_close if writing to disk.

Parameters

filename The name of a file on the hard drive on which to store the database. If NULL, then the database will be kept in memory (in which case, the other database functions will call this function for you and you don't need to bother).

See About SQL, the syntax for querying databases for more notes on using databases.

Returns: 0: everything OK
1: database did not open.

apop_data * apop_db_to_crosstab	(	char const *	tabname,
		char const *	row,
		char const *	col,
		char const *	data,
		char	is_aggregate
	)

Give the name of a table in the database, and optional names of three of its columns: the x-dimension, the y-dimension, and the data. The output is a 2D matrix with rows indexed by 'row' and cols by 'col' and the cells filled with the entry in the 'data' column.

Parameters

tabname	The database table I'm querying. Anything that will work inside a `from` clause is OK, such as a subquery in parens. (no default; must not be `NULL`)
row	The column of the data set that will indicate the rows of the output crosstab (no default; must not be `NULL`)
col	The column of the data set that will indicate the columns of the output crosstab (no default; must not be `NULL`)
data	The column of the data set holding the data for the cells of the crosstab (default: `count(*)`)
is_aggregate	Set to `'y'` if the `data` is a function like `count(*)` or `sum(col)`. That is, set to `'y'` if querying this would require a `group by` clause. (default: if I find an end-paren in `data`, `'y'`; else `'n'`.)

If the query to get data to fill the table (select row, col, data from tabname) returns an empty data set, then I will return a NULL data set and if apop_opts.verbose >= 1 print a warning.

Exceptions

out->error='n'	Name not found error.
out->error='q'	Query returned an empty table (which might mean that it just failed).

The simplest use is to get a tally of how often (r1, r2) appears in the data via apop_db_to_crosstab("datatab", "r1", "r2").
If you want a 1-D crosstab, omit the other dimension. Or omit both to get a grand tally of your statistic for the entire table.
There is a command-line tool, apop_db_to_crosstab, that calls this function.
This function uses the Designated initializers syntax for inputs.

double apop_det_and_inv	(	const gsl_matrix *	in,
		gsl_matrix **	out,
		int	calc_det,
		int	calc_inv
	)

Calculate the determinant of a matrix, its inverse, or both, via LU decomposition. The in matrix is not destroyed in the process.

See also: apop_matrix_determinant, apop_matrix_inverse

Parameters

in	The matrix to be inverted/determined.
out	If you want an inverse, this is where to place the matrix to be filled with the inverse. Will be allocated by the function.
calc_det	0: Do not calculate the determinant. 1: Do.
calc_inv	0: Do not calculate the inverse. 1: Do.

Returns: If calc_det == 1, then return the determinant. Otherwise, just returns zero. If calc_inv!=0, then *out is pointed to the matrix inverse. In case of difficulty, I will set *out=NULL and return NaN.

apop_data * apop_dot	(	const apop_data *	d1,
		const apop_data *	d2,
		char	form1,
		char	form2
	)

A convenience function for dot products, which requires less prep and typing than the gsl_cblas_dgexx functions.

It makes use of the semi-overloading of the apop_data structure. d1 may be a vector or a matrix, and the same for d2, so this function can do vector dot matrix, matrix dot matrix, and so on. If d1 includes both a vector and a matrix, then later parameters will indicate which to use.

Parameters

d1	the left part of $d1 \cdot d2$
d2	the right part of $d1 \cdot d2$
form1	't' or 'p': transpose or prime `d1->matrix`, or, if `d1->matrix` is `NULL`, read `d1->vector` as a row vector. 'n' or 0: use matrix if present; no transpose. (the default) 'v': ignore the matrix and use the vector.
form2	As above, with `d2`.

Returns: an apop_data set. If two matrices come in, the vector element is NULL and the matrix has the dot product; if either or both are vectors, the vector has the output and the matrix is NULL.

Exceptions

out->error='a'	Allocation error.
out->error='d'	dimension-matching error.
out->error='m'	GSL math error.
NULL	If you ask me to take the dot product of NULL, I return NULL.

Some systems auto-transpose non-conforming matrices. You input a $3 \times 5$ and a $3 \times 5$ matrix, and the system assumes that you meant to transpose the second, producing a $(3 \times 5) \cdot (5 \times 3) \rightarrow (3 \times 3)$ output. Apophenia does not do this. First, it's ambiguous whether the output should be $3 \times 3$ or $5 \times 5$ . Second, your next run might have three observations, and two $3 \times 3$ matrices don't require transposition; auto-transposition thus creates situations where bugs can pop up on only some iterations of a loop.
For a vector $\cdot$ a matrix, the vector is always treated as a row vector, meaning that a $(3\times 1)$ dot a $(3\times 4)$ matrix is correct, and produces a $(1 \times 4)$ vector. For a matrix $\cdot$ a vector, the vector is always treated as a column vector. Requests for transposing the vector are ignored in both cases.
As a corollary to the above rule, a vector dot a vector always produces a scalar, which will be put in the zeroth element of the output vector; see the example.
If you want to multiply an $N \times 1$ vector $\cdot$ a $1 \times N$ vector to produce an $N \times N$ matrix, then use apop_vector_to_matrix to turn your vectors into matrices; see the example.
A note for readers of Modeling with Data: the awkward instructions on using this function on p 130 are now obsolete, thanks to the designated initializer syntax for function calls. Notably, in the case where d1 is a vector and d2 a matrix, then apop_dot(d1,d2,'t') won't work, because 't' now refers to d1. Instead use apop_dot(d1,d2,.form2='t') or apop_dot(d1,d2,0, 't')
This function uses the Designated initializers syntax for inputs.

Sample code:

/* A demonstration of dot products and various useful
   transformations among types. */
#include <apop.h>

/* Tolerance for the comparisons below. The series sum(1/n^2) converges
   slowly, so the tolerance is deliberately large. */
double eps=1e-3;

/* Assert that L and R differ by less than eps in absolute value.
   The comparison has to sit outside fabs(): fabs((L)-(R)<(eps)) takes the
   absolute value of the 0/1 comparison result, which only checks
   (L)-(R) < eps and lets an L far below R pass. */
#define Diff(L, R) Apop_assert(fabs((L)-(R))<(eps), "%g is too different from %g (abitrary limit=%g).", (double)(L), (double)(R), eps);
int main(){
    int len = 3000;
    gsl_vector *v = gsl_vector_alloc(len);
    // v[i] = 1/(i+1), so v dot v = 1 + (1/2)^2 + (1/3)^2 + ...
    // An integer counter is used because it doubles as the vector index.
    for (int i=0; i< len; i++) gsl_vector_set(v, i, 1./(i+1));
    double square;
    gsl_blas_ddot(v, v, &square);
    printf("1 + (1/2)^2 + (1/3)^2 + ...= %g\n", square);
    double pi_over_six = gsl_pow_2(M_PI)/6.;
    Diff(square, pi_over_six);
    /* Now using apop_dot, in a few forms.
       First, vector-as-data dot itself.
       If one of the inputs is a vector,
       apop_dot puts the output in a vector-as-data:*/
    // v_as_data is a stack-allocated wrapper that borrows v; it is never freed itself.
    apop_data *v_as_data = &(apop_data){.vector=v};
    apop_data *vdotv = apop_dot(v_as_data, v_as_data);
    Diff(gsl_vector_get(vdotv->vector, 0), pi_over_six);
    /* Wrap matrix in an apop_data set. */
    gsl_matrix *v_as_matrix = apop_vector_to_matrix(v);
    apop_data dm = (apop_data){.matrix=v_as_matrix};
    // (1 X len) vector dot (len X 1) matrix --- produce a scalar (one item vector).
    apop_data *mdotv = apop_dot(v_as_data, &dm);
    double scalarval = apop_data_get(mdotv);
    Diff(scalarval, pi_over_six);
    //(len X 1) dot (len X 1) --- bad dimensions.
    apop_opts.verbose=-1; //don't print an error.
    apop_data *mdotv2 = apop_dot(&dm, v_as_data);
    apop_opts.verbose=0; //back to safety.
    assert(mdotv2->error);
    // If we want (len X 1) dot (1 X len) --> (len X len),
    // use apop_vector_to_matrix.
    apop_data dmr = (apop_data){.matrix=apop_vector_to_matrix(v, .row_col='r')};
    apop_data *product_matrix = apop_dot(&dm, &dmr);
    //The trace is the sum of squares:
    gsl_vector_view trace = gsl_matrix_diagonal(product_matrix->matrix);
    double tracesum = apop_sum(&trace.vector);
    Diff(tracesum, pi_over_six);
    // Release everything allocated above, not only the last product.
    apop_data_free(product_matrix);
    apop_data_free(mdotv2);
    apop_data_free(mdotv);
    apop_data_free(vdotv);
    gsl_matrix_free(dmr.matrix);
    gsl_matrix_free(v_as_matrix);
    gsl_vector_free(v);
}

int apop_draw	(	double *	out,
		gsl_rng *	r,
		apop_model *	m
	)

Draw from a model.

Parameters

out	An already-allocated array of `double`s to be filled by the draw method. It must have size `m->dsize`.
r	A `gsl_rng`, probably allocated via apop_rng_alloc. Optional; if `NULL`, then I will call apop_rng_get_thread for an RNG.
m	The model from which to make draws.

If the model has its own draw method, then this function will call it.
Else, if the model is univariate, use apop_arms_draw to generate random draws.
Else, if the model is multivariate, use apop_model_metropolis to generate random draws.
This makes a single draw of the given size. See apop_model_draws to fill a matrix with draws.

Returns: Zero on success; nonzero on failure. out[0] is probably NAN on failure.

apop_model * apop_estimate	(	apop_data *	d,
		apop_model *	m
	)

Estimate the parameters of a model given data.

This function copies the input model, preps it (see apop_prep), and calls m.estimate(d, m) (which users are encouraged to never call directly). If your model has no estimate method, then call apop_maximum_likelihood(d, m), with the default MLE settings.

Parameters

d	The data
m	The model

Returns: A pointer to an output model, which typically matches the input model but has its parameters element filled in.

apop_data * apop_estimate_coefficient_of_determination ( apop_model * m )

Also known as $R^2$ . Let $Y$ be the dependent variable, $\epsilon$ the residual, $n$ the number of data points, and $k$ the number of independent vars (including the constant). Returns an apop_data set with the following entries (in the vector element):

$SST \equiv \sum (Y_i - \bar Y) ^2$
$SSE \equiv \sum \epsilon ^2$
$R^2 \equiv 1 - {SSE\over SST}$
$R^2_{adj} \equiv R^2 - {(k-1)\over (n-k-1)}(1-R^2)$

Internally allocates (and frees) a vector the size of your data set.

Returns

A $5 \times 1$ apop_data table with the following fields:

"R squared"
"R squared adj"
"SSE"
"SST"
"SSR"

If the output is in sss, use apop_data_get(sss, .rowname="SSE") to get the SSE, and so on for the other items.

Parameters

m A model. I use the pointer to the data set used for estimation and the info page named "<Predicted>". The Predicted page should include observed, expected, and residual columns, which I use to generate the sums of squared errors and residuals, et cetera. All generalized linear models produce a page with this name and of this form, as do a host of other models. Nothing keeps you from finding the $R^2$ of, say, a kernel smooth; it is up to you to determine whether such a thing is appropriate to your given models and situation.

apop_estimate(yourdata, apop_ols) does this automatically
If I don't find a "<Predicted>" page, print an error (iff apop_opts.verbose >=0) and return NULL.
The number of observations equals the number of rows in the Predicted page
The number of independent variables, needed only for the adjusted $R^2$, is from the number of columns in the main data set's matrix (i.e. the first page; i.e. the set of parameters if this is the parameters output from a model estimation).
If your data (first page again) has a weights vector, I will find weighted SSE, SST, and SSR (and calculate the $R^2$s using those values).

apop_model * apop_estimate_restart	(	apop_model *	e,
		apop_model *	copy,
		char *	starting_pt,
		double	boundary
	)

Maximum likelihood searches are not guaranteed to find a global optimum, and it can be difficult to tune a search such that it covers a wide space, but also accurately hones in on the optimum. In both cases, one could restart the search using a different starting point or different parameters.

The simplest use of this function is to restart a model at the latest parameter estimates.

 // Run the initial estimation, then restart the search ten times.
 // With no further arguments, each restart uses the defaults described below.
 apop_model *m = apop_estimate(data, model_using_an_MLE_search);
 for (int i=0; i< 10; i++)
     m = apop_estimate_restart(m);
 // NOTE(review): m is an apop_model*; confirm that apop_data_show accepts it.
 apop_data_show(m);

By adding a line to reduce the tolerance each round [e.g., Apop_settings_set(m, apop_mle, tolerance, pow(10,-i))], you can start broad and hone in on a precise optimum.

You may have a new estimation method, such as first doing a coarse simulated annealing search, then a fine conjugate gradient search. When reading this example, recall that the form for adding a new settings group differs from the form for modifying existing settings:

 // Add a new MLE settings group selecting simulated annealing for the first pass.
 Apop_model_add_settings(your_base_model, apop_mle, .method=APOP_SIMAN);
 apop_model *m = apop_estimate(data, your_base_model);
 // Modify the existing settings group to switch methods, then restart.
 Apop_settings_set(m, apop_mle, method, APOP_CG_PR);
 m = apop_estimate_restart(m);
 // NOTE(review): m is an apop_model*; confirm that apop_data_show accepts it.
 apop_data_show(m);

Only one estimate is returned, either the one you sent in or a new one. The loser (which may be the one you sent in) is freed, to prevent memory leaks.

Parameters

e	An apop_model that is the output from a prior MLE estimation. (No default, must not be `NULL`.)
copy	Another not-yet-parametrized model that will be re-estimated with (1) the same data and (2) a `starting_pt` as per the next setting (probably to the parameters of `e`). If this is `NULL`, then copy `e`. (Default = `NULL`)
starting_pt	"ep"=last estimate of the first model (i.e., its current parameter estimates) "es"= starting point originally used by the first model "np"=current parameters of the new (second) model "ns"=starting point specified by the new model's MLE settings. (default = "ep")
boundary	I test whether the starting point you give me has magnitude greater than this bound, so I can warn you if there's divergence in your sequence of re-estimations. (default: 1e8)

Returns: If the new estimated parameters include any NaNs/Infs, then the old estimate is returned (even if the old estimate included NaNs/Infs). Otherwise, the estimate with the largest log likelihood is returned.

This function uses the Designated initializers syntax for inputs.

apop_data * apop_f_test	(	apop_model *	est,
		apop_data *	contrast
	)

Runs an F-test specified by q and c. See the chapter on hypothesis testing in Modeling With Data, p 309, which will tell you that:

${N-K\over q} {({\bf Q}'\hat\beta - {\bf c})' [{\bf Q}' ({\bf X}'{\bf X})^{-1} {\bf Q}]^{-1} ({\bf Q}' \hat\beta - {\bf c}) \over {\bf u}' {\bf u} } \sim F_{q,N-K},$

and that's what this function is based on.

Parameters

est An apop_model that you have already calculated. (No default)

contrast An apop_data set whose matrix represents ${\bf Q}$ and whose vector represents ${\bf c}$ . Each row represents a hypothesis. (Defaults: if matrix is NULL, it is set to the identity matrix with the top row missing. If the vector is NULL, it is set to a zero matrix of length equal to the height of the contrast matrix. Thus, if the entire apop_data set is NULL or omitted, we are testing the hypothesis that all but $\beta_1$ are zero.)

Returns: An apop_data set with a few variants on the confidence with which we can reject the joint hypothesis.

Exceptions

out->error='a'	Allocation error.
out->error='d'	dimension-matching error.
out->error='i'	matrix inversion error.
out->error='m'	GSL math error.

There are two approaches to an $F$-test: the ANOVA approach, which is typically built around the claim that all effects but the mean are zero; and the more general regression form, which allows for any set of linear claims about the data. If you send a NULL contrast set, I will generate the set of linear contrasts that are equivalent to the ANOVA-type approach. This is why the top row of the default ${\bf Q}$ matrix is missing: there is no hypothesis test about the coefficient for the constant term. See the example below.
This function uses the Designated initializers syntax for inputs.

/* Directory holding the sample data: use Datadir when it is defined
   (e.g. by the build), and fall back to the current directory otherwise. */
#ifdef Datadir
#define DATADIR Datadir
#else
#define DATADIR "."
#endif
#include <apop.h>
/* Compare L and R: when their difference is NaN or exceeds eps in absolute
   value, report both values and call abort(). L and R are each evaluated once. */
#define Diff(L, R, eps) {double left=(L), right=(R); Apop_stopif(isnan(left-right) || fabs((left)-(right))>(eps), abort(), 0, "%g is too different from %g (abitrary limit=%g).", (double)(left), (double)(right), eps);}
/* Check apop_F_test on an estimated model in two ways: with an explicitly
   built contrast set and with a NULL contrast. Both F statistics are compared
   against the value implied by the model's R squared.
   est: an already-estimated model with data and parameters attached. */
void test_f(apop_model *est){
    // Read the parameter count once, as an int, so the loop below does not
    // compare a signed index against a size_t.
    int param_ct = est->parameters->vector->size;
    apop_data *rsq  = apop_estimate_coefficient_of_determination(est);
    // Explicit contrasts: row i-1 has a single 1 in column i, i.e. the
    // identity matrix with its top row removed.
    apop_data *constr= apop_data_calloc(param_ct-1, param_ct);
    for (int i=1; i< param_ct; i++)
        apop_data_set(constr, i-1, i, 1);
    apop_data *ftab = apop_F_test(est, constr);
    apop_data *ftab2 = apop_F_test(est, NULL);
    //apop_data_show(ftab);
    //apop_data_show(ftab2);
    double n = est->data->matrix->size1;
    double K = param_ct-1;
    double r = apop_data_get(rsq, .rowname="R squared");
    double f = apop_data_get(ftab, .rowname="F statistic");
    double f2 = apop_data_get(ftab2, .rowname="F statistic");
    Diff (f , r*(n-K)/((1-r)*K) , 1e-3);
    Diff (f2 , r*(n-K)/((1-r)*K) , 1e-3);
    // Release the intermediate data sets; none of them is returned.
    apop_data_free(ftab2);
    apop_data_free(ftab);
    apop_data_free(constr);
    apop_data_free(rsq);
}
/* Estimate an OLS model on the sample data and run the F-test checks on it. */
int main(){
    apop_data *dataset = apop_text_to_data( DATADIR "/" "test_data2" );
    // Work on a copy of apop_ols so the settings group is not added to the shared model.
    apop_model *ols = apop_model_copy(apop_ols);
    Apop_model_add_group(ols, apop_lm, .want_expected_value= 1);
    apop_model *estimate = apop_estimate(dataset, ols);
    test_f(estimate);
}

Runs an F-test specified by q and c. See the chapter on hypothesis testing in Modeling With Data, p 309, which will tell you that:

${N-K\over q} {({\bf Q}'\hat\beta - {\bf c})' [{\bf Q}' ({\bf X}'{\bf X})^{-1} {\bf Q}]^{-1} ({\bf Q}' \hat\beta - {\bf c}) \over {\bf u}' {\bf u} } \sim F_{q,N-K},$

and that's what this function is based on.

Parameters

est An apop_model that you have already calculated. (No default)

contrast An apop_data set whose matrix represents ${\bf Q}$ and whose vector represents ${\bf c}$ . Each row represents a hypothesis. (Defaults: if matrix is NULL, it is set to the identity matrix with the top row missing. If the vector is NULL, it is set to a zero matrix of length equal to the height of the contrast matrix. Thus, if the entire apop_data set is NULL or omitted, we are testing the hypothesis that all but $\beta_1$ are zero.)

Returns: An apop_data set with a few variants on the confidence with which we can reject the joint hypothesis.

Exceptions

out->error='a'	Allocation error.
out->error='d'	dimension-matching error.
out->error='i'	matrix inversion error.
out->error='m'	GSL math error.

There are two approaches to an $F$-test: the ANOVA approach, which is typically built around the claim that all effects but the mean are zero; and the more general regression form, which allows for any set of linear claims about the data. If you send a NULL contrast set, I will generate the set of linear contrasts that are equivalent to the ANOVA-type approach. This is why the top row of the default ${\bf Q}$ matrix is missing: there is no hypothesis test about the coefficient for the constant term. See the example below.
This function uses the Designated initializers syntax for inputs.

/* Directory holding the sample data: use Datadir when it is defined
   (e.g. by the build), and fall back to the current directory otherwise. */
#ifdef Datadir
#define DATADIR Datadir
#else
#define DATADIR "."
#endif
#include <apop.h>
/* Compare L and R: when their difference is NaN or exceeds eps in absolute
   value, report both values and call abort(). L and R are each evaluated once. */
#define Diff(L, R, eps) {double left=(L), right=(R); Apop_stopif(isnan(left-right) || fabs((left)-(right))>(eps), abort(), 0, "%g is too different from %g (abitrary limit=%g).", (double)(left), (double)(right), eps);}
/* Check apop_F_test on an estimated model in two ways: with an explicitly
   built contrast set and with a NULL contrast. Both F statistics are compared
   against the value implied by the model's R squared.
   est: an already-estimated model with data and parameters attached. */
void test_f(apop_model *est){
    // Read the parameter count once, as an int, so the loop below does not
    // compare a signed index against a size_t.
    int param_ct = est->parameters->vector->size;
    apop_data *rsq  = apop_estimate_coefficient_of_determination(est);
    // Explicit contrasts: row i-1 has a single 1 in column i, i.e. the
    // identity matrix with its top row removed.
    apop_data *constr= apop_data_calloc(param_ct-1, param_ct);
    for (int i=1; i< param_ct; i++)
        apop_data_set(constr, i-1, i, 1);
    apop_data *ftab = apop_F_test(est, constr);
    apop_data *ftab2 = apop_F_test(est, NULL);
    //apop_data_show(ftab);
    //apop_data_show(ftab2);
    double n = est->data->matrix->size1;
    double K = param_ct-1;
    double r = apop_data_get(rsq, .rowname="R squared");
    double f = apop_data_get(ftab, .rowname="F statistic");
    double f2 = apop_data_get(ftab2, .rowname="F statistic");
    Diff (f , r*(n-K)/((1-r)*K) , 1e-3);
    Diff (f2 , r*(n-K)/((1-r)*K) , 1e-3);
    // Release the intermediate data sets; none of them is returned.
    apop_data_free(ftab2);
    apop_data_free(ftab);
    apop_data_free(constr);
    apop_data_free(rsq);
}
/* Estimate an OLS model on the sample data and run the F-test checks on it. */
int main(){
    apop_data *dataset = apop_text_to_data( DATADIR "/" "test_data2" );
    // Work on a copy of apop_ols so the settings group is not added to the shared model.
    apop_model *ols = apop_model_copy(apop_ols);
    Apop_model_add_group(ols, apop_lm, .want_expected_value= 1);
    apop_model *estimate = apop_estimate(dataset, ols);
    test_f(estimate);
}

long double apop_generalized_harmonic	(	int	N,
		double	s
	)

Calculate $\sum_{n=1}^N {1\over n^s}$

There are no doubt efficient shortcuts to doing this, but I use brute force. [Though Knuth's Art of Computer Programming v1 doesn't offer anything, which is strong indication of nonexistence.] To speed things along, I save the results so that they can just be looked up should you request the same calculation.

If N is zero or negative, return NaN. Notify the user if apop_opts.verbose >= 0.

For example:

#include <apop.h>
/* Spot-check apop_generalized_harmonic against sums with a closed form. */
int main(){
    // s = 0: every term is 1, so the sum over n = 1..270 is 270.
    double all_ones = apop_generalized_harmonic(270, 0.0);
    assert (all_ones == 270);

    // s = -1: the terms are 1, 2, ..., N, so the sum is N(N+1)/2.
    double triangular_370 = apop_generalized_harmonic(370, -1.0);
    assert (triangular_370 == 370*371/2);

    double triangular_12 = apop_generalized_harmonic(12, -1.0);
    assert (triangular_12 == 12*13/2);
}

apop_data * apop_histograms_test_goodness_of_fit	(	apop_model *	observed,
		apop_model *	expected
	)

Test the goodness-of-fit between two apop_pmf models.

Let $o_i$ be the $i$ th observed bin and $e_i$ the expected value of that bin; then under typical assumptions, $\sum_i^N (o_i-e_i)^2/e_i \sim \chi^2_{N-1}$ .

If you send two histograms, I assume that the histograms are synced: for PMFs, you've used apop_data_to_bins to generate two histograms using the same binspec, or you've used apop_data_pmf_compress to guarantee that each observation value appears exactly once in each data set.

In any case, all values in the observed set must appear in the expected set with nonzero weight; otherwise this will return a $\chi^2$ statistic of GSL_POSINF, indicating that it is impossible for the observed data to have been drawn from the expected distribution.

If an observation row has weight zero, I skip it. If apop_opts.verbose >=1 I will show a warning.

apop_data * apop_jackknife_cov	(	apop_data *	in,
		apop_model *	model
	)

Give me a data set and a model, and I'll give you the jackknifed covariance matrix of the model parameters.

The basic algorithm for the jackknife (glossing over the details): create a sequence of data sets, each with exactly one observation removed, and then produce a new set of parameter estimates using that slightly shortened data set. Then, find the covariance matrix of the derived parameters.

Jackknife or bootstrap? As a broad rule of thumb, the jackknife works best on models that are closer to linear. The worse a linear approximation does (at the given data), the worse the jackknife approximates the variance.

Parameters

in	The data set. An apop_data set where each row is a single data point.
model	An apop_model, that will be used internally by apop_estimate.

Exceptions

out->error=='n' NULL input data.

Returns: An apop_data set whose matrix element is the estimated covariance matrix of the parameters.

See also: apop_bootstrap_cov

For example:

#include <apop.h>
/* Draw from a Normal(1, 3), jackknife the covariance of the estimated
   parameters, and compare it with the values the comments below expect. */
int main(){
    int n_draws = 1000;
    apop_model *normal = apop_model_set_parameters(apop_normal, 1, 3);
    double sd = apop_data_get(normal->parameters, 1);
    apop_data *draws = apop_model_draws(normal, n_draws);
    apop_data *cov = apop_jackknife_cov(draws, normal);

    // Absolute deviation of each covariance entry from its expected value.
    double var_mu_gap    = fabs(apop_data_get(cov, 0,0)-gsl_pow_2(sd)/n_draws);     //var(mu)
    double var_sigma_gap = fabs(apop_data_get(cov, 1,1)-gsl_pow_2(sd)/(2*n_draws)); //var(sigma)
    double cov_01_gap    = fabs(apop_data_get(cov, 0,1));  //cov(mu,sigma); should be 0.
    double cov_10_gap    = fabs(apop_data_get(cov, 1,0));  //cov(sigma,mu); should be 0.
    double error = var_mu_gap + var_sigma_gap + cov_01_gap + cov_10_gap;

    apop_data_free(draws);
    apop_data_free(cov);
    assert(error < 1e-2);//Not very accurate.
}

long double apop_kl_divergence	(	apop_model *	from,
		apop_model *	to,
		int	draw_ct,
		gsl_rng *	rng
	)

Kullback-Leibler divergence.

This measure of the divergence of one distribution from another has the form $D(p,q) = \sum_i \ln(p_i/q_i) p_i$ . Notice that it is not a distance, because there is an asymmetry between $p$ and $q$ , so one can expect that $D(p, q) \neq D(q, p)$ .

Parameters

from	the $p$ in the above formula. (No default; must not be `NULL`)
to	the $q$ in the above formula. (No default; must not be `NULL`)
draw_ct	If I do the calculation via random draws, how many? (Default = 1e5)
rng	A `gsl_rng`. If `NULL` or number of threads is greater than 1, I'll take care of the RNG; see apop_rng_get_thread. (Default = `NULL`)

This function can take empirical histogram-type models (apop_pmf) or continuous models like apop_loess or apop_normal.

If the from distribution is a PMF (determined by checking whether its p function is that of apop_pmf), then I'll step through it for the points in the summation.

If you have two empirical distributions in the form of apop_pmf, they must be synced: if $p_i>0$ but $q_i=0$, then the function returns GSL_NEGINF. If apop_opts.verbose >=1 I print a message as well.

If the from distribution is not a PMF, then I will take draw_ct random draws from from and evaluate at those points.

Set apop_opts.verbose = 3 for observation-by-observation info.

This function uses the Designated initializers syntax for inputs.

long double apop_linear_constraint	(	gsl_vector *	beta,
		apop_data *	constraint,
		double	margin
	)

This is designed to be called from within the constraint method of your apop_model. Just write the constraint vector+matrix and this will do the rest. See Setting Constraints for detailed discussion.

Parameters

beta	The proposed vector about to be tested. No default, must not be `NULL`.
constraint	A vector/matrix pair [v \| m1 m2 ... mn] where each row is interpreted as a less-than inequality: $v < m_1 x_1 + m_2 x_2 + \dots + m_n x_n$. For example, say your constraints are $3 < 2x + 4y + 7z$ and $y$ is positive, i.e. $0 < y$. Allocate and fill the matrix representing these two constraints via: `apop_data *constr = apop_data_falloc((2,2,3), 3, 2, 4, 7, 0, 0, 1, 0);`. Default: each element is greater than zero. For three parameters this would be equivalent to setting `apop_data *constr = apop_data_falloc((3,3,3), 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1);`
margin	If zero, then this is a >= constraint, otherwise I will return a point this amount within the borders. You could try `GSL_DBL_EPSILON`, which is the smallest value a `double` can hold, or something like 1e-3. Default = 0.

Returns: The penalty: the distance between beta and the closest point that meets the constraints. If the constraint is met, the penalty is zero. If the constraint is not met, this beta is shifted by margin (Euclidean distance) to meet the constraints.

If your apop_data has more structure than a vector, try apop_data_pack to pack it into a vector. This is what apop_maximum_likelihood does.
The function doesn't check for odd cases like coplanar constraints.
This function uses the Designated initializers syntax for inputs.

double apop_log_likelihood	(	apop_data *	d,
		apop_model *	m
	)

Find the log likelihood of a data/parametrized model pair.

Parameters

d	The data
m	The parametrized model, which must have either a `log_likelihood` or a `p` method.

apop_data * apop_map	(	apop_data *	in,
		apop_fn_d *	fn_d,
		apop_fn_v *	fn_v,
		apop_fn_r *	fn_r,
		apop_fn_dp *	fn_dp,
		apop_fn_vp *	fn_vp,
		apop_fn_rp *	fn_rp,
		apop_fn_dpi *	fn_dpi,
		apop_fn_vpi *	fn_vpi,
		apop_fn_rpi *	fn_rpi,
		apop_fn_di *	fn_di,
		apop_fn_vi *	fn_vi,
		apop_fn_ri *	fn_ri,
		void *	param,
		int	inplace,
		char	part,
		int	all_pages
	)

Apply a function to every element of a data set, matrix or vector; or, apply a vector-taking function to every row or column of a matrix.

Your function could take any combination of a gsl_vector, a double, an apop_data, a parameter set, and the position of the element in the vector or matrix. As such, the function takes twelve function inputs, one for each combination of vector/matrix, params/no params, index/no index. Fortunately, because this function uses the Designated initializers syntax for inputs, you will specify only one.

For example, here is a function that will cut off each element of the input data to between $(-1, +1)$ . It takes in a lone double and a parameter in a void*, so it gets sent to apop_map via .fn_dp=cutoff.

 double cutoff(double in, void *limit_in){ 
     double *limit = limit_in;
     return GSL_MAX(-*limit, GSL_MIN(*limit, in)); 
 }
 
 // Limit of 1, handed to cutoff through the void* param argument.
 double param = 1;
 // cutoff takes a double and a param pointer, hence .fn_dp; .inplace='y' modifies your_data itself.
 apop_map(your_data, .fn_dp=cutoff, .param=&param, .inplace='y');

Parameters

fn_v	A function of the form `double your_fn(gsl_vector *in)`
fn_d	A function of the form `double your_fn(double in)`
fn_r	A function of the form `double your_fn(apop_data *in)`
fn_vp	A function of the form `double your_fn(gsl_vector *in, void *param)`
fn_dp	A function of the form `double your_fn(double in, void *param)`
fn_rp	A function of the form `double your_fn(apop_data *in, void *param)`
fn_vpi	A function of the form `double your_fn(gsl_vector *in, void *param, int index)`
fn_dpi	A function of the form `double your_fn(double in, void *param, int index)`
fn_rpi	A function of the form `double your_fn(apop_data *in, void *param, int index)`
fn_vi	A function of the form `double your_fn(gsl_vector *in, int index)`
fn_di	A function of the form `double your_fn(double in, int index)`
fn_ri	A function of the form `double your_fn(apop_data *in, int index)`
in	The input data set. If `NULL`, I'll return `NULL` immediately.
param	A pointer to the parameters to be passed to those function forms taking a `*param`.
part	Which part of the `apop_data` struct should I use? 'v'==Just the vector 'm'==Every element of the matrix, in turn 'a'==Both 'v' and 'm' 'r'==Apply a function `gsl_vector` $\to$ `double` to each row of the matrix 'c'==Apply a function `gsl_vector` $\to$ `double` to each column of the matrix Default is 'a', but notice that I'll ignore a `NULL` vector or matrix, so if your data set has only a vector or only a matrix, that's what I'll use.
all_pages	If `'y'`, then follow the `more` pointer to subsequent pages. If `'n'`, handle only the first page of data. Default: `'n'`.
inplace	If 'n' (the default), generate a new apop_data set for output, which will contain the mapped values (and the names from the original set). If 'y', modify in place. The `double` $\to$ `double` versions, `'v'`, `'m'`, and `'a'`, write to exactly the same location as before. The `gsl_vector` $\to$ `double` versions, `'r'`, and `'c'`, will write to the vector. Be careful: if you are writing in place and there is already a vector there, then the original vector is lost. If 'v' (as in void), return `NULL`. (Default = 'n')

Exceptions

out->error='p' missing or mismatched parts error, such as NULL matrix when you sent a function acting on the matrix element.

The function forms with r in them, like fn_ri, are row-by-row. I'll use Apop_r to get each row in turn, and send it to the function. The first implication is that your function should be expecting a apop_data set with exactly one row in it. The second is that part is ignored: it only makes sense to go row-by-row.
For these r functions, if you set inplace='y', then you will be modifying your input data set, row by row; if you set inplace='n', then I will return an apop_data set whose vector element is as long as your data set (i.e., as long as the longest of your text, vector, or matrix parts).
If you set the number of threads via omp_set_num_threads(n), I will split the data set into as many chunks as you specify and process them simultaneously. You need to watch out for the usual hang-ups about multithreaded programming, but if your data is iid, and each row's processing is independent of the others, you should have no problems. Bear in mind that generating threads takes some small overhead, so simple cases like adding a few hundred numbers will actually be slower when threading.
See Map/apply for many more examples and notes.
See also
apop_map_sum

double apop_map_sum	(	apop_data *	in,
		apop_fn_d *	fn_d,
		apop_fn_v *	fn_v,
		apop_fn_r *	fn_r,
		apop_fn_dp *	fn_dp,
		apop_fn_vp *	fn_vp,
		apop_fn_rp *	fn_rp,
		apop_fn_dpi *	fn_dpi,
		apop_fn_vpi *	fn_vpi,
		apop_fn_rpi *	fn_rpi,
		apop_fn_di *	fn_di,
		apop_fn_vi *	fn_vi,
		apop_fn_ri *	fn_ri,
		void *	param,
		char	part,
		int	all_pages
	)

A function that effectively calls apop_map and returns the sum of the resulting elements. Thus, this function returns a double. See the apop_map page for details of the inputs, which are the same here, except that inplace doesn't make sense—this function will always just add up the input function outputs.

I don't copy the input data to send to your input function. Therefore, if your function modifies its inputs as a side-effect, your data set will be modified as this function runs.
The sum of zero elements is zero, so that is what is returned if the input apop_data set is NULL. If apop_opts.verbose >= 2 print a warning.
See Map/apply for many more examples and notes.
This function uses the Designated initializers syntax for inputs.

void apop_matrix_apply	(	gsl_matrix *	m,
		void(*)(gsl_vector *)	fn
	)

Apply a function to every row of a matrix. The function that you input takes in a gsl_vector and returns nothing. apop_matrix_apply will produce a vector view of each row, and send each row to your function.

Parameters

m	The matrix
fn	A function of the form `void fn(gsl_vector* in)` which may modify the data at the `in` pointer in place.

If the matrix is NULL, this is a no-op and returns immediately.
See the map/apply page for details.
See also
apop_map, apop_map_sum

void apop_matrix_apply_all	(	gsl_matrix *	in,
		void(*)(double *)	fn
	)

Applies a function to every element in a matrix (as opposed to every row)

Parameters

in	The matrix whose elements will be inputs to the function
fn	A function with a form like `void f(double *in)` which may modify the data at the `in` pointer in place.

If the matrix is NULL, this is a no-op and returns immediately.
See the map/apply page for details.
See also
apop_map, apop_map_sum

gsl_matrix * apop_matrix_copy ( const gsl_matrix * in )

Copy one gsl_matrix to another. That is, all data are duplicated. Unlike gsl_matrix_memcpy, this function allocates and returns the destination, so you can use it like this:

1 gsl_matrix *a_copy = apop_matrix_copy(original);

Parameters

in	the input data

Returns: A structure that this function will allocate and fill. If gsl_matrix_alloc fails, returns NULL.

double apop_matrix_determinant ( const gsl_matrix * in )

Find the determinant of a matrix. The in matrix is not destroyed in the process.

See also apop_matrix_inverse , or apop_det_and_inv to do both at once.

Parameters

in	The matrix to be determined.

Returns: The determinant.

gsl_matrix * apop_matrix_inverse ( const gsl_matrix * in )

Inverts a matrix. The in matrix is not destroyed in the process. You may want to call apop_matrix_determinant first to check that your input is invertible, or use apop_det_and_inv to do both at once.

Parameters

in	The matrix to be inverted.

Returns: Its inverse.

int apop_matrix_is_positive_semidefinite	(	gsl_matrix *	m,
		char	semi
	)

Test whether the input matrix is positive semidefinite (PSD).

A covariance matrix will always be PSD, so this function can tell you whether your matrix is a valid covariance matrix.

Consider the 1x1 matrix in the upper left of the input, then the 2x2 matrix in the upper left, on up to the full matrix. If the matrix is PSD, then each of these has a positive determinant. This function thus calculates $N$ determinants for an $N$ x $N$ matrix.

Parameters

m	The matrix to test. If `NULL`, I will return zero—not PSD.
semi	If anything but `'s'`, check for positive definite, not semidefinite. (default 's')

See also apop_matrix_to_positive_semidefinite, which will change the input to something PSD.

This function uses the Designated initializers syntax for inputs.

gsl_vector * apop_matrix_map	(	const gsl_matrix *	m,
		double(*)(gsl_vector *)	fn
	)

Map a function onto every row of a matrix. The function that you input takes in a gsl_vector and returns a double. This function will produce a sequence of vector views of each row of the input matrix, and send each to your function. It will output a gsl_vector holding your function's output for each row.

Parameters

m	The matrix
fn	A function of the form `double fn(gsl_vector* in)`

Returns: A gsl_vector with the corresponding value for each row.

If you input a NULL matrix, I return NULL.
See the map/apply page for details.
See also
apop_map, apop_map_sum

gsl_matrix * apop_matrix_map_all	(	const gsl_matrix *	in,
		double(*)(double)	fn
	)

Maps a function to every element in a matrix (as opposed to every row).

Parameters

in	The matrix whose elements will be inputs to the function
fn	A function with a form like `double f(double in)`.

Returns: a matrix of the same size as the original, with the function applied.

If you input a NULL matrix, I return NULL.
See the map/apply page for details.
See also
apop_map, apop_map_sum

double apop_matrix_map_all_sum	(	const gsl_matrix *	in,
		double(*)(double)	fn
	)

Like apop_matrix_map_all, but returns the sum of the resulting mapped function. For example, apop_matrix_map_all_sum(m, isnan) returns the number of elements of m that are NaN.

If you input a NULL matrix, I return the sum of zero items: zero.
See the map/apply page for details.
See also
apop_map, apop_map_sum

double apop_matrix_map_sum	(	const gsl_matrix *	in,
		double(*)(gsl_vector *)	fn
	)

Like apop_matrix_map, but returns the sum of the resulting mapped vector. For example, let log_like be a function that returns the log likelihood of an input vector; then apop_matrix_map_sum(m, log_like) returns the total log likelihood of the rows of m.

If you input a NULL matrix, I return the sum of zero items: zero.
See the map/apply page for details.
See also
apop_map, apop_map_sum

double apop_matrix_mean ( const gsl_matrix * data )

Returns the mean of all elements of a matrix.

Parameters

data	The matrix to be averaged. If `NULL`, return zero.

Returns: The mean of all cells of the matrix.

void apop_matrix_mean_and_var	(	const gsl_matrix *	data,
		double *	mean,
		double *	var
	)

Returns the mean and population variance of all elements of a matrix.

If NULL, return $\mu=0, \sigma^2=NaN$ .
Gives the population variance (sum of squares divided by $N$, the number of elements in the matrix). If you want sample variance, multiply the result by $N/(N-1)$:
1 double mu, var;
2 apop_data *data= apop_query_to_data("select * from indata");
3 apop_matrix_mean_and_var(data->matrix, &mu, &var);
4 var *= (data->size1*data->size2)/(data->size1*data->size2-1.0);

Parameters

data	the matrix to be averaged.
mean	where to put the mean to be calculated.
var	where to put the variance to be calculated.

apop_data * apop_matrix_pca	(	gsl_matrix *	data,
		int const	dimensions_we_want
	)

Principal component analysis: hand in a matrix and (optionally) a number of desired dimensions, and I'll return a data set where each column of the matrix is an eigenvector. The columns are sorted, so column zero has the greatest weight. The vector element of the data set gives the weights.

You may also specify the number of elements your principal component space should have. If this is equal to the rank of the space in which the input data lives, then the sum of weights will be one. If the dimensions desired is less than that (probably so you can prepare a plot), then the weights will be accordingly smaller, giving you an indication of how much variation these dimensions explain.

Parameters

data	The input matrix. I modify it in place so that each column has mean zero. (No default. If `NULL`, return `NULL` and print a warning iff `apop_opts.verbose >= 1`.)
dimensions_we_want	The singular value decomposition will return this many of the eigenvectors with the largest eigenvalues. (default: the size of the covariance matrix, i.e. `data->size2`)

Returns: Returns an apop_data set whose matrix is the principal component space. Each column of the returned matrix will be another eigenvector; the columns will be ordered by the eigenvalues.

The data set's vector will be the largest eigenvalues, scaled by the total of all eigenvalues (including those that were thrown out). The sum of these returned values will give you the percentage of variance explained by the factor analysis.

Exceptions

out->error=='a' Allocation error.

void apop_matrix_print	(	const gsl_matrix *	data,
		Output_declares
	)

Print a gsl_matrix to the screen, a file, a pipe, or a database table.

See apop_prep_output for more on how printing settings are set.
See also Legible output for more details and examples.
This function uses the Designated initializers syntax for inputs.

gsl_matrix * apop_matrix_realloc	(	gsl_matrix *	m,
		size_t	newheight,
		size_t	newwidth
	)

This function will resize a gsl_matrix to a new height or width.

Data in the matrix will be retained. If the new height or width is smaller than the old, then data in the later rows/columns will be cropped away (in a non–memory-leaking manner). If the new height or width is larger than the old, then new cells will be filled with garbage; it is your responsibility to zero out or otherwise fill new rows/columns before use.

A large number of reallocs can take a noticeable amount of time. You are encouraged to determine the size of your data beforehand and avoid writing for loops that reallocate the matrix at every iteration.
The gsl_matrix is a versatile struct that can represent submatrices and other cuts from parent data. Resizing a subset of a parent matrix makes no sense, so return NULL and print a warning if asked to resize a view of a matrix.

Parameters

m	The already-allocated matrix to resize. If you give me `NULL`, this becomes equivalent to `gsl_matrix_alloc`
newheight,newwidth	The height and width you'd like the matrix to be.

Returns: m, now resized

gsl_matrix * apop_matrix_stack	(	gsl_matrix *	m1,
		gsl_matrix const *	m2,
		char	posn,
		char	inplace
	)

Put the first matrix either on top of or to the left of the second matrix. Unless inplace='y', this returns a new matrix, meaning that at the end of this function, until you gsl_matrix_free() the original matrices, you will be taking up twice as much memory. Plan accordingly.

Parameters

m1	the upper/leftmost matrix (default: `NULL`, in which case this copies `m2`)
m2	the second matrix (default: `NULL`, in which case `m1` is returned)
posn	If `'r'`, stack rows on top of other rows. If `'c'` stack columns next to columns. (default: `'r'`)
inplace	If `'y'`, use apop_matrix_realloc to modify `m1` in place; see the caveats on that function. Otherwise, allocate a new matrix, leaving `m1` undisturbed. (default: `'n'`)

Returns: the stacked data, either in a new matrix or a pointer to m1.

For example, here is a function to merge four matrices into a single two-part-by-two-part matrix. The original matrices are unchanged.

 gsl_matrix *apop_stack_two_by_two(gsl_matrix *ul, gsl_matrix *ur, gsl_matrix *dl, gsl_matrix *dr){
   gsl_matrix *output, *t;
     output = apop_matrix_stack(ul, ur, 'c');
     t = apop_matrix_stack(dl, dr, 'c');
     apop_matrix_stack(output, t, 'r', .inplace='y');
     gsl_matrix_free(t);
     return output;
 }

This function uses the Designated initializers syntax for inputs.

long double apop_matrix_sum ( const gsl_matrix * m )

Returns the sum of the elements of a matrix. Occasionally convenient.

Parameters

m	the matrix to be summed.

double apop_matrix_to_positive_semidefinite ( gsl_matrix * m )

This function takes in a matrix and converts it in place to the 'closest' positive semidefinite matrix.

Parameters

m	On input, any matrix; on output, a positive semidefinite matrix. If `NULL`, return `NaN` and print an error.

Returns: the distance between the original and new matrices.

See also the test function apop_matrix_is_positive_semidefinite.
This function can be used as the core of a model constraint.
Adapted from the R Matrix package's nearPD, which is Copyright (2007) Jens Oehlschlägel [under the GPL].

void apop_maximum_likelihood	(	apop_data *	data,
		apop_model *	dist
	)

Find the likelihood-maximizing parameters of a model given data.

I assume that apop_prep has been called on your model. The easiest way to guarantee this is to use apop_estimate, which calls this function if the input model has no estimate method.

All of the settings are specified by adding an apop_mle_settings struct to your model, so see the many notes there. Notably, the default method is the Fletcher-Reeves conjugate gradient method, and if your model does not have a dlog likelihood function, then a numeric gradient will be calculated via apop_numerical_gradient. Add an apop_mle_settings group to your model to set tuning parameters or select other methods, including the Nelder-Mead simplex, simulated annealing, and root-finding.

Parameters

data	An apop_data set.
dist	The apop_model object: apop_gamma, apop_probit, apop_zipf, &c. You can add an `apop_mle_settings` struct to it (`Apop_model_add_group(your_model, apop_mle, .verbose=1, .method="PR cg", and_so_on)`).

Returns: None, but the input model is modified to include the parameter estimates, &c.

There is auxiliary info in the ->info element of the post-estimation struct. Get elements via, e.g.:
1 apop_model *est = apop_estimate(your_data, apop_probit);
2
3
4 int status = apop_data_get(est->info, .rowname="status");
5 if (status)
6 //trouble
7 else
8 //optimum found
9 apop_data_print(est->parameters); //Here are the estimated parameters

During the search for an optimum, ctrl-C (SIGINT) will halt the search, and the function will return whatever parameters the search was on at the time.

apop_model * apop_ml_impute	(	apop_data *	d,
		apop_model *	mvn
	)

Impute the most likely data points to replace NaNs in the data, and insert them into the given data. That is, the data set is modified in place.

How it works: this uses the machinery for apop_model_fix_params. The only difference is that this searches over the data space and takes the parameter space as fixed, while basic fix params model searches parameters and takes data as fixed. So this function just does the necessary data-parameter switching to make that happen.

Parameters

d	The data set. It comes in with NaNs and leaves entirely filled in.
mvn	A parametrized apop_model from which you expect the data was derived. If `NULL`, then I'll use the Multivariate Normal that best fits the data after listwise deletion.

Returns: An estimated apop_model. Also, the data input will be filled in and ready to use.

apop_model * apop_model_clear	(	apop_data *	data,
		apop_model *	model
	)

Set up the parameters and info elements of the apop_model:

At close, the input model has parameters of the correct size.

This is the default action for apop_prep, and many models with a custom prep routine call apop_model_clear at the end. Also, apop_estimate calls this function internally, which means that you probably never have to call this function directly.
If the model has already been prepped, this function should be a no-op.

Parameters

data	If your params vary with the size of the data set, then the function needs a data set to calibrate against. Otherwise, it's OK to set this to `NULL`.
model	The model whose output elements will be modified.

Returns: A pointer to the same model, should you need it.

Exceptions

outmodel->error=='d' dimension error.

apop_model * apop_model_copy ( apop_model * in )

Outputs a copy of the apop_model input.

Parameters

in	The model to be copied

Returns: A copy of the original. Includes copies of all settings groups, and the parameters (if not NULL, copied via apop_data_copy).

If in.more_size > 0 I memcpy the more pointer from the original data set.
The data set at in->data is not copied, but is also pointed to.

Exceptions

out->error=='a'	Allocation error. In extreme cases, where there aren't even a few hundred bytes available, I will return `NULL`.
out->error=='s'	Error copying settings groups.
out->error=='p'	Error copying parameters or info page; the given apop_data struct may be `NULL` or may have its own `->error` element.

apop_data * apop_model_draws	(	apop_model *	model,
		int	count,
		apop_data *	draws
	)

Make a set of random draws from a model and write them to an apop_data set.

Parameters

model	The model from which draws will be made. Must already be prepared and/or estimated.
count	The number of draws to make. If `draws` is not `NULL`, then this is ignored and `count=draws->matrix->size1`. default=1000.
draws	If not `NULL`, a pre-allocated data set whose `matrix` element will be filled with draws.

Returns: An apop_data set with the matrix filled with `count` draws. If draws!=NULL, then return a pointer to it.

Exceptions

out->error=='m'	Input model isn't good for making draws: it is `NULL`, or `m->dsize=0`.
out->error=='s'	You gave me a `draws` matrix, but its size is less than the size of a single draw from the data, `model->dsize`.
out->error=='d'	Trouble drawing from the distribution for at least one row. That row is set to all `NAN`.

Prints a warning if you send in a non-NULL apop_data set, but its matrix element is NULL, when apop_opts.verbose>=1.
See also apop_draw, which makes a single draw.
Random numbers are generated using RNGs from apop_rng_get_thread, qv.

Here is a two-line program to draw a different set of ten Standard Normals on every run (provided runs are more than a second apart):

#include <apop.h>
#include <time.h>
int main(){
    apop_opts.rng_seed = time(NULL);
    apop_data_print(
            apop_model_draws(
                apop_model_set_parameters(apop_normal, 0, 1), 
                .count=10, 
            )
    );
}

This function uses the Designated initializers syntax for inputs.

long double apop_model_entropy	(	apop_model *	in,
		int	draws
	)

Calculate the entropy of a model: $\int -\ln(p(x))p(x)dx$ , which is the expected value of $-\ln(p(x))$ .

The default method is to make draws using apop_model_draws, then evaluate the log likelihood at those points using the model's log_likelihood method.

There are a number of routines for specific models, including the apop_normal and apop_pmf models.

If you want the entropy of a data set, see apop_vector_entropy.
The entropy is calculated using natural logs. If you prefer base-2 logs, just divide by $\ln(2)$ : apop_model_entropy(my_model)/log(2).

Parameters

in	A parameterized apop_model. That is, you have already used apop_estimate or apop_model_set_parameters to estimate/set the model parameters.
draws	If using the default method of making random draws, how many random draws to make (default=1,000)

Sample code:

#include <apop.h>
#define Diff(left, right, eps) Apop_stopif(fabs((left)-(right))>(eps), \
        abort(), 0, "%g is too different from %g (abitrary limit=%g).", \
        (double)(left), (double)(right), eps)
/* The entropy function, like some other functions (including apop_update) has a lookup
 table for known models like the Normal distribution. If the input model has
 \c log_likelihood, \c p, and \c draw functions that are the ones found in \ref
 apop_normal, then use a known calculation to report entropy; else report based on
 random draws from the model.
If we make a copy of the \ref apop_normal model and replace the log likelihood with
a new function that produces identical values, the lookup table will not find the
modified model, and the calculation via random draws will be done. Of course, the
final entropy as calculated using both methods should differ only by a small amount.
*/
long double mask(apop_data *d, apop_model *m){
    return apop_normal->log_likelihood(d, m);
}
int main(){
    for (double i=0.1; i< 10; i+=.2){
        apop_model *n = apop_model_set_parameters(apop_normal, 8, i);
        long double v= apop_model_entropy(n);
        n->log_likelihood = mask;
        long double w= apop_model_entropy(n, 50000);
        Diff(v, w, 5e-2);
    }
}

apop_model * apop_model_fix_params ( apop_model * model_in )

Produce a model based on another model, but with some of the parameters fixed at a given value.

You will send me the model whose parameters you want fixed, with the parameters element set as follows. For the fixed parameters, simply give the values to which they will be fixed. Set the free parameters to NaN.

For example, here is a Binomial distribution with a fixed $n=30$ but $p_1$ allowed to float freely:

 apop_model *bi30 = apop_model_fix_params(apop_model_set_parameters(apop_binomial, 30, NAN));
 Apop_model_add_group(bi30, apop_mle, .starting_pt=(double[]){.5}); // The Binomial doesn't like the
                                                                    //  default starting point of 1.
 apop_model *out = apop_estimate(your_data, bi30);

The output is an apop_model that can be estimated, Bayesian updated, et cetera.

Rather than using this model, you may simply want a now-filled-in copy of the original model. Use apop_model_fix_params_get_base to retrieve the original model's parameters.
The estimate method always uses an MLE, and it never calls the base model's estimate method.
If the input model has an apop_mle_settings group attached, I'll use them for the estimate method. Otherwise, I'll set my own.
If the parameter input has non-NaN values at the free parameters, then I'll use those as the starting point for any MLE search; the default for the variables without fixed values is to start from 1 as usual.
I do check the more pointer of the parameters for additional pages and NaNs on those pages.

Here is a sample program. It produces a few thousand draws from a Multivariate Normal distribution, and then tries to recover the means given a var/covar matrix fixed at the correct variance.

#include <apop.h>
int main(){
    size_t ct = 5e4;
    //set up the model & params
    apop_data *params = apop_data_falloc((2,2,2), 8,  1, 0.5,
                                                  2,  0.5, 1);
    apop_model *pvm = apop_model_copy(apop_multivariate_normal);
    pvm->parameters = apop_data_copy(params);
    pvm->dsize = 2;
    apop_data *d = apop_model_draws(pvm, ct);
    //set up and estimate a model with fixed covariance matrix but free means
    gsl_vector_set_all(pvm->parameters->vector, GSL_NAN);
    apop_model *mep1 = apop_model_fix_params(pvm);
    apop_model *e1 = apop_estimate(d, mep1);
    
    //compare results
    printf("original params: ");
    apop_vector_print(params->vector);
    printf("estimated params: ");
    apop_vector_print(e1->parameters->vector);
    assert(apop_vector_distance(params->vector, e1->parameters->vector)<1e-2); 
}

Parameters

model_in The base model

Returns: a model that can be used like any other, with the given params fixed or free.

apop_model * apop_model_fix_params_get_base ( apop_model * fixed_model )

The apop_model_fix_params function produces a model that has only the non-fixed parameters of the model. After estimation of the fixed-parameter model, this function fills the parameters element of the base model and returns a pointer to the base model.

void apop_model_free ( apop_model * free_me )

Free an apop_model structure.

The parameters and settings are freed. These are the elements that are copied by apop_model_copy.
The data element is not freed, because the odds are you still need it.
If free_me->more_size is positive, the function runs free(free_me->more). But it has no idea what the more element contains; if it points to other structures (like an apop_data set), you need to free them before calling this function.
If free_me is NULL, this does nothing.

Parameters

free_me A pointer to the model to be freed.

apop_data * apop_model_hessian	(	apop_data *	data,
		apop_model *	model,
		double	delta
	)

Numerically estimate the matrix of second derivatives of the parameter values, via a series of re-evaluations at small differential steps. [Therefore, it may be expensive to do this for a very computationally-intensive model.]

Parameters

data	The apop_data at which the model was estimated (default: `NULL`)
model	The apop_model, with parameters already estimated (no default, must not be `NULL`)
delta	the step size for the differentials. (default: 1e-3, but see below)

Returns: The matrix of estimated second derivatives at the given data and parameter values.

If you do not set delta as an input, I first look for an apop_mle_settings group attached to the input model, and check that for a delta element. If that is also missing, use the default of 1e-3.
This function uses the Designated initializers syntax for inputs.

apop_model * apop_model_metropolis	(	apop_data *	d,
		gsl_rng *	rng,
		apop_model *	m
	)

Use Metropolis-Hastings Markov chain Monte Carlo to make draws from the given model.

The basic storyline is that draws are made from a proposal distribution, and the likelihood of your model given your data and the drawn parameters evaluated. At each step, a new set of proposal parameters are drawn, and if they are more likely than the previous set the new proposal is accepted as the next step, else with probability (prob of new params)/(prob of old params), they are accepted as the next step anyway. Otherwise the last accepted proposal is repeated.

The output is an apop_pmf model with a data set listing the draws that were accepted, including those repetitions. The output model is modified so that subsequent draws are one more step from the Markov chain, via apop_model_metropolis_draw.

Parameters

d	The apop_data set used for evaluating the likelihood of a proposed parameter set.
rng	A `gsl_rng`, probably allocated via apop_rng_alloc. (Default: an RNG from apop_rng_get_thread)
m	The apop_model from which parameters are being drawn. (No default; must not be `NULL`)

Returns: A modified apop_pmf model representing the results of the search. It has a specialized draw method that returns another step from the Markov chain with each draw.

Exceptions

out->error=='c' Proposal was outside of a constraint; see below.

If a proposal fails to meet the constraint element of the model you input, then the proposal is thrown out and a new one selected. By the default proposal distribution, this is not mathematically correct (it breaks detailed balance), and values near the constraint will be oversampled. The output model will have outmodel->error=='c'. It is up to you to decide whether the resulting distribution is good enough for your purposes or whether to take the time to write a custom proposal and step function to accommodate the constraint.

Attach an apop_mcmc_settings group to your model to specify the proposal distribution, burnin, and other details of the search. See the apop_mcmc_settings documentation for details.

The default proposal includes an adaptive step: you specify a target accept rate (default: .35), and if the accept rate is currently higher the variance of the proposals is widened to explore more of the space; if the accept rate is currently lower the variance is narrowed to stay closer to the last accepted proposal. Technically, this breaks ergodicity of the Markov chain, but the consensus seems to be that this is not a serious problem. If it does concern you, you can set the base_adapt_fn in the apop_mcmc_settings group to a do-nothing function, or one that damps its adaptation as $n\to\infty$ .
If you have a univariate model, apop_arms_draw may be a suitable simpler alternative.
Note the gibbs_chunks element of the apop_mcmc_settings group. If you set gibbs_chunks='a', all parameters are drawn as a set, and accepted/rejected as a set. The variances are adapted at an identical rate. If you set gibbs_chunks='i', then each scalar parameter is assigned its own proposal distribution, which is adapted at its own pace. With gibbs_chunks='b' (the default), each of the vector, matrix, and weights of your model's parameters is drawn/accepted/adapted as a block (and so on to additional chunks if your model has ->more pages). This works well for complex models which naturally break down into subsets of parameters.
Each chunk counts as a step in the Markov chain. Therefore, if there are several chunks, you can expect chunks to repeat from step to step. If you want a draw after cycling through all chunks, try using apop_model_metropolis_draw, which has that behavior.
If the likelihood model has NULL parameters, I will allocate them. That means you can use one of the stock models that ship with Apophenia. If I need to run the model's prep routine to get the size of the parameters, then I will make a copy of the likelihood model, run prep, and then allocate parameters for that copy of a model.
On exit, the parameters element of your likelihood model has the last accepted parameter proposal.
If you set apop_opts.verbose=2 or greater, I will report the accept rate of the M-H sampler. It is a common rule of thumb to select a proposal so that this is between 20% and 50%. Set apop_opts.verbose=3 to see the stream of proposal points, their likelihoods, and the acceptance odds. You may want to set apop_opts.log_file=fopen("yourlog", "w") first.

This function uses the Designated initializers syntax for inputs.

int apop_model_metropolis_draw	(	double *	out,
		gsl_rng *	rng,
		apop_model *	model
	)

The draw method for models estimated via apop_model_metropolis.

That method produces an apop_pmf, typically with a few thousand draws from the model in a batch. If you want to get a single next step from the Markov chain, use this.

A Markov chain works by making a new draw and then accepting or rejecting the draw. If the draw is rejected, the last value is reported as the next step in the chain. Users sometimes mitigate this repetition by making a batch of draws (say, ten at a time) and using only the last.

If you run this without first running apop_model_metropolis, I will run it for you, meaning that there will be an initial burn-in period before the first draw that can be reported to you. That run is done using model->data as input.

Parameters

out	An array of `doubles`, which will hold the draw, in the style of apop_draw.
rng	A `gsl_rng`, already initialized, probably via apop_rng_alloc.
model	A model which was probably already run through apop_model_metropolis.

Returns: On return, out is filled with the next step in the Markov chain. The ->data element of the PMF model is extended to include the additional steps in the chain. If a proposal failed the model constraints, then return 1; else return 0. See the notes in the documentation for apop_model_metropolis.

After pulling the attached settings group, the parent model is ignored. One expects that base_model in the mcmc settings group == the parent model.
If your settings break the model parameters into several chunks, this function returns after stepping through all chunks.

apop_data * apop_model_numerical_covariance	(	apop_data *	data,
		apop_model *	model,
		double	delta
	)

Produce the covariance matrix for the parameters of an estimated model via the derivative of the score function at the parameter. I.e., I find the second derivative via apop_model_hessian , and take the negation of the inverse.

I follow Efron and Hinkley in using the estimated information matrix—the value of the information matrix at the estimated value of the score—not the expected information matrix that is the integral over all possible data. See Pawitan 2001 (who cribbed a little off of Efron and Hinkley) or Klemens 2008 (who directly cribbed off of both) for further details.

Parameters

data	The data by which your model was estimated
model	A model whose parameters have been estimated.
delta	The differential by which to step for sampling changes. (default: 1e-3, but see below)

Returns: A covariance matrix for the data. Also, if the data does not have a "<Covariance>" page, I'll set it to the result as well [i.e., I won't overwrite an existing covariance page].

If you do not set delta as an input, I first look for an apop_mle_settings group attached to the input model, and check that for a delta element. If that is also missing, use the default of 1e-3.
This function uses the Designated initializers syntax for inputs.

void apop_model_print	(	apop_model *	model,
		FILE *	output_pipe
	)

Print the results of an estimation for a human to look over.

Parameters

model	The model whose information should be displayed (No default. If `NULL`, print `NULL`)
output_pipe	The output stream. Default: `stdout`. If you'd like something else, use `fopen`. E.g.: `FILE *out = fopen("outfile.txt", "w");` (or `"a"` to append), then `apop_model_print(the_model, out);`, then `fclose(out);` (optional in many cases).

The default prints the name, parameters, info, &c. but I check a vtable for alternate methods you define; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:

 typedef void (*apop_model_print_type)(apop_model *params, FILE *out);
 #define apop_model_print_hash(m1) ((m1)->log_likelihood ? (size_t)(m1)->log_likelihood : \
             (m1)->p ? (size_t)(m1)->p*33 : \
             (m1)->estimate ? (size_t)(m1)->estimate*33*33 : \
             (m1)->draw ? (size_t)(m1)->draw*33*27  : \
             (m1)->cdf ? (size_t)(m1)->cdf*27*27 : 27)

When building a special print method, all output should fprintf to the input FILE* handle. Apophenia's output routines also accept a file handle; e.g., if the file handle is named ap, then if the thismodel print method uses apop_data_print to print the parameters, it must do so via a form like apop_data_print(thismodel->parameters, .output_pipe=ap).

Your print method can use both the default output and its own additions by masking itself for a few lines:

 void print_method(apop_model *in, FILE* ap){
   void *temp = in->estimate;
   in->estimate = NULL;
   apop_model_print(in, ap);
   in->estimate = temp;
 
   printf("Additional info:\n");
   ...
 }

Print methods are intended for human consumption and are subject to change.
This function uses the Designated initializers syntax for inputs.

apop_model * apop_model_to_pmf	(	apop_model *	model,
		apop_data *	binspec,
		long int	draws,
		int	bin_count
	)

Make random draws from an apop_model, and bin them using a binspec in the style of apop_data_to_bins. If you have a data set that used the same binspec, you now have synced histograms, which you can plot or sensibly test hypotheses about.

Parameters

binspec	A description of the bins in which to place the draws; see apop_data_to_bins. (default: as in apop_data_to_bins.)
model	The model to be drawn from. Because this function works via random draws, the model needs to have a `draw` method. (No default)
draws	The number of random draws to make. (arbitrary default = 10,000)
bin_count	If no bin spec, the number of bins to use (default: as per apop_data_to_bins, $\sqrt{N}$ )

Returns: An apop_pmf model, with a new binned data set attached (which you may have to apop_data_free(output_model->data) to prevent memory leaks). The weights on the data set are normalized to sum to one.

This function uses the Designated initializers syntax for inputs.

long double apop_multivariate_gamma	(	double	a,
		int	p
	)

The multivariate generalization of the Gamma distribution.

$\Gamma_p(a)= \pi^{p(p-1)/4}\prod_{j=1}^p \Gamma\left[ a+(1-j)/2\right].$

Because $\Gamma(x)$ is undefined for $x\in\{0, -1, -2, ...\}$ , this function returns NAN when $a+(1-j)/2$ takes on one of those values.

See also apop_multivariate_lngamma, which is more numerically stable in most cases.

long double apop_multivariate_lngamma	(	double	a,
		int	p
	)

The log of the multivariate generalization of the Gamma; see also apop_multivariate_gamma.

int apop_name_add	(	apop_name *	n,
		char const *	add_me,
		char	type
	)

Adds a name to the apop_name structure. Puts it at the end of the given list.

Parameters

n	An existing, allocated apop_name structure.
add_me	A string. If `NULL`, do nothing; return -1.
type	'r': add a row name; 'c': add a matrix column name; 't': add a text column name; 'h': add a title (i.e., a header); 'v': add (or overwrite) the vector name.

Returns: The number of rows/cols/depvars after you have added the new one. But if add_me is NULL, return -1.

apop_name * apop_name_alloc ( void )

Allocates a name structure

Returns: An allocated, empty name structure. In the very unlikely event that malloc fails, return NULL.

Because apop_data_alloc uses this to set up its output, you will rarely if ever need to call this function explicitly. You may want to use it if wrapping a gsl_matrix into an apop_data set. For example, to put a title on a vector:

 apop_data *d = &(apop_data){.vector=your_vector, .names=apop_name_alloc()};
 apop_name_add(d->names, "A column of numbers", 'v');
 apop_data_print(d);
 
 ...
 apop_name_free(d->names); //but d itself is auto-allocated; no need to free it.

apop_name * apop_name_copy ( apop_name * in )

Copy one apop_name structure to another. That is, all data is duplicated.

Used internally by apop_data_copy, but sometimes useful by itself. For example, say that we have an apop_data struct named d and a gsl_matrix of the same dimensions named m; we could give m the labels from d for printing:

 apop_data *wrapped = &(apop_data){.matrix=m, .names=apop_name_copy(d)};
 apop_data_print(wrapped);
 apop_name_free(wrapped->names); //wrapped itself is auto-allocated; do not free.

Parameters

in	The input names

Returns: An apop_name struct with copies of all input names.

int apop_name_find	(	const apop_name *	n,
		const char *	name,
		const char	type
	)

Finds the position of an element in a list of names.

The function uses POSIX's strcasecmp, and so does case-insensitive search the way that function does.

Parameters

n	the apop_name object to search.
name	the name you seek; see above.
type	`'c'` (=column), `'r'` (=row), or `'t'` (=text). Default is `'c'`.

Returns: The position of name. If 'c', then this may be -1, meaning the vector name. If not found, returns -2. On error, e.g. name==NULL, returns -2.

void apop_name_free ( apop_name * free_me )

Free the memory used by an apop_name structure.

void apop_name_print ( apop_name * n )

Prints the given list of names to stdout. Useful for debugging.

Parameters

n	The apop_name structure

void apop_name_stack	(	apop_name *	n1,
		apop_name *	nadd,
		char	type1,
		char	typeadd
	)

Append one list of names to another.

If the first list is empty, then this is a copy function.

Parameters

n1	The first set of names (no default, must not be `NULL`)
nadd	The second set of names, which will be appended after the first. (no default. If `NULL`, a no-op.)
type1	Either 'c', 'r', 't', or 'v' stating whether you are merging the columns, rows, text, or vector. If 'v', then ignore `typeadd` and just overwrite the target vector name with the source name. (default: 'r')
typeadd	Either 'c', 'r', 't', or 'v' stating whether you are merging the columns, rows, or text. If 'v', then overwrite the target with the source vector name. (default: type1)

gsl_vector * apop_numerical_gradient	(	apop_data *	data,
		apop_model *	model,
		double	delta
	)

A wrapper around the GSL's one-dimensional gsl_deriv_central to find a numeric differential for each dimension of the input apop_model's log likelihood (or p if log_likelihood is NULL).

Parameters

data	The apop_data set to use for all evaluations.
model	The apop_model, expressing the function whose derivative is sought. The gradient is taken via small changes along the model parameters.
delta	The size of the differential. (default: 1e-3, but see below)

1 gsl_vector *gradient = apop_numerical_gradient(data, your_parametrized_model);

If you do not set delta as an input, I first look for an apop_mle_settings group attached to the input model, and check that for a delta element. If that is also missing, use the default of 1e-3.
This function uses the Designated initializers syntax for inputs.

double apop_p	(	apop_data *	d,
		apop_model *	m
	)

Find the probability of a data/parametrized model pair.

Parameters

d	The data
m	The parametrized model, which must have either a `log_likelihood` or a `p` method.

apop_data * apop_paired_t_test	(	gsl_vector *	a,
		gsl_vector *	b
	)

Answers the question: with what confidence can I say that the mean difference between the two columns is zero?

If apop_opts.verbose >=2, then display some information, like the mean/var/count for both vectors and the t statistic, to stderr.

Parameters

a	A column of data
b	A matched column of data

Returns: an apop_data set with the following elements: mean left - right: the difference in means; if positive, first vector has larger mean, and one-tailed test is testing $L > R$ , else reverse if negative.
t statistic: used for the test
df: degrees of freedom
p value, 1 tail: the p-value for a one-tailed test that one vector mean is greater than the other.
confidence, 1 tail: 1- p value.
p value, 2 tail: the p-value for the two-tailed test that left mean = right mean.
confidence, 2 tail: 1-p value

See also: apop_t_test for an example, and for when the element-by-element difference between the vectors has no sensible interpretation.

apop_model * apop_parameter_model	(	apop_data *	d,
		apop_model *	m
	)

Get a model describing the distribution of the given parameter estimates.

For many models, the distribution of the parameter estimates is well-known, such as the $t$ -distribution of the parameters for OLS.

For models where the distribution of $\hat{p}$ is not known, if you give me data, I will return an apop_normal or apop_multivariate_normal model, using the parameter estimates as mean and apop_bootstrap_cov for the variances.

If you don't give me data, then I will assume that this is a stochastic model where re-running the model will produce different parameter estimates each time. In this case, I will run the model 1e4 times and return an apop_pmf model with the resulting parameter distributions.

Before calling this, I expect that you have already run apop_estimate to produce $\hat{p}$ .

The apop_pm_settings structure dictates details of how the model is generated. For example, if you want only the distribution of the third parameter, and you know the distribution will be a PMF generated via random draws, then set settings and call the model via:

1 apop_model_group_add(your_model, apop_pm, .index =3, .draws=3e5);

2 apop_model *dist = apop_parameter_model(your_data, your_model);

Some useful parts of apop_pm_settings:

index gives the position of the parameter (in apop_data_pack order) in which you are interested. Thus, if this is zero or more, then you will get a univariate output distribution describing a single parameter. If index == -1, then I will give you the multivariate distribution across all parameters. The default is zero (i.e. the univariate distribution of the zeroth parameter).
draws If there is no closed-form solution and bootstrap is inappropriate, then the last resort is a large number of random draws of the model, summarized into a PMF. Default: 1,000 draws.
rng If the method requires random draws, then use this. If you provide NULL and one is needed, I provide one for you via apop_rng_get_thread.

The default is via resampling as above, but special-case calculations for certain models are held in a vtable; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:

1 typedef apop_model* (*apop_parameter_model_type)(apop_data *, apop_model *);

2 #define apop_parameter_model_hash(m1) ((size_t)((m1).log_likelihood ? (m1).log_likelihood : (m1).p)*33 + (m1).estimate ? (size_t)(m1).estimate: 27)

apop_data * apop_predict	(	apop_data *	d,
		apop_model *	m
	)

A prediction supplies E(a missing value | original data, already-estimated parameters, and other supplied data elements ).

For a regression, one would first estimate the parameters of the model, then supply a row of predictors X. The value of the dependent variable $y$ is unknown, so the system would predict that value.

For a univariate model (i.e. a model in one-dimensional data space), there is only one variable to omit and fill in, so the prediction problem reduces to the expected value: E(a missing value | original data, already-estimated parameters). [In some models, this may not be the expected value, but is a best value for the missing item using some other meaning of `best'.]

In other cases, prediction is the missing data problem: for three-dimensional data, you may supply the input (34, NaN, 12), and the parameterized model provides the most likely value of the middle parameter given the parameters and known data.

If you give me a NULL data set, I will assume you want all values filled in, for most models with the expected value.

If you give me data with NaNs, I will take those as the points to be predicted given the provided data.

If the model has no predict method, the default is to use the apop_ml_impute function to do the work. That function does a maximum-likelihood search for the best parameters.

Returns: If you gave me a non-NULL data set, I will return that, with the NaNs filled in. If NULL input, I will allocate an apop_data set and fill it with the expected values.

There may be a second page (i.e., an apop_data set attached to the ->more pointer of the main) listing confidence and standard error information. See your specific model documentation for details.

Special-case calculations for certain models are held in a vtable; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:

1 typedef apop_data * (*apop_predict_type)(apop_data *d, apop_model *params);

2 #define apop_predict_hash(m1) ((size_t)((m1).log_likelihood ? (m1).log_likelihood : (m1).p)*33 + (m1).estimate ? (size_t)(m1).estimate: 27)

void apop_prep	(	apop_data *	d,
		apop_model *	m
	)

Allocate and initialize the parameters, info, and other requisite parts of an apop_model.

Some models have associated prep routines that also attach settings groups to the model, and set up additional special-case functions in vtables.

The input model is modified in place.
If called repeatedly, subsequent calls to apop_prep are no-ops. Thus, a model can not be re-prepped using a new data set or other conditions.
The default prep is to simply call apop_model_clear. If the input apop_model has a prep method, then that gets called instead.

int apop_prep_output	(	char const *	output_name,
		FILE **	output_pipe,
		char *	output_type,
		char *	output_append
	)

If you're reading this, it is probably because you were referred by another function that uses this internally. You should never call this function directly, but do read this documentation.

There are four settings that affect how output happens, which can be set when you call the function that sent you to this documentation, e.g:

1 apop_data_print(your_data, .output_type ='f', .output_append = 'w');

Parameters

output_name	The name of the output file, if any. For a database, the table to write.
output_pipe	If you have already opened a file and have a `FILE*` on hand, use this instead of giving the file name.
output_type	`'p'` = pipe, `'f'` = file, `'d'` = database
output_append	`'a'` = append (default), `'w'` = write over.

At the end, output_name, output_pipe, and output_type are all set. Notably, the local output_pipe will have the correct location for the calling function to fprintf to.

See the "Legible output" section of the documentation for more discussion.

The default is output to stdout. For example,
1 apop_data_print(your_data);
2 //is equivalent to
3 apop_data_print(your_data, .output_type='p', .output_pipe=stdout);

Tip: if writing to the database, you can get a major speed boost by wrapping the call in a begin/commit wrapper:

 apop_query("begin;");
 apop_data_print(your_data, .output_name="dbtab", .output_type='d');
 apop_query("commit;");

apop_data * apop_rake	(	char const *	margin_table,
		char const	var_list,
		int	var_ct,
		char const	contrasts,
		int	contrast_ct,
		char const *	structural_zeros,
		int	max_iterations,
		double	tolerance,
		char const *	count_col,
		char const *	init_table,
		char const *	init_count_col,
		double	nudge
	)

Fit a log-linear model via iterative proportional fitting, aka raking.

Raking has many uses. The Modeling with Data blog presents a series of discussions of uses of raking, including some worked examples.

Or see Wikipedia for an overview of Log linear models, aka Poisson regressions. One approach toward log-linear modeling is a regression form; let there be four categories, A, B, C, and D, from which we can produce a model positing, for example, that cell count is a function of a form like $g_1(A) + g_2(BC) + g_3(CD)$ . In this case, we would assign a separate coefficient to every possible value of A, every possible value of (B, C), and every value of (C, D). Raking is the technique that searches for that large set of parameters.

The combinations of categories that are considered to be relevant are called contrasts, after ANOVA terminology of the 1940s.

The other constraint on the search are structural zeros, which are values that you know can never be non-zero, due to field-specific facts about the variables. For example, U.S. Social Security payments are available only to those age 65 or older, so "age <65 and gets_soc_security=1" is a structural zero.

Because there is one parameter for every combination, there may be millions of parameters to estimate, so the search to find the most likely value requires some attention to technique. For over half a century, the consensus method for searching has been raking, which iteratively draws each category closer to the mean in a somewhat simple manner (this was first developed circa 1940 and had to be feasible by hand), but which is guaranteed to eventually arrive at the maximum likelihood estimate for all cells.

Another complication is that the table is invariably sparse. One can easily construct tables with millions of cells, but the corresponding data set may have only a few thousand observations.

This function uses the database to resolve the sparseness problem. It constructs a query requesting all combinations of categories that could possibly be non-zero after raking, given all of the above constraints. Then, raking is done using only that subset. This means that the work is done on a number of cells proportional to the number of data points, not to the full cross of all categories. Set apop_opts.verbose to 2 or greater to show the query on stderr.

One could use raking to generate `fully synthetic' data: start with observation-level data in a margin table. Begin the raking with a starting data set of all-ones. Then rake until the all-ones set transforms into something that conforms to the margins and (if any) structural zeros. You now have a data set which matches the marginal totals but does not use any other information from the observation-level data. If you do not specify an .init_table, then an all-ones default table will be used.

Parameters

margin_table	The name of the table in the database to use for calculating the margins. The table should have one observation per row. (No default)
var_list	The full list of variables to search. A list of strings, e.g., `(char *[]){"var1", "var2", ..., "var15"}`
var_ct	The count of the full list of variables to search.
contrasts	The contrasts describing your model. Like the `var_list` input, a list of strings like `(char *[]){"var1", "var7", "var13"}`. Each contrast is a pipe-delimited list of variable names. (No default)
contrast_ct	The number of contrasts in the list of contrasts. (No default)
structural_zeros	a SQL clause indicating combinations that can never take a nonzero value. This will go into a `where` clause, so anything you could put there is OK, e.g. "age <65 and gets_soc_security=1 or age <15 and married=1". Your margin data is not checked for structural zeros. (default: no structural zeros)
max_iterations	Number of rounds of raking at which the algorithm halts. (default: 1000)
tolerance	I calculate the change for each cell from round to round; if the largest cell change is smaller than this, I stop. (default: 1e-5)
count_col	This column gives the count of how many observations are represented by each row. If `NULL`, each row represents one person. (default: `NULL`)
init_table	The default is to initially set all table elements to one and then rake from there. This is effectively the `fully synthetic' approach, which uses only the information in the margins and derives the data set closest to the all-ones data set that is consistent with the margins. Care is taken to maintain sparsity in this case. If you specify an `init_table`, then I will get the initial cell counts from it. (default: the fully-synthetic approach, using a starting point of an all-ones grid.)
init_count_col	The column in `init_table` with the cell counts.
nudge	There is a common hack of adding a small value to every zero entry, because a zero entry will always scale to zero, while a small value could eventually scale to anything. Recall that this function works on sparse sets, so I first filter out those cells that could possibly have a nonzero value given the observations, then I add `nudge` to any zero cells within that subset.

Returns: An apop_data set where every row is a single combination of variable values and the weights vector gives the most likely value for each cell.

Exceptions

out->error='i'	Input was somehow wrong.
out->error='c'	Raking did not converge, reached max. iteration count.

Set apop_opts.verbose=3 to see the intermediate tables at the end of each round of raking.
If you want all cells to have nonzero value, then you can do that via pre-processing:
1 apop_query("update data_table set count_col = 1e-3 where count_col = 0");
This function is thread-safe. To make this happen, temp database tables are named using a number built with omp_get_thread_num.
This function uses the Designated initializers syntax for inputs.

int apop_regex	(	const char *	string,
		const char *	regex,
		apop_data **	substrings,
		const char	use_case
	)

Extract subsets from a string via regular expressions.

This function takes a regular expression and repeatedly applies it to an input string. It returns the count of matches, and optionally returns the matches themselves organized into the text grid of an apop_data set.

There are three common flavors of regular expression: Basic, Extended, and Perl-compatible (BRE, ERE, PCRE). I use EREs, as per the specs of your C library, which should match POSIX's ERE specification.

For example, "p.val" will match "P value", "p.value", "p values" (and even "tempeval", so be careful).

If you give a non-NULL address in which to place a table of paren-delimited substrings, I'll return them as a row in the text element of the returned apop_data set. I'll return all the matches, filling the first row with substrings from the first application of your regex, then filling the next row with another set of matches (if any), and so on to the end of the string. Useful when parsing a list of items, for example.

Parameters

string	The string to search (no default)
regex	The regular expression (no default)
substrings	Parens in the regex indicate that I should return matching substrings. Give me the address of an apop_data* set, and I will allocate and fill the text portion with matches. Default= `NULL`, meaning do not return substrings (even if parens exist in the regex). If no match, return an empty apop_data set, so `output->textsize[0]==0`.
use_case	Should I be case sensitive, `'y'` or `'n'`? (default = `'n'`, which is not the POSIX default.)

Returns: Count of matches found. 0 == no match. substrings may be allocated and filled if needed.

If apop_opts.stop_on_warning='n', returns -1 on error (e.g., regex NULL or didn't compile).
If string==NULL, I return 0—no match—and if substrings is provided, set it to NULL.

Here is the test function. Notice that the substring-pulling function call passes &subs, not plain subs.

#include <apop.h>
int main(){
    char string1[] = "Hello. I am a string.";
    assert(apop_regex(string1, "hell"));
    apop_data *subs;
    apop_regex(string1, "(e).*I.*(xxx)*(am)", .substrings = &subs);
    //apop_data_show(subs);
    assert(!strcmp(subs->text[0][0], "e"));
    assert(!strlen(subs->text[0][1])); //The non-match to (xx)* has a zero-length blank
    assert(!strcmp(subs->text[0][2], "am"));
    apop_data_free(subs);
    //Split a comma-delimited list, throwing out white space.
    //Notice that the regex includes only one instance of a non-comma blob 
    //ending in a non-space followed by a comma, but the function keeps 
    //applying it until the end of string.
    char string2[] = " one, two , three ,four";
    apop_regex(string2, " *([^,]*[^ ]) *(,|$) *", &subs);
    assert(!strcmp(*subs->text[0], "one"));
    assert(!strcmp(*subs->text[1], "two"));
    assert(!strcmp(*subs->text[2], "three"));
    assert(!strcmp(*subs->text[3], "four"));
    apop_data_free(subs);
    //Get a parenthetical. For EREs, \( \) match plain parens in the text.
    char string3[] = " one (but secretly, two)";
    apop_regex(string3, "(\\([^)]*\\))", &subs);
    assert(!strcmp(*subs->text[0], "(but secretly, two)"));
    apop_data_free(subs);
    //NULL input string ==> no-op.
    int match_count = apop_regex(NULL, " *([^,]*[^ ]) *(,|$) *", &subs);
    assert(!match_count);
    assert(!subs);
}

Each set of matches will be one row of the output data. E.g., given the regex ([A-Za-z])([0-9]), the column zero of outdata will hold letters, and column one will hold numbers. Use apop_data_transpose to reverse this so that the letters are in outdata->text[0] and numbers in outdata->text[1].

gsl_rng * apop_rng_alloc ( int seed )

Initialize a gsl_rng.

Uses the Tausworthe routine.

Parameters

seed	The seed. No need to get funny with it: 0, 1, and 2 will produce wholly different streams.

Returns: The RNG ready for your use.

If you are confident that your code is debugged and would like a new stream of values every time your program runs (provided your runs are more than a second apart), seed with the time:

#include <apop.h>
#include <time.h>
int main(){
    apop_opts.rng_seed = time(NULL);
    apop_data_print(
            apop_model_draws(
                apop_model_set_parameters(apop_normal, 0, 1), 
                .count=10, 
            )
    );
}

double apop_rng_GHgB3	(	gsl_rng *	r,
		double *	a
	)

RNG from a Generalized Hypergeometric type B3.

Devroye uses this as the base for many of his distribution-generators, including the Waring.

If one of the inputs is <=0, error; return NaN and print a warning.

void apop_score	(	apop_data *	d,
		gsl_vector *	out,
		apop_model *	m
	)

Find the vector of first derivatives (aka the gradient) of the log likelihood of a data/parametrized model pair.

On input, the model m must already be sufficiently prepped that the log likelihood can be evaluated; see p, log_likelihood for details.

On output, the gsl_vector input to the function will be filled with the gradients (or NaNs on errors). If the model parameters have a more complex shape than a simple vector, then the vector will be in apop_data_pack order; use apop_data_unpack to reformat to the preferred shape.

Parameters

d	The apop_data set at which the score is being evaluated.
out	The score to be returned. I expect you to have allocated this already.
m	The parametrized model, which must have either a `log_likelihood` or a `p` method.

The default is to use apop_numerical_gradient, but special-case calculations for certain models are held in a vtable; see Registering new methods in vtables for details. The typedef new functions must conform to and the hash used for lookups are:

1 typedef void (*apop_score_type)(apop_data *d, gsl_vector *gradient, apop_model *m);

2 #define apop_score_hash(m1) ((size_t)((m1).log_likelihood ? (m1).log_likelihood : (m1).p))

apop_data * apop_t_test	(	gsl_vector *	a,
		gsl_vector *	b
	)

Answers the question: with what confidence can I say that the means of these two columns of data are different?

If apop_opts.verbose is >=1, then display some information to stdout, like the mean/var/count for both vectors and the t statistic.

Parameters

a	one column of data
b	another column of data

Returns: an apop_data set with the following elements: mean left - right: the difference in means; if positive, first vector has larger mean, and one-tailed test is testing $L > R$ , else reverse if negative.
t statistic: used for the test
df: degrees of freedom
p value, 1 tail: the p-value for a one-tailed test that one vector mean is greater than the other.
confidence, 1 tail: 1- p value.
p value, 2 tail: the p-value for the two-tailed test that left mean = right mean.
confidence, 2 tail: 1-p value

Example usage:

 gsl_vector *L = apop_query_to_vector("select * from data where sex='M'");
 gsl_vector *R = apop_query_to_vector("select * from data where sex='F'");
 apop_data *test_out = apop_t_test(L, R);
 printf("Reject the null hypothesis of no difference between M and F with %g%% confidence\n", apop_data_get(test_out, .rowname="confidence, 2 tail"));

See also: apop_paired_t_test, which answers the question: with what confidence can I say that the mean difference between the two columns is zero?

int apop_table_exists	(	char const *	name,
		char	remove
	)

Check for the existence of a table, and maybe delete it.

Recreating a table which already exists can cause errors, so it is good practice to check for existence first. Also, this is the stylish way to delete a table, since just calling "drop table" will give you an error if the table doesn't exist.

Parameters

name	the table name (no default)
remove	'd' ==>delete table so it can be recreated in main. 'n' ==>no action. Return result so program can continue. (default)

Returns: 0 = table does not exist
1 = table was found, and if remove=='d', has been deleted
-1 = processing error

In the SQLite engine, this function considers table views to be tables.
This function uses the Designated initializers syntax for inputs.

double apop_test	(	double	statistic,
		char *	distribution,
		double	p1,
		double	p2,
		char	tail
	)

This is a convenience function to do the lookup of a given statistic along a given distribution. You give me a statistic, its (hypothesized) distribution, and whether to use the upper tail, lower tail, or both. I will return the odds of a Type I error given the model—in statistician jargon, the $p$ -value. [Type I error: odds of rejecting the null hypothesis when it is true.]

For example,

1 apop_test(1.3);

will return the density of the standard Normal distribution that is more than 1.3 from zero. If this function returns a small value, we can be confident that the statistic is significant. Or,

1 apop_test(1.3, "t", 10, .tail='u');

will give the appropriate odds for an upper-tailed test using the $t$ -distribution with 10 degrees of freedom (e.g., a $t$ -test of the null hypothesis that the statistic is less than or equal to zero).

Several more distributions are supported; see below.

For a two-tailed test (the default), this returns the density outside the range. I'll only do this for symmetric distributions.
For an upper-tail test ('u'), this returns the density above the cutoff
For a lower-tail test ('l'), this returns the density below the cutoff

Parameters

statistic	The scalar value to be tested.
distribution	The name of the distribution; see below.
p1	The first parameter for the distribution; see below.
p2	The second parameter for the distribution; see below.
tail	'u' = upper tail; 'l' = lower tail; anything else = two-tailed. (default = two-tailed)

Returns: The odds of a Type I error given the model (the $p$ -value).

Here are the distributions you can use and their parameters.

"normal" or "gaussian"

p1= $\mu$ , p2= $\sigma$
default (0, 1)

"lognormal"

p1= $\mu$ , p2= $\sigma$
default (0, 1)
Remember, $\mu$ and $\sigma$ refer to the Normal one would get after exponentiation
One-tailed tests only

"uniform"

p1=lower edge, p2=upper edge
default (0, 1)
two-tailed tests are run relative to the center, (p1+p2)/2.

"t"

p1=df
no default

"chi squared", "chi", "chisq":

p1=df
no default
One-tailed tests only; default='u' ( $p$ -value for typical cases)

"f"

p1=df1, p2=df2
no default
One-tailed tests only

This function uses the Designated initializers syntax for inputs.

apop_data * apop_test_anova_independence ( apop_data * d )

Run a Chi-squared test on an ANOVA table, i.e., an NxN table with the null hypothesis that all cells are equally likely.

Parameters

d	The input data, which is a crosstab of various elements. They don't have to sum to one.

Returns: An apop_data set including elements named "chi squared statistic", "df", and "p value". Retrieve via, e.g., apop_data_get(out, .rowname="p value").

See also: apop_test_fisher_exact

apop_data * apop_test_fisher_exact ( apop_data * intab )

Run the Fisher exact test on an input contingency table.

Returns: An apop_data set with two rows:
"probability of table": Probability of the observed table for fixed marginal totals.
"p value": Table p-value. The probability of a more extreme table, where `extreme' is in a probabilistic sense.

If there are processing errors, these values will be NaN.

Exceptions

out->error=='p' Processing error in the test.

For example:

#include <apop.h>
int main() {
    /* This test is thanks to Nick Eriksson, who sent it to me in the form of a bug report. */
    apop_data * testdata = apop_data_falloc((2, 3),
                              30, 50, 45, 
                              34, 12, 17 );
    apop_data * t2 = apop_test_fisher_exact(testdata);
    assert(fabs(apop_data_get(t2,.rowname="p value") - 0.0001761) < 1e-6);
}

apop_data * apop_test_kolmogorov	(	apop_model *	m1,
		apop_model *	m2
	)

Run the Kolmogorov-Smirnov test to determine whether two distributions are identical.

Parameters

m1	A sorted PMF model. I.e., a model estimated via something like `apop_model *m1 = apop_estimate(apop_data_sort(input_data), apop_pmf);`
m2	Another apop_model. If it is a PMF, then I will use a two-sample test, which is different from the one-sample test used if this is not a PMF.

Returns: An apop_data set including the $p$-value from the Kolmogorov-Smirnov test that the two distributions are equal.

Exceptions

out->error='m' Model error: m1 is not an apop_pmf. I verify this by checking whether m1->cdf == apop_pmf->cdf.

If you are using an apop_pmf model, the data set(s) must be sorted before you set up the model, as per the example below. See apop_data_sort and the discussion of CDFs in the apop_pmf documentation. If you don't do this, the test will almost certainly reject the null hypothesis that m1 and m2 are identical. A future version of Apophenia may implement a mechanism to allow this function to test for sorted data, but it currently can't.

Here is an example, which tests whether a set of draws from a Normal(0, 1) matches a sequence of Normal distributions with increasing mean.

#include <apop.h>
//This program finds the p-value of a K-S test between
//500 draws from a N(0, 1) and a N(x, 1), where x grows from 0 to 1.
apop_model * model_to_pmfs(apop_model *m1, int size){
    apop_data *outd1 = apop_model_draws(m1, size);
    return apop_estimate(apop_data_sort(outd1), apop_pmf);
}
int main(){
    apop_model *n1 = apop_model_set_parameters(apop_normal, 0, 1);
    apop_model *pmf1 = model_to_pmfs(n1, 5e2);
    apop_data *ktest;
    //first, there should be zero divergence between a PMF and itself:
    apop_model *pmf2 = apop_model_copy(pmf1);
    ktest = apop_test_kolmogorov(pmf1, pmf2);
    double pval = apop_data_get(ktest, .rowname="p value, 2 tail");
    assert(pval > .999);
    //as the mean m drifts, the pval for a comparison
    //between a N(0, 1) and N(m, 1) gets smaller.
    printf("mean\tpval\n");
    double prior_pval = 18;
    for(double i=0; i<= .6; i+=0.2){
        apop_model *n11 = apop_model_set_parameters(apop_normal, i, 1);
        ktest = apop_test_kolmogorov(pmf1, n11);
        apop_data_print(ktest, NULL);
        double pval = apop_data_get(ktest, .rowname="p value, 2 tail");
        assert(pval < prior_pval);
        printf("%g\t%g\n", i, pval);
        prior_pval = pval;
    }
    apop_model_free(pmf1);
}

apop_data * apop_text_alloc	(	apop_data *	in,
		const size_t	row,
		const size_t	col
	)

This allocates or resizes the text element of an apop_data set.

If the text element already exists, then this is effectively a realloc function, reshaping to the size you specify.

Parameters

in	An apop_data set. It's OK to send in `NULL`, in which case an apop_data set with `NULL` `matrix` and `vector` elements is returned.
row	the number of rows of text.
col	the number of columns of text.

Returns: A pointer to the relevant apop_data set. If the input was not NULL, then this is a repeat of the input pointer.

Exceptions

out->error=='a' Allocation error.

void apop_text_free	(	char ***	freeme,
		int	rows,
		int	cols
	)

Free a matrix of chars* (i.e., a char***). This is what apop_data_free uses internally to deallocate the text element of an apop_data set. You may never need to use it directly.

Sample usage:

1 apop_text_free(yourdata->text, yourdata->textsize[0], yourdata->textsize[1]);

char * apop_text_paste	(	apop_data const *	strings,
		char *	between,
		char *	before,
		char *	after,
		char *	between_cols,
		apop_fn_riip	prune,
		void *	prune_parameter
	)

Join together the text grid of an apop_data set into a single string.

For example, say that we have a data set with some text: row 0 has "a0", "b0", "c0"; row 1 has "a1", "b1", "c1"; and so on. We would like to produce

 insert into tab values ('a0', 'b0', 'c0');
 insert into tab values ('a1', 'b1', 'c1');
 ...

This could be sent to an SQL engine to copy the data to a database (but this is just an example for demonstration—use apop_data_print to write to a database table).

To construct this single string from the text grid, we would need to add:

before the text, Insert into tab values ('.
between each element on a row: ', '
between rows: '); \ninsert into tab values('
at the tail end: ');

Thus, do the conversion via:

 char *insert_string = apop_text_paste(indata,
     .before="Insert into tab values ('",
     .between="'); \ninsert into tab values ('",
     .between_cols="', '",
     .after="');"
 );

Parameters


  
    strings An apop_data set with a grid of text to be combined into a single string 
    between The text to put in between the rows of the table, such as ", ". (Default is a single space: " ") 
    before The text to put at the head of the string. For the query example, this would be .before="select ". (Default: NULL) 
    after The text to put at the tail of the string. For the query example, .after=" from data_table". (Default: NULL) 
    between_cols The text to insert between columns of text. See below for an example (Default is set to equal .between) 
    prune If you don't want to use the entire text set, you can provide a function to indicate which elements should be pruned out. Some examples:     1 //Just use column 3
    2 int is_not_col_3(apop_data *indata, int row, int col, void *ignore){
    3     return col!=3;
    4 }
    5 
    6 //Jump over blanks as if they don't exist.
    7 int is_blank(apop_data *indata, int row, int col, void *ignore){
    8     return strlen(indata->text[row][col])==0;
    9 }
 
    prune_parameter A void pointer to pass to your prune function.


Returns
A single string with the elements of the strings table joined as per your specification. Allocated by the function, to be freed by you if desired.

If the table of strings is NULL or has no text, the output string will have only the .before and .after parts with nothing in between. 
If apop_opts.verbose >= 3, the pasted text is also printed to stderr.
It is sometimes useful to use Apop_r and Apop_rs to get a view of only one or a few rows in conjunction with this function.


This function uses the Designated initializers syntax for inputs.

This sample snippet generates the SQL for a query using a list of column names (where the query begins with select , ends with from datatab, and has commas in between each element), re-processes the same list to produce the head of an HTML table, then produces the body of the table with the query result.

#include <apop.h>
int main(){
    apop_query("create table datatab(name, age, sex);"
                "insert into datatab values ('Alex', 23, 'm');"
                "insert into datatab values ('Alex', 32, 'f');"
                "insert into datatab values ('Michael', 41, 'f');"
                "insert into datatab values ('Michael', 14, 'm');");
    apop_data *cols = apop_text_alloc(NULL, 3, 1);
    apop_text_set(cols, 0, 0, "name");
    apop_text_set(cols, 1, 0, "age");
    apop_text_set(cols, 2, 0, "sex");
    char *query= apop_text_paste(cols, .before="select ", .between=", ");
    apop_data *d = apop_query_to_text("%s from datatab", query);
    char *html_head = apop_text_paste(cols, .before="<table><tr><td>",
                                .between="</td><td>", .after="</tr>\n<tr><td>");
    char *html_table = apop_text_paste(d, .before=html_head, .after="</td></tr></table>\n",
                                .between="</tr>\n<tr><td>", .between_cols="</td><td>");
    FILE *outfile = fopen("yourdata.html", "w");
    fprintf(outfile, "%s", html_table);
    fclose(outfile);
}

int apop_text_set	(	apop_data *	in,
		const size_t	row,
		const size_t	col,
		const char *	fmt,
			...
	)

Add a string to the text element of an apop_data set. If you send me a NULL string, I will write the value of apop_opts.nan_string in the given slot. If there is already something in that slot, that string is freed, preventing memory leaks.

Parameters

in	The apop_data set, that already has an allocated `text` element.
row	The row
col	The column
fmt	The text to write.
...	You can use a printf-style fmt and follow it with the usual variables to fill in.

Returns: 0=OK, -1=error (probably out-of-bounds)

UTF-8 or ASCII text is correctly handled.
Apophenia follows a general rule of not reallocating behind your back: if your text matrix is currently of size (3,3) and you try to put an item in slot (4,4), then I display an error rather than reallocating the text matrix.
The string added is a copy (via asprintf), not a pointer to the input(s).
If there had been a string at the grid point you are writing to, the old one is freed to prevent leaks. Remember this if you had other pointers aliasing that string.
If an element is NULL, write apop_opts.nan_string at that point. You may prefer to use "" to express a blank.
apop_text_alloc will reallocate to a new size if you need. For example, this code will fill the diagonals of the text array with a message, resizing as it goes:

 apop_data *list = (something already allocated.);
 for (int n=0; n < 10; n++){
     apop_text_alloc(list, n+1, n+1);
     apop_text_set(list, n, n, "This is cell (%i, %i)", n, n);
 }

apop_data * apop_text_to_data	(	char const *	text_file,
		int	has_row_names,
		int	has_col_names,
		int const *	field_ends,
		char const *	delimiters
	)

Read a delimited or fixed-width text file into the matrix element of an apop_data set.

See Input text file formatting.

See also apop_text_to_db, which handles text data, and may otherwise be a preferable approach to data management.

Parameters

text_file	The name of the text file to be read in. If "-" (the default), use stdin.
has_row_names	Do the lines of data have row names? `'y'` =yes; `'n'` =no (default: 'n')
has_col_names	Is the top line a list of column names? See Input text file formatting for notes on dimension (default: 'y')
field_ends	If fields have a fixed size, give the end of each field, e.g. `.field_ends=(int[]){3, 8, 11}`. (default: `NULL`, indicating not fixed width)
delimiters	A string listing the characters that delimit fields. (default: `"|,\t"`)

Returns: Returns an apop_data set.

Exceptions

out->error=='a'	allocation error
out->error=='t'	text-reading error

example: See apop_ols.

This function uses the Designated initializers syntax for inputs.

int apop_text_to_db	(	char const *	text_file,
		char *	tabname,
		int	has_row_names,
		int	has_col_names,
		char **	field_names,
		int const *	field_ends,
		apop_data *	field_params,
		char *	table_params,
		char const *	delimiters,
		char	if_table_exists
	)

Read a delimited or fixed-width text file into a database table. See Input text file formatting.

For purely numeric data, you may be able to bypass the database by using apop_text_to_data.

See the apop_ols page for an example that uses this function to read in sample data (also listed on that page).

Apophenia ships with an apop_text_to_db command-line utility, which is a wrapper for this function.

Especially if you are using a pre-2007 version of SQLite, there may be a speedup to putting this function in a begin/commit wrapper:

 apop_query("begin;");
 apop_data_print(dataset, .output_name="dbtab", .output_type='d');
 apop_query("commit;");

Parameters

text_file	The name of the text file to be read in. If `"-"`, then read from `STDIN`. (default: "-")
tabname	The name to give the table in the database (default: `text_file` after the last slash and up to the next dot. E.g., `text_file=="../data/pant_lengths.csv"` gives `tabname=="pant_lengths"`)
has_row_names	Do the lines of data have row names? (default: 0)
has_col_names	Is the top line a list of column names? (default: 1)
field_names	The list of field names, which will be the columns for the table. If `has_col_names==1`, read the names from the file (and just set this to `NULL`). If has_col_names == 1 && field_names !=NULL, I'll use the field names. (default: NULL)
field_ends	If fields have a fixed size, give the end of each field, e.g. `.field_ends=(int[]){3, 8, 11}`. (default: `NULL`, indicating not fixed width)
field_params	There is an implicit `create table` in setting up the database. If you want to add a type, constraint, or key, put that here. The relevant part of the input apop_data set is the `text` grid, which should be $N \times 2$ . The first item in each row (`your_params->text[n][0]`, for each $n$) is a regular expression to match against the variable names; the second item (`your_params->text[n][1]`) is the type, constraint, and/or key (i.e., what comes after the name in the `create` query). Not all variables need be mentioned; the default type if nothing matches is `numeric`. I go in order until I find a regex that matches the given field, so if you don't like the default, then set the last row to have name `.*`, which is a regex guaranteed to match anything that wasn't matched by an earlier row, and then set the associated type to your preferred default. See apop_regex on details of matching. (default: NULL)
table_params	There is an implicit `create table` in setting up the database. If you want to add a table constraint or key, such as `not null primary key (age, sex)`, put that here.
delimiters	A string listing the characters that delimit fields. default = `"|,\t"`
if_table_exists	What should I do if the table exists? `'n'` Do nothing; exit this function. (default) `'d'` Retain the table but delete all data; refill with the new data (i.e., call `"delete * from your_table"`). `'o'` Overwrite the table from scratch; deleting the previous table entirely. `'a'` Append new data to the existing table.

Returns: Returns the number of rows on success, -1 on error.

This function uses the Designated initializers syntax for inputs.

apop_data * apop_text_unique_elements	(	const apop_data *	d,
		size_t	col
	)

Give me a column of text, and I'll give you a sorted list of the unique elements. This is basically running select distinct * from datacolumn, but without the aid of the database.

Parameters

d	An apop_data set with a text component
col	The text column you want me to use.

Returns: An apop_data set with a single sorted column of text, where each unique text input appears once.

See also: apop_vector_unique_elements

apop_model * apop_update	(	apop_data *	data,
		apop_model *	prior,
		apop_model *	likelihood,
		gsl_rng *	rng
	)

Take in a prior and likelihood distribution, and output a posterior distribution.

This function first checks a table of conjugate distributions for the pair you sent in. If the models are listed on the table, then the function returns a corresponding closed-form model with updated parameters.

If the models aren't in the table of conjugates, and the prior distribution has a p or log_likelihood element, then use apop_model_metropolis to generate the posterior. If you expect MCMC to run, you may add an apop_mcmc_settings group to your prior to control the details of the search. See also the apop_model_metropolis documentation.

If the prior does not have a p or log_likelihood but does have a draw element, then make draws from the prior and weight them by the p given by the likelihood distribution. This is not a rejection sampling method, so the burnin is ignored.

Parameters

data	The input data, that will be used by the likelihood function (default = `NULL`.)
prior	The prior apop_model. If the system needs to estimate the posterior via MCMC, this needs to have a `log_likelihood` or `p` method. (No default, must not be `NULL`.)
likelihood	The likelihood apop_model. If the system needs to estimate the posterior via MCMC, this needs to have a `log_likelihood` or `p` method (ll preferred). (No default, must not be `NULL`.)
rng	A `gsl_rng`, already initialized (e.g., via apop_rng_alloc). (default: an RNG from apop_rng_get_thread)

Returns: an apop_model struct representing the posterior, with updated parameters.

In all cases, the output is an apop_model that can be used as the input to this function, so you can chain Bayesian updating procedures.
Here are the conjugate distributions currently defined:

Prior	Likelihood	Notes
Beta	Binomial
Beta	Bernoulli
Exponential	Gamma	Gamma likelihood represents the distribution of $\lambda^{-1}$ , not plain $\lambda$
Normal	Normal	Assumes prior with fixed $\sigma$ ; updates distribution for $\mu$
Gamma	Poisson	Uses sum and size of the data

Here is a test function that compares the output via conjugate table and via Metropolis-Hastings sampling:

#include <apop.h>
//For the test suite.
void distances(gsl_vector *v1, gsl_vector *v2, double tol){
    double error = apop_vector_distance(v1, v2, .metric='m');
    double updated_size = apop_vector_sum(v1);
    Apop_stopif(error/updated_size > tol, exit(1), 0, "The error is %g, which is too big.", error/updated_size);
}
int main(){
    double binom_start = 0.6;
    double beta_start_a = 0.3;
    double beta_start_b = 0.5;
    double n = 4000;
    //First, the easy estimation using the conjugate distribution table.
    apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
    apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
    apop_model *updated = apop_update(.prior= beta, .likelihood=bin);
    //Now estimate via MCMC. 
    //Requires a one-parameter binomial, with n fixed,
    //and a data set of n data points with the right p.
    apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
    apop_data *bin_draws = apop_data_falloc((1,2), n*(1-binom_start), n*binom_start);
    bin = apop_model_fix_params(bcopy);
    Apop_settings_add_group(beta, apop_mcmc, .burnin=.2, .periods=1e5);
    apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);
    apop_model *out_beta = apop_estimate(out_h->data, apop_beta);
    //Finally, we can compare the conjugate and Gibbs results:
    distances(updated->parameters->vector, out_beta->parameters->vector, 0.01);
    //The apop_update function used apop_model_metropolis to generate
    //a batch of draws, so the draw method for out_h is apop_model_metropolis_draw.
    //So, here we make more draws using metropolis, and compare the beta
    //distribution that fits to those draws to the beta distribution output above.
    int draws = 1.3e5;
    apop_data *d = apop_model_draws(out_h, draws);
    apop_model *drawn = apop_estimate(d, apop_beta);
    distances(updated->parameters->vector, drawn->parameters->vector, 0.02);
}

The conjugate table is stored using a vtable; see Registering new methods in vtables for details. If you are writing a new vtable entry, the typedef new functions must conform to and the hash used for lookups are:

1 typedef apop_model *(*apop_update_type)(apop_data *, apop_model , apop_model);

2 #define apop_update_hash(m1, m2) ((size_t)(m1).draw + (size_t)((m2).log_likelihood ? (m2).log_likelihood : (m2).p)*33)

This function uses the Designated initializers syntax for inputs.

void apop_vector_apply	(	gsl_vector *	v,
		void(*)(double *)	fn
	)

Apply a function to every element of a vector. The function that you input takes in a double* and may modify the input value in place. This function will send a pointer to each element of your vector to your function.

Parameters

v	The input vector
fn	A function of the form `void fn(double *in)`

If the vector is NULL, this is a no-op.
See the map/apply page for details.
See also
apop_map

int apop_vector_bounded	(	const gsl_vector *	in,
		long double	max
	)

Test that all elements of a vector are within bounds, so you can preempt a procedure that is about to break on infinite or too-large values.

Parameters

in	A `gsl_vector`
max	An upper and lower bound to the elements of the vector. (default: INFINITY)

Returns: 1 if everything is bounded: not Inf, -Inf, or NaN, and $-\max < x < \max$ ;
0 otherwise.

A NULL vector has no unbounded elements, so NULL input returns 1. You get a warning if apop_opts.verbose >=2.
This function uses the Designated initializers syntax for inputs.

gsl_vector * apop_vector_copy ( const gsl_vector * in )

Copy one gsl_vector to another. That is, all data is duplicated. Unlike gsl_vector_memcpy, this function allocates and returns the destination, so you can use it like this:

1 gsl_vector *a_copy = apop_vector_copy(original);

Parameters

in	The input vector

Returns: A structure that this function will allocate and fill. If gsl_vector_alloc fails, returns NULL and prints a warning.

double apop_vector_correlation	(	const gsl_vector *	ina,
		const gsl_vector *	inb,
		const gsl_vector *	weights
	)

Returns the correlation coefficient of two vectors: ${\hbox{cov}(a,b)\over \sqrt{\hbox{var}(a)} \sqrt{\hbox{var}(b)}}.$

An example

 gsl_matrix *m = apop_text_to_data("indata")->matrix;
 printf("The correlation coefficient between rows two "
        "and three is %g.\n", apop_vector_correlation(Apop_mrv(m, 2), Apop_mrv(m, 3)));

Parameters

ina,inb	Two vectors of equal length (no default, must not be NULL)
weights	Replicate weights for the observations. (default: equal weights for all observations)

This function uses the Designated initializers syntax for inputs.

double apop_vector_cov	(	const gsl_vector *	v1,
		const gsl_vector *	v2,
		const gsl_vector *	weights
	)

Find the sample covariance of a pair of vectors, with an optional weighting. This only makes sense if the weightings are identical, so the function takes only one weighting vector for both.

Parameters

v1,v2	The data vectors (no default; must not be `NULL`)
weights	The weight vector. (default equal weights for all elements)

Returns: The sample covariance

This function uses the Designated initializers syntax for inputs.

double apop_vector_distance	(	const gsl_vector *	ina,
		const gsl_vector *	inb,
		const char	metric,
		const double	norm
	)

Returns the distance between two vectors, where distance is defined based on the third (optional) parameter:

'e' (the default): scalar distance (standard Euclidean metric) between two vectors. $\sqrt{\sum_i{(a_i - b_i)^2}},$ where $i$ iterates over dimensions.
'm' Returns the Manhattan metric distance between two vectors: $\sum_i{|a_i - b_i|},$ where $i$ iterates over dimensions.
'd' The discrete norm: if $a = b$, return zero, else return one.
's' The sup norm: find the dimension where $|a_i - b_i|$ is largest, return the distance along that one dimension.
'l' or 'L' The $L_p$ norm, $\left(\sum_i{|a_i - b_i|^p}\right)^{1/p}$ . The value of $p$ is set by the fourth (optional) argument.

Parameters

ina	First vector (No default, must not be `NULL`)
inb	Second vector (Default = zero)
metric	The type of metric, as above.
norm	If you are using an $L_p$ norm, this is $p$. Must be strictly greater than zero. (default = 2)

The defaults are such that
1 apop_vector_distance(v);
2 apop_vector_distance(v, .metric = 's');
3 apop_vector_distance(v, .metric = 'm');
give you the standard Euclidean length of v, the magnitude of its largest element, and the sum of the absolute values of its elements.
This function uses the Designated initializers syntax for inputs.

#include <apop.h>
/* Test distance calculations using a 3-4-5 triangle */
int main(){
    gsl_vector *v1 = gsl_vector_alloc(2);
    gsl_vector *v2 = gsl_vector_alloc(2);
    apop_vector_fill(v1, 2, 2);
    apop_vector_fill(v2, 5, 6);
    assert(apop_vector_distance(v1, v1, 'd') == 0);
    assert(apop_vector_distance(v1, v2, 'd') == 1);
    assert(apop_vector_distance(v1, .metric='m') == 4);
    assert(apop_vector_distance(v2, .metric='s') == 6);
    assert(apop_vector_distance(v1,v2) == 5.); //the hypotenuse of the 3-4-5 triangle
    assert(apop_vector_distance(v1,v2, 'm') == 7.);
    assert(apop_vector_distance(v1,v2, 'L', 2) == 5.);  //L_2 norm == Euclidean
}

long double apop_vector_entropy ( gsl_vector * in )

Given a vector representing a probability distribution of observations, calculate the entropy, $\sum_i -\ln(v_i)v_i$ .

You may input a vector giving frequencies (normalized to sum to one) or counts (arbitrary sum).

The entropy of a data set depends only on the frequency with which elements are observed, not the value of the elements themselves. The apop_data_pmf_compress function will reduce an input apop_data set to one weighted line per observation, and the weights would determine the entropy:

 apop_data *data = apop_text_to_data("indata");
 apop_data_pmf_compress(data);
 data_entropy = apop_vector_entropy(data->weights);

The entropy is calculated using natural logs. To convert to base 2, divide by $\ln(2)$ ; see the example.

The entropy of an empty data set (NULL or a total weight of zero) is zero. Print a warning when given NULL input and apop_opts.verbose >=1.

If the input vector has negative elements, return NaN; print a warning when apop_opts.verbose >= 0.

Sample code:

#include <apop.h>
#define Diff(left, right, eps) Apop_stopif(fabs((left)-(right))>(eps), abort(), 0, "%g is too different from %g (abitrary limit=%g).", (double)(left), (double)(right), eps)
long double entropy_base_2(gsl_vector *x) {
    return apop_vector_entropy(x)/log(2);
}
int main(){
    apop_model *flip = apop_model_set_parameters(apop_bernoulli, .5);
    //zero data => entropy zero
    gsl_vector *v = gsl_vector_calloc(1);
    assert(apop_vector_entropy(v) == 0);
    //negative data => NaN
    gsl_vector_set(v, 0, -1);
    int v1 = apop_opts.verbose;
    apop_opts.verbose = -1;
    assert(isnan(apop_vector_entropy(v)));
    apop_opts.verbose = v1;
    //N equiprobable bins => entropy = log(N)
    v = apop_vector_realloc(v, 100);
    gsl_vector_set_all(v, 1./100);
    Diff(log(100), apop_vector_entropy(v), 1e-5);
    //Normalization is optional. You may send a vector of counts.
    gsl_vector_set_all(v, 1);
    Diff(log(100), apop_vector_entropy(v), 1e-5);
    //flip two coins.
    apop_data *coin_flips = apop_model_draws(flip, .count=10000);
    apop_data *c2         = apop_model_draws(flip, .count=10000);
    apop_data_stack(c2, coin_flips, 'c', .inplace='y');
    //entropy of one coin flip in base2 == 1
    apop_data_pmf_compress(coin_flips);
    Diff(entropy_base_2(coin_flips->weights), 1, 1e-3);
    //entropy of two coin flips in base2 == 2
    apop_data_pmf_compress(c2);
    Diff(entropy_base_2(c2->weights), 2, 1e-3);
    //flip three coins, via model cross products
    Diff(entropy_base_2(apop_data_pmf_compress(apop_model_draws(
            apop_model_cross(flip, flip, flip) ,.count=10000))->weights), 3, 1e-3);
    apop_data_free(coin_flips);
    apop_data_free(c2);
    gsl_vector_free(v);
}

void apop_vector_exp ( gsl_vector * v )

Replace every vector element $v_i$ with exp $(v_i)$ .

If the input vector is NULL, do nothing.

double apop_vector_kurtosis ( const gsl_vector * in )

Returns the sample fourth central moment of the data in the given vector. Corrections are made to produce an unbiased result as per Appendix M (PDF) of Modeling with data.

This is an estimate of the fourth central moment without normalization. The kurtosis of a ${\cal N}(0,1)$ is $3 \sigma^4$ , not three, one, or zero.
See also
apop_vector_kurtosis_pop

double apop_vector_kurtosis_pop	(	gsl_vector const *	v,
		gsl_vector const *	weights
	)

Returns the population fourth central moment [ $\sum_i (x_i - \mu)^4/n$ ] of the data in the given vector, with an optional weighting.

Parameters

v	The data vector
weights	The weight vector. If NULL, assume equal weights.

Returns: The weighted kurtosis.

Some people like to normalize the fourth central moment by dividing by variance squared, or by subtracting three; those things are not done here, so you'll have to do them separately if desired.
This function uses the Designated initializers syntax for inputs.
See also
apop_vector_kurtosis for the unbiased sample version.

void apop_vector_log ( gsl_vector * v )

Replace every vector element $v_i$ with ln $(v_i)$ .

If the input vector is NULL, do nothing.

void apop_vector_log10 ( gsl_vector * v )

Replace every vector element $v_i$ with log $_{10}(v_i)$ .

If the input vector is NULL, do nothing.

gsl_vector * apop_vector_map	(	const gsl_vector *	v,
		double(*)(double)	fn
	)

Map a function onto every element of a vector. This function will send each element to the function you provide, and will output a gsl_vector holding your function's output for each row.

Parameters

v	The input vector
fn	A function of the form `double fn(double in)`

Returns: A gsl_vector (allocated by this function) with the corresponding value for each row.

If you input a NULL vector, I return NULL.
See the map/apply page for details.
See also
apop_map, apop_map_sum

double apop_vector_map_sum	(	const gsl_vector *	in,
		double(*)(double)	fn
	)

Returns the sum of the output of apop_vector_map. For example, apop_vector_map_sum(v, isnan) returns the count of elements of v that are NaN.

If you input a NULL vector, I return the sum of zero items: zero.
See the map/apply page for details.
See also
apop_map, apop_map_sum

double apop_vector_mean	(	gsl_vector const *	v,
		gsl_vector const *	weights
	)

Find the mean, weighted or unweighted.

Parameters

v	The data vector
weights	The weight vector. Default: assume equal weights.

Returns

The weighted mean

This function uses the Designated initializers syntax for inputs.

gsl_vector * apop_vector_moving_average	(	gsl_vector *	v,
		size_t	bandwidth
	)

Return a new vector that is the moving average of the input vector.

Parameters

v	The input vector, unsmoothed
bandwidth	An integer $\geq 1$ giving the number of elements to be averaged to produce one number.

Returns: A smoothed vector of size v->size - (bandwidth/2)*2.

void apop_vector_normalize	(	gsl_vector *	in,
		gsl_vector **	out,
		const char	normalization_type
	)

This function will normalize a vector, either such that it has mean zero and variance one, or ranges between zero and one, or sums to one.

Parameters

in	A `gsl_vector` with the un-normalized data. `NULL` input gives `NULL` output. (No default)
out	If normalizing in place, `NULL`. If not, the address of a `gsl_vector*`. Do not allocate. (default = `NULL`.)
normalization_type	`'p'`: normalized vector will sum to one. E.g., start with a set of observations in bins, end with the percentage of observations in each bin. (the default) `'r'`: normalized vector will range between zero and one. Replace each X with (X-min) / (max - min). `'s'`: normalized vector will have mean zero and (sample) variance one. Replace each X with $(X-\mu) / \sigma$ , where $\sigma$ is the sample standard deviation. `'m'`: normalize to mean zero: Replace each X with $(X-\mu)$

Example

This function uses the Designated initializers syntax for inputs.

double * apop_vector_percentiles	(	gsl_vector *	data,
		char	rounding
	)

Returns an array of size 101, where returned_vector[95] gives the value of the 95th percentile, for example. Returned_vector[100] is always the maximum value, and returned_vector[0] is always the min (regardless of rounding rule).

Parameters

data	A `gsl_vector` with the data. (No default, must not be `NULL`.)
rounding	One of `'u'`, `'d'`, or `'a'`. Unless the size of your data is exactly a multiple of 101, some percentiles will be ambiguous. If `'u'`, then round up (use the next highest value); if `'d'`, round down to the next lowest value; if `'a'`, take the mean of the two nearest points. (Default = `'d'`.)

If the rounding method is 'u' or 'a', then you can say "5% or more of the sample is below returned_vector[5]"; if 'd' or 'a', then you can say "5% or more of the sample is above returned_vector[5]".
You may eventually want to free() the array returned by this function.
This function uses the Designated initializers syntax for inputs.

void apop_vector_print	(	gsl_vector *	data,
		Output_declares
	)

Print a vector to the screen, a file, a pipe, or the database.

See apop_prep_output for more on how printing settings are set.
For example, the default for apop_opts.output_delimiter is a tab, which puts the vector on one line, but apop_opts.output_delimiter="\n" would print the vector vertically.
See also Legible output for more details and examples.
This function uses the Designated initializers syntax for inputs.

gsl_vector * apop_vector_realloc	(	gsl_vector *	v,
		size_t	newheight
	)

This function will resize a gsl_vector to a new length.

Data in the vector will be retained. If the new height is smaller than the old, then data at the end of the vector will be cropped away (in a non–memory-leaking manner). If the new height is larger than the old, then new cells will be filled with garbage; it is your responsibility to zero out or otherwise fill them before use.

A large number of reallocs can take a noticeable amount of time. You are thus encouraged to make an effort to determine the size of your data and do one allocation, rather than writing for loops that resize a vector at every increment.
The gsl_vector is a versatile struct that can represent subvectors, matrix columns and other cuts from parent data. Resizing a portion of a parent matrix makes no sense, so this function returns NULL and prints an error if asked to resize a view.

Parameters

v	The already-allocated vector to resize. If you give me `NULL`, this is equivalent to `gsl_vector_alloc`
newheight	The height you'd like the vector to be.

Returns: v, now resized

double apop_vector_skew ( const gsl_vector * in )

Returns an unbiased estimate of the sample skew of the data in the given vector.

double apop_vector_skew_pop	(	gsl_vector const *	v,
		gsl_vector const *	weights
	)

Returns the population skew $(\sum_i (x_i - \mu)^3/n)$ of the data in the given vector. Observations may be weighted.

Parameters

v	The data vector
weights	The weight vector. Default: equal weights for all observations.

Returns: The weighted skew.

Some people like to normalize the skew by dividing by $(\text{variance})^{3/2}$; that's not done here, so you'll have to do so separately if need be.

Apophenia tries to be smart about reading the weights. If weights sum to one, then the system uses w->size as the number of elements, and returns the usual sum over $n$. If weights sum to more than one, then the system uses the total weights as $n$. Thus, you can use the weights as standard weightings or to represent elements that appear repeatedly.

gsl_vector * apop_vector_stack	(	gsl_vector *	v1,
		gsl_vector const *	v2,
		char	inplace
	)

Put the first vector on top of the second vector.

Parameters

v1	the upper vector (default=`NULL`, in which case this copies `v2`)
v2	the second vector (default=`NULL`, in which case nothing is added)
inplace	If `'y'`, use apop_vector_realloc to modify `v1` in place; see the caveats on that function. Otherwise, allocate a new vector, leaving `v1` undisturbed. (default=`'n'`)

Returns: the stacked data, either in a new vector or a pointer to v1.

This function uses the Designated initializers syntax for inputs.

long double apop_vector_sum ( const gsl_vector * in )

Returns the sum of the data in the given vector.

gsl_matrix * apop_vector_to_matrix	(	const gsl_vector *	in,
		char	row_col
	)

This function copies the data in a vector to a new one-column (or one-row) matrix and returns the newly-allocated and filled matrix.

For the reverse, try apop_data_pack.

Parameters

in	a `gsl_vector` (No default. If `NULL`, I return `NULL`, with a warning if `apop_opts.verbose >=1` )
row_col	If `'r'`, then this will be a row (1 x N) instead of the default, a column (N x 1). (default: `'c'`)

Returns: a newly-allocated gsl_matrix with one column (or row).

If you send in a NULL vector, you get a NULL pointer in return. I warn you of this if apop_opts.verbose >=2 .
If gsl_matrix_alloc fails you get a NULL pointer in return.
This function uses the Designated initializers syntax for inputs.

gsl_vector * apop_vector_unique_elements ( const gsl_vector * v )

Give me a vector of numbers, and I'll give you a sorted list of the unique elements. This is basically running select distinct datacol from data order by datacol, but without the aid of the database.

Parameters

v	a vector of items

Returns

a sorted vector of the distinct elements that appear in the input.

NaNs (if any) appear at the end of the sort order.

See also: apop_text_unique_elements

double apop_vector_var	(	gsl_vector const *	v,
		gsl_vector const *	weights
	)

Find the sample variance of a vector, weighted or unweighted.

Parameters

v	The data vector
weights	The weight vector. If NULL (the default), assume equal weights.

Returns: The weighted sample variance.

This uses (n-1) in the denominator of the sum; i.e., it corrects for the bias introduced by using $\bar x$ instead of $\mu$ .
Multiply the output by (n-1)/n if you need population variance.
Apophenia tries to be smart about reading the weights. If weights sum to one, then the system uses w->size as the number of elements, and returns the usual sum over $n-1$. If weights sum to more than one, then the system uses the total weights as $n$. Thus, you can use the weights as standard weightings or to represent elements that appear repeatedly.
This function uses the Designated initializers syntax for inputs.
See also
apop_vector_var_m for the case where you already have the vector's mean.

double apop_vector_var_m	(	const gsl_vector *	in,
		const double	mean
	)

Returns the variance of the data in the given vector, given that you've already calculated the mean.

Parameters

in	the vector in question
mean	the mean, which you've already calculated using apop_vector_mean.

See also: apop_vector_var

Variable Documentation

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

apop_opts_type apop_opts

Here is where the options are initially set. See the apop_opts_type documentation for details.

strings	An apop_data set with a grid of text to be combined into a single string
between	The text to put in between the rows of the table, such as ", ". (Default is a single space: " ")
before	The text to put at the head of the string. For the query example, this would be `.before="select "`. (Default: NULL)
after	The text to put at the tail of the string. For the query example, `.after=" from data_table"`. (Default: NULL)
between_cols	The text to insert between columns of text. See below for an example (Default is set to equal `.between`)
prune	If you don't want to use the entire text set, you can provide a function to indicate which elements should be pruned out. Some examples:

```c
//Just use column 3
int is_not_col_3(apop_data *indata, int row, int col, void *ignore){
    return col!=3;
}

//Jump over blanks as if they don't exist.
int is_blank(apop_data *indata, int row, int col, void *ignore){
    return strlen(indata->text[row][col])==0;
}
```
prune_parameter	A void pointer to pass to your `prune` function.

Apophenia

Data Structures

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

Function Documentation

Variable Documentation