geoip.c File Reference

Functions related to maintaining an IP-to-country database and to summarizing client connections by country. More...

#include "or.h"
#include "ht.h"

Data Structures

struct  geoip_entry_t
struct  geoip_country_t
struct  clientmap_entry_t
struct  c_hist_t
struct  dirreq_map_entry_t

Defines

#define GEOIP_PRIVATE
#define REQUEST_HIST_LEN   1
#define REQUEST_HIST_PERIOD   (24*60*60)
#define ACTION_MASK   3
#define REQUEST_SHARE_INTERVAL   (15 * 60)
#define MIN_IPS_TO_NOTE_COUNTRY   1
#define MIN_IPS_TO_NOTE_ANYTHING   1
#define IP_GRANULARITY   8
#define DIRREQ_TIMEOUT   (10*60)
#define DIR_REQ_GRANULARITY   4
#define MIN_DIR_REQ_RESPONSES   16
#define GEOIP_MIN_OBSERVATION_TIME   (12*60*60)
#define RESPONSE_GRANULARITY   8

Functions

static void clear_geoip_db (void)
country_t geoip_get_country (const char *country)
static void geoip_add_entry (uint32_t low, uint32_t high, const char *country)
int geoip_parse_entry (const char *line)
static int _geoip_compare_entries (const void **_a, const void **_b)
static int _geoip_compare_key_to_entry (const void *_key, const void **_member)
int should_record_bridge_info (or_options_t *options)
int geoip_load_file (const char *filename, or_options_t *options)
int geoip_get_country_by_ip (uint32_t ipaddr)
int geoip_get_n_countries (void)
const char * geoip_get_country_name (country_t num)
int geoip_is_loaded (void)
static HT_HEAD (clientmap, clientmap_entry_t)
static INLINE int clientmap_entries_eq (const clientmap_entry_t *a, const clientmap_entry_t *b)
 HT_PROTOTYPE (clientmap, clientmap_entry_t, node, clientmap_entry_hash, clientmap_entries_eq)
 HT_GENERATE (clientmap, clientmap_entry_t, node, clientmap_entry_hash, clientmap_entries_eq, 0.6, malloc, realloc, free)
static void geoip_determine_shares (time_t now)
static int geoip_get_mean_shares (time_t now, double *v2_share_out, double *v3_share_out)
static void rotate_request_period (void)
void geoip_note_client_seen (geoip_client_action_t action, uint32_t addr, time_t now)
static int _remove_old_client_helper (struct clientmap_entry_t *ent, void *_cutoff)
void geoip_remove_old_clients (time_t cutoff)
void geoip_note_ns_response (geoip_client_action_t action, geoip_ns_response_t response)
time_t geoip_get_history_start (void)
static int _c_hist_compare (const void **_a, const void **_b)
static HT_HEAD (dirreqmap, dirreq_map_entry_t)
static unsigned dirreq_map_ent_hash (const dirreq_map_entry_t *entry)
 HT_PROTOTYPE (dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash, dirreq_map_ent_eq)
 HT_GENERATE (dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash, dirreq_map_ent_eq, 0.6, malloc, realloc, free)
static void _dirreq_map_put (dirreq_map_entry_t *entry, dirreq_type_t type, uint64_t dirreq_id)
static dirreq_map_entry_t * _dirreq_map_get (dirreq_type_t type, uint64_t dirreq_id)
void geoip_start_dirreq (uint64_t dirreq_id, size_t response_size, geoip_client_action_t action, dirreq_type_t type)
void geoip_change_dirreq_state (uint64_t dirreq_id, dirreq_type_t type, dirreq_state_t new_state)
static char * geoip_get_dirreq_history (geoip_client_action_t action, dirreq_type_t type)
static char * geoip_get_client_history (time_t now, geoip_client_action_t action, int min_observation_time, unsigned granularity)
char * geoip_get_client_history_dirreq (time_t now, geoip_client_action_t action)
char * geoip_get_client_history_bridge (time_t now, geoip_client_action_t action)
char * geoip_get_request_history (time_t now, geoip_client_action_t action)
void geoip_dirreq_stats_init (time_t now)
void geoip_dirreq_stats_write (time_t now)
void geoip_bridge_stats_init (time_t now)
static char * parse_bridge_stats_controller (const char *stats_str, time_t now)
time_t geoip_bridge_stats_write (time_t now)
static void load_bridge_stats (time_t now)
const char * geoip_get_bridge_stats_extrainfo (time_t now)
const char * geoip_get_bridge_stats_controller (time_t now)
void geoip_entry_stats_init (time_t now)
void geoip_entry_stats_write (time_t now)
int getinfo_helper_geoip (control_connection_t *control_conn, const char *question, char **answer)
void geoip_free_all (void)

Variables

static smartlist_t * geoip_countries = NULL
static strmap_t * country_idxplus1_by_lc_code = NULL
static smartlist_t * geoip_entries = NULL
static time_t last_time_determined_shares = 0
static double v2_share_times_seconds
static double v3_share_times_seconds
static int share_seconds
static uint32_t ns_v2_responses [GEOIP_NS_RESPONSE_NUM]
static uint32_t ns_v3_responses [GEOIP_NS_RESPONSE_NUM]
static time_t start_of_dirreq_stats_interval
static time_t start_of_bridge_stats_interval
static char * bridge_stats_extrainfo = NULL
static char * bridge_stats_controller = NULL
static time_t start_of_entry_stats_interval


Detailed Description

Functions related to maintaining an IP-to-country database and to summarizing client connections by country.


Define Documentation

#define DIRREQ_TIMEOUT   (10*60)

When there are incomplete directory requests at the end of a 24-hour period, consider those requests running for longer than this timeout as failed, the others as still running.

Referenced by geoip_get_dirreq_history().

#define GEOIP_MIN_OBSERVATION_TIME   (12*60*60)

How long do we have to have observed per-country request history before we are willing to talk about it?

Referenced by geoip_get_client_history_bridge(), and geoip_get_request_history().

#define IP_GRANULARITY   8

When reporting geoip data about countries, round up to the nearest multiple of this value.

Referenced by geoip_get_client_history_bridge(), and geoip_get_request_history().

#define MIN_IPS_TO_NOTE_ANYTHING   1

Do not report any geoip data at all if we have fewer than this number of IPs to report about.

Referenced by geoip_get_client_history().

#define MIN_IPS_TO_NOTE_COUNTRY   1

Do not mention any country from which fewer than this number of IPs have connected. This conceivably avoids reporting information that could deanonymize users, though analysis is lacking.

Referenced by geoip_get_client_history().

#define REQUEST_HIST_LEN   1

For how many periods should we remember per-country request history?

Referenced by geoip_get_request_history(), and geoip_note_client_seen().

#define REQUEST_HIST_PERIOD   (24*60*60)

How long are the periods for which we should remember request history?

Referenced by geoip_note_client_seen().

#define REQUEST_SHARE_INTERVAL   (15 * 60)

How often do we update our estimate of which share of v2 and v3 directory requests is sent to us? We could as well trigger updates of shares from network status updates, but that means adding a lot of calls into code that is independent from geoip stats (and keeping them up-to-date). We are perfectly fine with an approximation of 15-minute granularity.

Referenced by geoip_note_client_seen().


Function Documentation

static int _c_hist_compare ( const void **  _a,
const void **  _b 
) [static]

Sorting helper: return -1, 1, or 0 based on comparison of two c_hist_t. Sort in descending order of total, and then by country code.

References c_hist_t::country, and c_hist_t::total.

Referenced by geoip_get_client_history(), and geoip_get_request_history().

static dirreq_map_entry_t* _dirreq_map_get ( dirreq_type_t  type,
uint64_t  dirreq_id 
) [static]

Helper: Look up and return an entry in the map of directory requests using type and dirreq_id as key parts. If there is no such entry, return NULL.

Referenced by geoip_change_dirreq_state().

static void _dirreq_map_put ( dirreq_map_entry_t *  entry,
dirreq_type_t  type,
uint64_t  dirreq_id 
) [static]

Helper: Put entry into map of directory requests using type and dirreq_id as key parts. If there is already an entry for that key, print out a BUG warning and return.

References LD_BUG, and tor_assert.

Referenced by geoip_start_dirreq().

static int _geoip_compare_entries ( const void **  _a,
const void **  _b 
) [static]

Sorting helper: return -1, 1, or 0 based on comparison of two geoip_entry_t

References geoip_entry_t::ip_low.

Referenced by geoip_load_file().

static int _geoip_compare_key_to_entry ( const void *  _key,
const void **  _member 
) [static]

bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer to a uint32_t in host order) to a geoip_entry_t

References geoip_entry_t::ip_high.

Referenced by geoip_get_country_by_ip().

static int _remove_old_client_helper ( struct clientmap_entry_t *  ent,
void *  _cutoff 
) [static]

HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's older than a certain time.

References tor_free.

Referenced by geoip_remove_old_clients().

static void clear_geoip_db ( void   )  [static]

Release all storage held by the GeoIP database.

References country_idxplus1_by_lc_code, smartlist_free(), strmap_free(), and tor_free.

Referenced by geoip_free_all(), and geoip_load_file().

static INLINE int clientmap_entries_eq ( const clientmap_entry_t *  a,
const clientmap_entry_t *  b 
) [static]

Hashtable helper: compare two clientmap_entry_t values for equality.

static void geoip_add_entry ( uint32_t  low,
uint32_t  high,
const char *  country 
) [static]

Add an entry to the GeoIP table, mapping all IPs between low and high, inclusive, to the 2-letter country code country.

References geoip_entry_t::country, country_idxplus1_by_lc_code, geoip_country_t::countrycode, geoip_entry_t::ip_high, geoip_entry_t::ip_low, smartlist_add(), strmap_get_lc(), strmap_set_lc(), tor_assert, and tor_strlower().

Referenced by geoip_parse_entry().

void geoip_bridge_stats_init ( time_t  now  ) 

Initialize bridge stats.

Referenced by run_scheduled_events().

time_t geoip_bridge_stats_write ( time_t  now  ) 

void geoip_change_dirreq_state ( uint64_t  dirreq_id,
dirreq_type_t  type,
dirreq_state_t  new_state 
)

Change the state of the directory request with ID dirreq_id, which is either direct or tunneled (see type), to new_state and possibly mark it as completed. If no entry can be found for the given key parts (e.g., if this is a directory request that we are not measuring, or one that was started in the previous measurement period), or if the state cannot be advanced to new_state, do nothing.

References _dirreq_map_get(), DIRREQ_FLUSHING_DIR_CONN_FINISHED, DIRREQ_IS_FOR_NETWORK_STATUS, DIRREQ_OR_CONN_BUFFER_FLUSHED, get_options(), and tor_gettimeofday().

Referenced by connection_dir_finished_flushing(), connection_handle_write_impl(), connection_or_flush_from_first_active_circuit(), and relay_send_command_from_edge().

static void geoip_determine_shares ( time_t  now  )  [static]

Try to determine which fraction of v2 and v3 directory requests aimed at caches will be sent to us at time now and store that value in order to take a mean value later on.

References last_time_determined_shares, router_get_my_share_of_directory_requests(), share_seconds, v2_share_times_seconds, and v3_share_times_seconds.

Referenced by geoip_get_mean_shares(), and geoip_note_client_seen().

void geoip_dirreq_stats_init ( time_t  now  ) 

Initialize directory request stats.

Referenced by run_scheduled_events().

void geoip_dirreq_stats_write ( time_t  now  ) 

void geoip_entry_stats_init ( time_t  now  ) 

Initialize entry stats.

Referenced by run_scheduled_events().

void geoip_entry_stats_write ( time_t  now  ) 

void geoip_free_all ( void   ) 

Release all storage held in this file.

References clear_geoip_db(), and tor_free.

Referenced by tor_free_all().

const char* geoip_get_bridge_stats_controller ( time_t  now  ) 

Return most recent bridge statistics to be returned to controller clients, or NULL if we don't have recent bridge statistics.

References load_bridge_stats().

Referenced by getinfo_helper_events().

const char* geoip_get_bridge_stats_extrainfo ( time_t  now  ) 

Return most recent bridge statistics for inclusion in extra-info descriptors, or NULL if we don't have recent bridge statistics.

References load_bridge_stats().

Referenced by extrainfo_dump_to_string().

static char* geoip_get_client_history ( time_t  now,
geoip_client_action_t  action,
int  min_observation_time,
unsigned  granularity 
) [static]

char* geoip_get_client_history_bridge ( time_t  now,
geoip_client_action_t  action 
)

Return a newly allocated comma-separated string containing entries for all the countries from which we've seen enough clients connect as a bridge. The entry format is cc=num where num is the number of IPs we've seen connecting from that country, and cc is a lowercased country code. Returns NULL if we don't want to export geoip data yet.

References geoip_get_client_history(), GEOIP_MIN_OBSERVATION_TIME, and IP_GRANULARITY.

Referenced by geoip_bridge_stats_write().

char* geoip_get_client_history_dirreq ( time_t  now,
geoip_client_action_t  action 
)

Return a newly allocated comma-separated string containing entries for all the countries from which we've seen enough clients connect as a directory. The entry format is cc=num where num is the number of IPs we've seen connecting from that country, and cc is a lowercased country code. Returns NULL if we don't want to export geoip data yet.

References DIR_RECORD_USAGE_GRANULARITY, DIR_RECORD_USAGE_MIN_OBSERVATION_TIME, and geoip_get_client_history().

Referenced by geoip_dirreq_stats_write(), and geoip_entry_stats_write().

country_t geoip_get_country ( const char *  country  ) 

Return the index of the country's entry in the GeoIP DB if it is a valid 2-letter country code, otherwise return -1.

References country_idxplus1_by_lc_code, and strmap_get_lc().

Referenced by routerset_refresh_countries().

int geoip_get_country_by_ip ( uint32_t  ipaddr  ) 

Given an IP address in host order, return a number representing the country to which that address belongs, or -1 for unknown. The return value will always be less than geoip_get_n_countries(). To decode it, call geoip_get_country_name().

References _geoip_compare_key_to_entry(), geoip_entry_t::country, and smartlist_bsearch().

Referenced by geoip_get_client_history(), geoip_note_client_seen(), getinfo_helper_geoip(), routerinfo_set_country(), and routerset_contains().

const char* geoip_get_country_name ( country_t  num  ) 

Return the two-letter country code associated with the number num, or "??" for an unknown value.

References geoip_country_t::countrycode.

Referenced by geoip_get_client_history(), and getinfo_helper_geoip().

static char* geoip_get_dirreq_history ( geoip_client_action_t  action,
dirreq_type_t  type 
) [static]

Return a newly allocated comma-separated string containing statistics on network status downloads. The string contains the number of completed requests, timeouts, and still running requests as well as the download times by deciles and quartiles. Return NULL if we have not observed requests for long enough.

References DIRREQ_TIMEOUT, GEOIP_CLIENT_NETWORKSTATUS, GEOIP_CLIENT_NETWORKSTATUS_V2, round_uint32_to_next_multiple_of(), smartlist_add(), smartlist_create(), smartlist_free(), tor_free, tor_gettimeofday(), tor_snprintf(), and tv_mdiff().

Referenced by geoip_dirreq_stats_write().

time_t geoip_get_history_start ( void   ) 

Return the time at which we started recording geoip data.

static int geoip_get_mean_shares ( time_t  now,
double *  v2_share_out,
double *  v3_share_out 
) [static]

Calculate which fraction of v2 and v3 directory requests aimed at caches have been sent to us since the last call of this function up to time now. Set *v2_share_out and *v3_share_out to the fractions of v2 and v3 protocol shares we expect to have seen. Reset counters afterwards. Return 0 on success, -1 on failure (e.g. when zero seconds have passed since the last call).

References geoip_determine_shares(), share_seconds, v2_share_times_seconds, and v3_share_times_seconds.

Referenced by geoip_dirreq_stats_write().

int geoip_get_n_countries ( void   ) 

Return the number of countries recognized by the GeoIP database.

Referenced by geoip_get_client_history(), and routerset_refresh_countries().

char* geoip_get_request_history ( time_t  now,
geoip_client_action_t  action 
)

Return a newly allocated string holding the per-country request history for action in a format suitable for an extra-info document, or NULL on failure.

References _c_hist_compare(), GEOIP_CLIENT_NETWORKSTATUS, GEOIP_CLIENT_NETWORKSTATUS_V2, GEOIP_MIN_OBSERVATION_TIME, IP_GRANULARITY, REQUEST_HIST_LEN, round_to_next_multiple_of(), smartlist_add(), smartlist_create(), smartlist_free(), smartlist_join_strings(), smartlist_sort(), tor_asprintf(), and tor_free.

Referenced by geoip_dirreq_stats_write().

int geoip_is_loaded ( void   ) 

Return true iff we have loaded a GeoIP database.

Referenced by geoip_get_client_history(), getinfo_helper_geoip(), options_act(), and routerset_refresh_countries().

int geoip_load_file ( const char *  filename,
or_options_t *  options 
)

Clear the GeoIP database and reload it from the file filename. Return 0 on success, -1 on failure.

Recognized line formats are: INTIPLOW,INTIPHIGH,CC and "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME" where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned integers, and CC is a country code.

It also recognizes, and skips over, blank lines and lines that start with '#' (comments).

References _geoip_compare_entries(), clear_geoip_db(), country_idxplus1_by_lc_code, geoip_country_t::countrycode, geoip_parse_entry(), LD_GENERAL, options_need_geoip_info(), refresh_all_country_info(), smartlist_add(), smartlist_create(), smartlist_free(), smartlist_sort(), and tor_free.

Referenced by options_act().

void geoip_note_client_seen ( geoip_client_action_t  action,
uint32_t  addr,
time_t  now 
)

void geoip_note_ns_response ( geoip_client_action_t  action,
geoip_ns_response_t  response 
)

Note that we've answered a client's request for a v2 or v3 network status (encoded in action) with the response type response.

References GEOIP_CLIENT_NETWORKSTATUS, GEOIP_CLIENT_NETWORKSTATUS_V2, get_options(), and tor_assert.

Referenced by directory_handle_command_get().

int geoip_parse_entry ( const char *  line  ) 

Add an entry to the GeoIP table, parsing it from line. The format is as for geoip_load_file().

References country_idxplus1_by_lc_code, escaped(), geoip_add_entry(), LD_GENERAL, and smartlist_create().

Referenced by geoip_load_file().

void geoip_remove_old_clients ( time_t  cutoff  ) 

Forget about all clients that haven't connected since cutoff. If cutoff is in the future, clients won't be added to the history until this time is reached. This is useful to prevent relays that switch to bridges from reporting unbelievable numbers of clients.

References _remove_old_client_helper().

Referenced by geoip_bridge_stats_write(), geoip_dirreq_stats_write(), geoip_entry_stats_write(), geoip_note_client_seen(), and options_act().

void geoip_start_dirreq ( uint64_t  dirreq_id,
size_t  response_size,
geoip_client_action_t  action,
dirreq_type_t  type 
)

Note that a directory request for a network status has started. The request is either direct or tunneled (see type), has the unique ID dirreq_id and the size response_size, and its action is action (either v2 or v3).

References _dirreq_map_put(), get_options(), and tor_gettimeofday().

Referenced by directory_handle_command_get().

int getinfo_helper_geoip ( control_connection_t *  control_conn,
const char *  question,
char **  answer 
)

Helper used to implement GETINFO ip-to-country/... controller command.

References geoip_get_country_by_ip(), geoip_get_country_name(), geoip_is_loaded(), strcmpstart(), and tor_inet_aton().

static HT_HEAD ( dirreqmap  ,
dirreq_map_entry_t   
) [static]

Map of all directory requests asking for v2 or v3 network statuses in the current geoip-stats interval. Values are of type dirreq_map_entry_t *.

static HT_HEAD ( clientmap  ,
clientmap_entry_t   
) [static]

Map from client IP address to last time seen. Hashtable helper: compute a hash of a clientmap_entry_t.

static void load_bridge_stats ( time_t  now  )  [static]

Try to load the most recent bridge statistics from disk, unless we have finished a measurement interval lately.

References check_private_dir(), get_datadir_fname, get_datadir_fname2, parse_bridge_stats_controller(), read_file_to_str(), RFTS_IGNORE_MISSING, and tor_free.

Referenced by geoip_get_bridge_stats_controller(), and geoip_get_bridge_stats_extrainfo().

static char* parse_bridge_stats_controller ( const char *  stats_str,
time_t  now 
) [static]

Parse the bridge statistics as they are written to extra-info descriptors for being returned to controller clients. Return the controller string if successful, or NULL otherwise.

References eat_whitespace_no_nl(), find_str_at_start_of_line(), format_iso_time(), parse_iso_time(), tor_asprintf(), tor_assert, and tor_free.

Referenced by geoip_bridge_stats_write(), and load_bridge_stats().

int should_record_bridge_info ( or_options_t *  options  ) 

Return 1 if we should collect geoip stats on bridge users, and include them in our extrainfo descriptor. Else return 0.

References or_options_t::BridgeRecordUsageByCountry, and or_options_t::BridgeRelay.

Referenced by extrainfo_dump_to_string(), options_act(), and run_scheduled_events().


Variable Documentation

char* bridge_stats_controller = NULL [static]

Most recent bridge statistics formatted to be returned to controller clients.

char* bridge_stats_extrainfo = NULL [static]

Most recent bridge statistics formatted to be written to extra-info descriptors.

strmap_t* country_idxplus1_by_lc_code = NULL [static]

A map from lowercased country codes to their position in geoip_countries. The index is encoded in the pointer, and 1 is added so that NULL can mean not found.

Referenced by clear_geoip_db(), geoip_add_entry(), geoip_get_country(), geoip_load_file(), and geoip_parse_entry().

smartlist_t* geoip_countries = NULL [static]

A list of geoip_country_t

smartlist_t* geoip_entries = NULL [static]

A list of all known geoip_entry_t, sorted by ip_low.

time_t last_time_determined_shares = 0 [static]

When did we last determine which share of v2 and v3 directory requests is sent to us?

Referenced by geoip_determine_shares(), and geoip_note_client_seen().

uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM] [static]

How many responses are we giving to clients requesting v2 network statuses?

uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM] [static]

How many responses are we giving to clients requesting v3 network statuses?

int share_seconds [static]

Number of seconds we are determining v2 and v3 shares.

Referenced by geoip_determine_shares(), and geoip_get_mean_shares().

time_t start_of_bridge_stats_interval [static]

Start time of bridge stats.

time_t start_of_dirreq_stats_interval [static]

Start time of directory request stats.

time_t start_of_entry_stats_interval [static]

Start time of entry stats.

double v2_share_times_seconds [static]

Sum of products of v2 shares times the number of seconds for which we consider these shares as valid.

Referenced by geoip_determine_shares(), and geoip_get_mean_shares().

double v3_share_times_seconds [static]

Sum of products of v3 shares times the number of seconds for which we consider these shares as valid.

Referenced by geoip_determine_shares(), and geoip_get_mean_shares().


Generated on Tue May 25 00:30:44 2010 for tor by  doxygen 1.5.6