diff --git a/.gitignore b/.gitignore index 1819886a78..abacb365ff 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ r2/_builder.egg-info/ r2/_normalized_hot.egg-info/ r2/_sorts.egg-info/ r2/r2/lib/_normalized_hot.c +r2/r2/lib/mr_tools/_mr_tools.c r2/r2/lib/db/_sorts.c r2/r2/lib/sgm.c r2/r2/lib/utils/_utils.c diff --git a/config/cassandra/cassandra.yaml b/config/cassandra/cassandra.yaml new file mode 100644 index 0000000000..356c76e907 --- /dev/null +++ b/config/cassandra/cassandra.yaml @@ -0,0 +1,479 @@ +# Cassandra storage config YAML + +# NOTE: +# See http://wiki.apache.org/cassandra/StorageConfiguration for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: 'reddit' + +# You should always specify InitialToken when setting up a production +# cluster for the first time, and often when adding capacity later. +# The principle is that each node should be given an equal slice of +# the token ring; see http://wiki.apache.org/cassandra/Operations +# for more details. +# +# If blank, Cassandra will request a token bisecting the range of +# the heaviest-loaded existing node. If there is no load information +# available, such as is the case with a new cluster, it will pick +# a random token, which will lead to hot spots. +initial_token: + +# Set to true to make new [non-seed] nodes automatically migrate data +# to themselves from the pre-existing nodes in the cluster. Defaults +# to false because you can only bootstrap N machines at a time from +# an existing cluster of N, so if you are bringing up a cluster of +# 10 machines with 3 seeds you would have to do it in stages. Leaving +# this off for the initial start simplifies that. +auto_bootstrap: false + +# See http://wiki.apache.org/cassandra/HintedHandoff +hinted_handoff_enabled: true + +# authentication backend, implementing IAuthenticator; used to identify users +authenticator: org.apache.cassandra.auth.AllowAllAuthenticator + +# authorization backend, implementing IAuthority; used to limit access/provide permissions +authority: org.apache.cassandra.auth.AllowAllAuthority + +# The partitioner is responsible for distributing rows (by key) across +# nodes in the cluster. Any IPartitioner may be used, including your +# own as long as it is on the classpath. Out of the box, Cassandra +# provides org.apache.cassandra.dht.RandomPartitioner +# org.apache.cassandra.dht.ByteOrderedPartitioner, +# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated), +# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner +# (deprecated). +# +# - RandomPartitioner distributes rows across the cluster evenly by md5. +# When in doubt, this is the best option. +# - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows +# scanning rows in key order, but the ordering can generate hot spots +# for sequential insertion workloads. +# - OrderPreservingPartitioner is an obsolete form of BOP, that stores +# - keys in a less-efficient format and only works with keys that are +# UTF8-encoded Strings. +# - CollatingOPP colates according to EN,US rules rather than lexical byte +# ordering. Use this as an example if you need custom collation. +# +# See http://wiki.apache.org/cassandra/Operations for more on +# partitioners and token selection. +partitioner: org.apache.cassandra.dht.RandomPartitioner + +# directories where Cassandra should store data on disk. 
+data_file_directories: + - /cassandra/data + +# commit log +commitlog_directory: /cassandra/commitlog + +# saved caches +saved_caches_directory: /cassandra/saved_caches + +# Size to allow commitlog to grow to before creating a new segment +commitlog_rotation_threshold_in_mb: 128 + +# commitlog_sync may be either "periodic" or "batch." +# When in batch mode, Cassandra won't ack writes until the commit log +# has been fsynced to disk. It will wait up to +# CommitLogSyncBatchWindowInMS milliseconds for other writes, before +# performing the sync. +commitlog_sync: periodic + +# the other option is "timed," where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync_period_in_ms: 10000 + +# Addresses of hosts that are deemed contact points. +# Cassandra nodes use this list of hosts to find each other and learn +# the topology of the ring. You must change this if you are running +# multiple nodes! +seeds: + - pmc01 + - pmc02 + - pmc03 + - pmc04 + - pmc05 + - pmc06 + - pmc07 + - pmc08 + - pmc09 + - pmc10 + - pmc11 + - pmc12 + - pmc13 + - pmc14 + - pmc15 + - pmc16 + - pmc17 + - pmc18 + - pmc19 + - pmc20 + +# Access mode. mmapped i/o is substantially faster, but only practical on +# a 64bit machine (which notably does not include EC2 "small" instances) +# or relatively small datasets. "auto", the safe choice, will enable +# mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only" +# (which may allow you to get part of the benefits of mmap on a 32bit +# machine by mmapping only index files) and "standard". +# (The buffer size settings that follow only apply to standard, +# non-mmapped i/o.) +disk_access_mode: mmap_index_only + +# Unlike most systems, in Cassandra writes are faster than reads, so +# you can afford more of those in parallel. A good rule of thumb is 2 +# concurrent reads per processor core. Increase ConcurrentWrites to +# the number of clients writing at once if you enable CommitLogSync + +# CommitLogSyncDelay. --> +concurrent_reads: 8 +concurrent_writes: 32 + +# This sets the amount of memtable flush writer threads. These will +# be blocked by disk io, and each one will hold a memtable in memory +# while blocked. If you have a large heap and many data directories, +# you can increase this value for better flush performance. +# By default this will be set to the amount of data directories defined. +#memtable_flush_writers: 1 + +# Buffer size to use when performing contiguous column slices. +# Increase this to the size of the column slices you typically perform +sliced_buffer_size_in_kb: 64 + +# TCP port, for commands and data +storage_port: 7000 + +# Address to bind to and tell other Cassandra nodes to connect to. You +# _must_ change this if you want multiple nodes to be able to +# communicate! +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing *if* the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). +# +# Setting this to 0.0.0.0 is always wrong. +listen_address: + +# The address to bind the Thrift RPC service to -- clients connect +# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if +# you want Thrift to listen on all interfaces. +# +# Leaving this blank has the same effect it does for ListenAddress, +# (i.e. it will be based on the configured hostname of the node). 
+rpc_address: 0.0.0.0 +# port for Thrift to listen for clients on +rpc_port: 9160 + +# enable or disable keepalive on rpc connections +rpc_keepalive: true + +# uncomment to set socket buffer sizes on rpc connections +# rpc_send_buff_size_in_bytes: +# rpc_recv_buff_size_in_bytes: + +# Frame size for thrift (maximum field length). +# 0 disables TFramedTransport in favor of TSocket. This option +# is deprecated; we strongly recommend using Framed mode. +thrift_framed_transport_size_in_mb: 15 + +# The max length of a thrift message, including all fields and +# internal thrift overhead. +thrift_max_message_length_in_mb: 16 + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# change this to increase the compaction thread's priority. In java, 1 is the +# lowest priority and that is our default. +# compaction_thread_priority: 1 + +# The threshold size in megabytes the binary memtable must grow to, +# before it's submitted for flushing to disk. +binary_memtable_throughput_in_mb: 256 + +# Add column indexes to a row after its contents reach this size. +# Increase if your column values are large, or if you have a very large +# number of columns. The competing causes are, Cassandra has to +# deserialize this much of the row to read a single column, so you want +# it to be small - at least if you do many partial-row reads - but all +# the index data is read for each access, so you don't want to generate +# that wastefully either. +column_index_size_in_kb: 64 + +# Size limit for rows being compacted in memory. Larger rows will spill +# over to disk and use a slower two-pass compaction process. A message +# will be logged specifying the row key. +in_memory_compaction_limit_in_mb: 64 + +# Time to wait for a reply from other nodes before failing the command +rpc_timeout_in_ms: 20000 + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +phi_convict_threshold: 10 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch, which will let Cassandra know enough +# about your network topology to route requests efficiently. +# Out of the box, Cassandra provides +# - org.apache.cassandra.locator.SimpleSnitch: +# Treats Strategy order as proximity. This improves cache locality +# when disabling read repair, which can further improve throughput. +# - org.apache.cassandra.locator.RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's +# IP address, respectively +# org.apache.cassandra.locator.PropertyFileSnitch: +# - Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. 
+endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch + +# dynamic_snitch -- This boolean controls whether the above snitch is +# wrapped with a dynamic snitch, which will monitor read latencies +# and avoid reading from hosts that have slowed (due to compaction, +# for instance) +dynamic_snitch: true +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero and read_repair_chance is < 1.0, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 0.1 + +# request_scheduler -- Set this to a class that implements +# RequestScheduler, which will schedule incoming client requests +# according to the specific policy. This is useful for multi-tenancy +# with a single Cassandra cluster. +# NOTE: This is specifically for requests from the client and does +# not affect inter node communication. +# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place +# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of +# client requests to a node with a separate queue for each +# request_scheduler_id. The scheduler is further customized by +# request_scheduler_options as described below. +request_scheduler: org.apache.cassandra.scheduler.NoScheduler + +# Scheduler Options vary based on the type of scheduler +# NoScheduler - Has no options +# RoundRobin +# - throttle_limit -- The throttle_limit is the number of in-flight +# requests per client. Requests beyond +# that limit are queued up until +# running requests can complete. +# The value of 80 here is twice the number of +# concurrent_reads + concurrent_writes. +# - default_weight -- default_weight is optional and allows for +# overriding the default which is 1. +# - weights -- Weights are optional and will default to 1 or the +# overridden default_weight. The weight translates into how +# many requests are handled during each turn of the +# RoundRobin, based on the scheduler id. +# +# request_scheduler_options: +# throttle_limit: 80 +# default_weight: 5 +# weights: +# Keyspace1: 1 +# Keyspace2: 5 + +# request_scheduler_id -- An identifer based on which to perform +# the request scheduling. Currently the only valid option is keyspace. +# request_scheduler_id: keyspace + +# The Index Interval determines how large the sampling of row keys +# is for a given SSTable. The larger the sampling, the more effective +# the index is at the cost of space. +index_interval: 128 + +# Keyspaces have ColumnFamilies. (Usually 1 KS per application.) +# ColumnFamilies have Rows. (Dozens of CFs per KS.) +# Rows contain Columns. (Many per CF.) +# Columns contain name:value:timestamp. (Many per Row.) +# +# A KS is most similar to a schema, and a CF is most similar to a relational table. +# +# Keyspaces, ColumnFamilies, and Columns may carry additional +# metadata that change their behavior. 
These are as follows: +# +# Keyspace required parameters: +# - name: name of the keyspace; "system" is +# reserved for Cassandra Internals. +# - replica_placement_strategy: the class that determines how replicas +# are distributed among nodes. Contains both the class as well as +# configuration information. Must extend AbstractReplicationStrategy. +# Out of the box, Cassandra provides +# * org.apache.cassandra.locator.SimpleStrategy +# * org.apache.cassandra.locator.NetworkTopologyStrategy +# * org.apache.cassandra.locator.OldNetworkTopologyStrategy +# +# SimpleStrategy merely places the first +# replica at the node whose token is closest to the key (as determined +# by the Partitioner), and additional replicas on subsequent nodes +# along the ring in increasing Token order. +# +# With NetworkTopologyStrategy, +# for each datacenter, you can specify how many replicas you want +# on a per-keyspace basis. Replicas are placed on different racks +# within each DC, if possible. This strategy also requires rack aware +# snitch, such as RackInferringSnitch or PropertyFileSnitch. +# An example: +# - name: Keyspace1 +# replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy +# strategy_options: +# DC1 : 3 +# DC2 : 2 +# DC3 : 1 +# +# OldNetworkToplogyStrategy [formerly RackAwareStrategy] +# places one replica in each of two datacenters, and the third on a +# different rack in in the first. Additional datacenters are not +# guaranteed to get a replica. Additional replicas after three are placed +# in ring order after the third without regard to rack or datacenter. +# - replication_factor: Number of replicas of each row +# Keyspace optional paramaters: +# - strategy_options: Additional information for the replication strategy. +# - column_families: +# ColumnFamily required parameters: +# - name: name of the ColumnFamily. Must not contain the character "-". +# - compare_with: tells Cassandra how to sort the columns for slicing +# operations. The default is BytesType, which is a straightforward +# lexical comparison of the bytes in each column. Other options are +# AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType, +# and IntegerType (a generic variable-length integer type). +# You can also specify the fully-qualified class name to a class of +# your choice extending org.apache.cassandra.db.marshal.AbstractType. +# +# ColumnFamily optional parameters: +# - keys_cached: specifies the number of keys per sstable whose +# locations we keep in memory in "mostly LRU" order. (JUST the key +# locations, NOT any column values.) Specify a fraction (value less +# than 1) or an absolute number of keys to cache. Defaults to 200000 +# keys. +# - rows_cached: specifies the number of rows whose entire contents we +# cache in memory. Do not use this on ColumnFamilies with large rows, +# or ColumnFamilies with high write:read ratios. Specify a fraction +# (value less than 1) or an absolute number of rows to cache. +# Defaults to 0. (i.e. row caching is off by default) +# - comment: used to attach additional human-readable information about +# the column family to its definition. +# - read_repair_chance: specifies the probability with which read +# repairs should be invoked on non-quorum reads. must be between 0 +# and 1. defaults to 1.0 (always read repair). +# - gc_grace_seconds: specifies the time to wait before garbage +# collecting tombstones (deletion markers). defaults to 864000 (10 +# days). 
See http://wiki.apache.org/cassandra/DistributedDeletes +# - default_validation_class: specifies a validator class to use for +# validating all the column values in the CF. +# NOTE: +# min_ must be less than max_compaction_threshold! +# - min_compaction_threshold: the minimum number of SSTables needed +# to start a minor compaction. increasing this will cause minor +# compactions to start less frequently and be more intensive. setting +# this to 0 disables minor compactions. defaults to 4. +# - max_compaction_threshold: the maximum number of SSTables allowed +# before a minor compaction is forced. decreasing this will cause +# minor compactions to start more frequently and be less intensive. +# setting this to 0 disables minor compactions. defaults to 32. +# /NOTE +# - row_cache_save_period_in_seconds: number of seconds between saving +# row caches. The row caches can be saved periodically and if one +# exists on startup it will be loaded. +# - key_cache_save_period_in_seconds: number of seconds between saving +# key caches. The key caches can be saved periodically and if one +# exists on startup it will be loaded. +# - memtable_flush_after_mins: The maximum time to leave a dirty table +# unflushed. This should be large enough that it won't cause a flush +# storm of all memtables during periods of inactivity. +# - memtable_throughput_in_mb: The maximum size of the memtable before +# it is flushed. If undefined, 1/8 * heapsize will be used. +# - memtable_operations_in_millions: Number of operations in millions +# before the memtable is flushed. If undefined, throughput / 64 * 0.3 +# will be used. +# - column_metadata: +# Column required parameters: +# - name: binds a validator (and optionally an indexer) to columns +# with this name in any row of the enclosing column family. +# - validator: like cf.compare_with, an AbstractType that checks +# that the value of the column is well-defined. +# Column optional parameters: +# NOTE: +# index_name cannot be set if index_type is not also set! +# - index_name: User-friendly name for the index. +# - index_type: The type of index to be created. Currently only +# KEYS is supported. +# /NOTE +# +# NOTE: +# this keyspace definition is for demonstration purposes only. +# Cassandra will not load these definitions during startup. See +# http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation. 
+# /NOTE +keyspaces: + - name: reddit + replica_placement_strategy: org.apache.cassandra.locator.RackUnawareStrategy + replication_factor: 3 + column_families: + - column_type: Standard + compare_with: BytesType + name: permacache + row_cache_save_period_in_seconds: 3600 + rows_cached: 100000 + - column_type: Standard + compare_with: BytesType + name: urls + row_cache_save_period_in_seconds: 3600 + rows_cached: 100000 + - column_type: Standard + compare_with: UTF8Type + name: LinkVote + rows_cached: 100000 + - column_type: Standard + compare_with: UTF8Type + name: CommentVote + rows_cached: 100000 + - column_type: Standard + compare_with: UTF8Type + name: Friend + rows_cached: 10000 + - column_type: Standard + compare_with: UTF8Type + name: Save + rows_cached: 100000 + - column_type: Standard + compare_with: UTF8Type + name: Hide + rows_cached: 100000 + - column_type: Standard + compare_with: UTF8Type + name: Click + rows_cached: 100000 + - column_type: Standard + compare_with: UTF8Type + name: VotesByLink + - column_type: Standard + compare_with: UTF8Type + name: VotesByDay + - column_type: Standard + name: FriendsByAccount + - column_type: Standard + compare_with: UTF8Type + name: SavesByAccount + - column_type: Standard + compare_with: UTF8Type + name: CommentSortsCache + row_cache_save_period_in_seconds: 3600 + rows_cached: 200000 diff --git a/config/cassandra/storage-conf.xml b/config/cassandra/storage-conf.xml deleted file mode 100644 index a1189b815d..0000000000 --- a/config/cassandra/storage-conf.xml +++ /dev/null @@ -1,420 +0,0 @@ - - - - - - - - reddit - - - false - - - true - - - - - - - - - - - - - org.apache.cassandra.locator.RackUnawareStrategy - - - 1 - - - org.apache.cassandra.locator.EndPointSnitch - - - - - - - org.apache.cassandra.locator.RackUnawareStrategy - 3 - org.apache.cassandra.locator.EndPointSnitch - - - - - - - - - - - - org.apache.cassandra.locator.RackUnawareStrategy - 3 - org.apache.cassandra.locator.EndPointSnitch - - - - - - org.apache.cassandra.auth.AllowAllAuthenticator - - - org.apache.cassandra.dht.RandomPartitioner - - - - - - /cassandra/commitlog - - /cassandra/data - - - - - pmc01 - pmc02 - pmc03 - pmc04 - pmc05 - pmc06 - pmc07 - pmc08 - pmc09 - pmc10 - pmc11 - pmc12 - - - - - - 30000 - - 10 - - 128 - - - - - - - - 7000 - - - - - 9160 - - false - - - - - - - - mmap_index_only - - - 512 - - - 64 - - - 32 - 8 - - - 64 - - - 128 - - 256 - - 0.3 - - 60 - - - 8 - 32 - - - periodic - - 10000 - - - - - 864000 - diff --git a/r2/example.ini b/r2/example.ini index 342a847149..e80cab8552 100644 --- a/r2/example.ini +++ b/r2/example.ini @@ -32,9 +32,11 @@ MODSECRET = abcdefghijklmnopqrstuvwxyz0123456789 # secret for /prefs/feeds FEEDSECRET = abcdefghijklmnopqrstuvwxyz0123456789 +INDEXTANK_API_URL = + # -- important settings -- # the domain that this app serves itself up as -domain = localhost +domain = reddit.local # if you use www for the old-timey feel, put it here domain_prefix = # the user used for "system" operations and messages @@ -80,6 +82,7 @@ admin_message_acct = reddit # data cache (used for caching Thing objects) num_mc_clients = 5 memcaches = 127.0.0.1:11211 +stalecaches = # render caches (the second is "remote" and the local is optional but in the same format) local_rendercache = rendercaches = 127.0.0.1:11211 @@ -94,7 +97,7 @@ permacache_memcaches = 127.0.0.1:11211 cassandra_seeds = 127.0.0.1:9160 # read/write consistency levels for Cassandra cassandra_rcl = ONE -cassandra_wcl = QUORUM +cassandra_wcl = ONE # -- url cache options -- url_caches = 
127.0.0.1:11211 @@ -117,6 +120,8 @@ adtracker_url = /static/pixel.png adframetracker_url = http://pixel.reddit.com/pixel/of_defenestration.png # open redirector to bounce clicks off of on sponsored links for tracking clicktracker_url = /static/pixel.png +# new pixel +newtracker_url = # amqp amqp_host = localhost:5672 @@ -128,17 +133,23 @@ amqp_virtual_host = / # list of all databases named in the subsequent table databases = main, comment, vote, email, authorize, award, hc -#db name db host user, pass -main_db = reddit, 127.0.0.1, reddit, password -comment_db = reddit, 127.0.0.1, reddit, password -comment2_db = reddit, 127.0.0.1, reddit, password -vote_db = reddit, 127.0.0.1, reddit, password -email_db = reddit, 127.0.0.1, reddit, password -authorize_db = reddit, 127.0.0.1, reddit, password -award_db = reddit, 127.0.0.1, reddit, password -hc_db = reddit, 127.0.0.1, reddit, password +db_user = reddit +db_pass = password +db_port = 5432 +db_pool_size = 3 +db_pool_overflow_size = 3 + +#db name db host user, pass, port, conn, overflow_conn +main_db = reddit, 127.0.0.1, *, *, *, *, * +comment_db = reddit, 127.0.0.1, *, *, *, *, * +comment2_db = reddit, 127.0.0.1, *, *, *, *, * +vote_db = reddit, 127.0.0.1, *, *, *, *, * +email_db = reddit, 127.0.0.1, *, *, *, *, * +authorize_db = reddit, 127.0.0.1, *, *, *, *, * +award_db = reddit, 127.0.0.1, *, *, *, *, * +hc_db = reddit, 127.0.0.1, *, *, *, *, * -hardcache_categories = *:hc +hardcache_categories = *:hc:hc # this setting will prefix all of the table names db_app_name = reddit @@ -197,13 +208,17 @@ tracking_secret = abcdefghijklmnopqrstuvwxyz0123456789 ## -- Self-service sponsored link stuff -- # (secure) payment domain -payment_domain = http://pay.localhost/ -ad_domain = http://localhost +payment_domain = http://reddit.local/ +ad_domain = http://reddit.local +allowed_pay_countries = United States, United Kingdom, Canada sponsors = -# authorize.net credentials + +# authorize.net credentials (blank authorizenetapi to disable) +authorizenetapi = +# authorizenetapi = https://api.authorize.net/xml/v1/request.api authorizenetname = authorizenetkey = -authorizenetapi = https://api.authorize.net/xml/v1/request.api + min_promote_bid = 20 max_promote_bid = 9999 min_promote_future = 2 @@ -227,7 +242,7 @@ authorized_cnames = num_query_queue_workers = 5 query_queue_worker = http://cslowe.local:8000 enable_doquery = True -use_query_cache = False +use_query_cache = True write_query_queue = True # -- stylesheet editor -- @@ -243,6 +258,9 @@ stylesheet_rtl = reddit-rtl.css # location of the static directory static_path = /static/ +# make frontpage 100% dart +frontpage_dart = false + # -- translator UI -- # enable/disable access to the translation UI in /admin/i18n translator = true diff --git a/r2/r2/config/middleware.py b/r2/r2/config/middleware.py index 709e75e53f..ae731d4128 100644 --- a/r2/r2/config/middleware.py +++ b/r2/r2/config/middleware.py @@ -255,7 +255,7 @@ def filter(self, execution_func, prof_arg = None): return [res] class DomainMiddleware(object): - lang_re = re.compile(r"^\w\w(-\w\w)?$") + lang_re = re.compile(r"\A\w\w(-\w\w)?\Z") def __init__(self, app): self.app = app @@ -371,7 +371,7 @@ def __call__(self, environ, start_response): return self.app(environ, start_response) class DomainListingMiddleware(object): - domain_pattern = re.compile(r'^/domain/(([-\w]+\.)+[\w]+)') + domain_pattern = re.compile(r'\A/domain/(([-\w]+\.)+[\w]+)') def __init__(self, app): self.app = app @@ -386,7 +386,7 @@ def __call__(self, environ, start_response): 
return self.app(environ, start_response) class ExtensionMiddleware(object): - ext_pattern = re.compile(r'\.([^/]+)$') + ext_pattern = re.compile(r'\.([^/]+)\Z') extensions = (('rss' , ('xml', 'text/xml; charset=UTF-8')), ('xml' , ('xml', 'text/xml; charset=UTF-8')), diff --git a/r2/r2/config/rewrites.py b/r2/r2/config/rewrites.py index 7e6dd728f3..3464023b29 100644 --- a/r2/r2/config/rewrites.py +++ b/r2/r2/config/rewrites.py @@ -23,9 +23,9 @@ rewrites = (#these first two rules prevent the .embed rewrite from #breaking other js that should work - ("^/_(.*)", "/_$1"), - ("^/static/(.*\.js)", "/static/$1"), + ("\A/_(.*)", "/_$1"), + ("\A/static/(.*\.js)", "/static/$1"), #This next rewrite makes it so that all the embed stuff works. - ("^(.*)(?' % sn - else: - g.log.error("GOOGLE CHCEKOUT: didn't work") - g.log.error(repr(list(request.POST.iteritems()))) - - - @noresponse(VUser(), VModhash(), thing = VByName('id')) @@ -1770,9 +1573,8 @@ def GET_bookmarklet(self, action, uh, links): @validatedForm(VUser(), - code = VPrintable("code", 30), - postcard_okay = VOneOf("postcard", ("yes", "no")),) - def POST_claimgold(self, form, jquery, code, postcard_okay): + code = VPrintable("code", 30)) + def POST_claimgold(self, form, jquery, code): if not code: c.errors.add(errors.NO_TEXT, field = "code") form.has_errors("code", errors.NO_TEXT) @@ -1802,17 +1604,21 @@ def POST_claimgold(self, form, jquery, code, postcard_okay): if subscr_id: c.user.gold_subscr_id = subscr_id - admintools.engolden(c.user, days) + if code.startswith("cr_"): + c.user.gold_creddits += int(days / 31) + c.user._commit() + form.set_html(".status", _("claimed! now go to someone's userpage and give them a present!")) + else: + admintools.engolden(c.user, days) - g.cache.set("recent-gold-" + c.user.name, True, 600) - form.set_html(".status", _("claimed!")) - jquery(".lounge").show() + g.cache.set("recent-gold-" + c.user.name, True, 600) + form.set_html(".status", _("claimed!")) + jquery(".lounge").show() # Activate any errors we just manually set form.has_errors("code", errors.INVALID_CODE, errors.CLAIMED_CODE, errors.NO_TEXT) - @validatedForm(user = VUserWithEmail('name')) def POST_password(self, form, jquery, user): if form.has_errors('name', errors.USER_DOESNT_EXIST): @@ -1824,7 +1630,7 @@ def POST_password(self, form, jquery, user): form.set_html(".status", _("an email will be sent to that account's address shortly")) - + @validatedForm(cache_evt = VCacheKey('reset', ('key',)), password = VPassword(['passwd', 'passwd2'])) def POST_resetpassword(self, form, jquery, cache_evt, password): @@ -1862,11 +1668,6 @@ def POST_frame(self): c.user._commit() - - @validatedForm() - def POST_new_captcha(self, form, jquery, *a, **kw): - jquery("body").captcha(get_iden()) - @noresponse(VAdmin(), tr = VTranslation("lang"), user = nop('user')) diff --git a/r2/r2/controllers/embed.py b/r2/r2/controllers/embed.py index 6ac0e9701a..589050fa2f 100644 --- a/r2/r2/controllers/embed.py +++ b/r2/r2/controllers/embed.py @@ -23,6 +23,7 @@ from r2.lib.template_helpers import get_domain from r2.lib.pages import Embed, BoringPage, HelpPage from r2.lib.filters import websafe, SC_OFF, SC_ON +from r2.lib.memoize import memoize from pylons.i18n import _ from pylons import c, g, request @@ -30,6 +31,22 @@ from urllib2 import HTTPError +@memoize("renderurl_cached", time=60) +def renderurl_cached(path): + # Needed so http://reddit.com/help/ works + fp = path.rstrip("/") + u = "http://code.reddit.com/wiki" + fp + '?stripped=1' + + g.log.debug("Pulling %s for help" % u) 
+ + try: + return fp, proxyurl(u) + except HTTPError, e: + if e.code != 404: + print "error %s" % e.code + print e.fp.read() + return (None, None) + class EmbedController(RedditController): allow_stylesheets = True @@ -73,20 +90,10 @@ def renderurl(self, override=None): else: path = request.path - # Needed so http://reddit.com/help/ works - fp = path.rstrip("/") - u = "http://code.reddit.com/wiki" + fp + '?stripped=1' - - g.log.debug("Pulling %s for help" % u) - - try: - content = proxyurl(u) - return self.rendercontent(content, fp) - except HTTPError, e: - if e.code != 404: - print "error %s" % e.code - print e.fp.read() - return self.abort404() + fp, content = renderurl_cached(path) + if content is None: + self.abort404() + return self.rendercontent(content, fp) GET_help = POST_help = renderurl diff --git a/r2/r2/controllers/error.py b/r2/r2/controllers/error.py index 02e95835e3..4fc40a9e29 100644 --- a/r2/r2/controllers/error.py +++ b/r2/r2/controllers/error.py @@ -119,7 +119,7 @@ def send404(self): c.response.status_code = 404 if 'usable_error_content' in request.environ: return request.environ['usable_error_content'] - if c.site._spam and not c.user_is_admin: + if c.site.spammy() and not c.user_is_admin: subject = ("the subreddit /r/%s has been incorrectly banned" % c.site.name) lnk = ("/r/redditrequest/submit?url=%s&title=%s" diff --git a/r2/r2/controllers/errors.py b/r2/r2/controllers/errors.py index b01e6efdb2..f77047cfdb 100644 --- a/r2/r2/controllers/errors.py +++ b/r2/r2/controllers/errors.py @@ -56,7 +56,7 @@ ('SUBREDDIT_NOTALLOWED', _("you aren't allowed to post there.")), ('SUBREDDIT_REQUIRED', _('you must specify a reddit')), ('BAD_SR_NAME', _('that name isn\'t going to work')), - ('RATELIMIT', _('you are trying to submit too fast. try again in %(time)s.')), + ('RATELIMIT', _('you are doing that too much. 
try again in %(time)s.')), ('EXPIRED', _('your session has expired')), ('DRACONIAN', _('you must accept the terms first')), ('BANNED_IP', "IP banned"), diff --git a/r2/r2/controllers/front.py b/r2/r2/controllers/front.py index 33a1026ef8..19e225ccb9 100644 --- a/r2/r2/controllers/front.py +++ b/r2/r2/controllers/front.py @@ -30,6 +30,7 @@ from r2.lib.menus import * from r2.lib.utils import to36, sanitize_url, check_cheating, title_to_url from r2.lib.utils import query_string, UrlParser, link_from_url, link_duplicates +from r2.lib.utils import randstr from r2.lib.template_helpers import get_domain from r2.lib.filters import unsafe from r2.lib.emailer import has_opted_out, Email @@ -37,7 +38,7 @@ from r2.lib.db import queries from r2.lib.strings import strings from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery -from r2.lib.indextank import IndextankQuery, IndextankException +from r2.lib.indextank import IndextankQuery, IndextankException, InvalidIndextankQuery from r2.lib.contrib.pysolr import SolrError from r2.lib import jsontemplates from r2.lib import sup @@ -45,6 +46,7 @@ from listingcontroller import ListingController from pylons import c, request, request, Response +import string import random as rand import re, socket import time as time_module @@ -251,6 +253,8 @@ def GET_comments(self, article, comment, context, sort, limit, depth): if comment: displayPane.append(PermalinkMessage(article.make_permalink_slow())) + displayPane.append(LinkCommentSep()) + # insert reply box only for logged in user if c.user_is_loggedin and can_comment_link(article) and not is_api(): #no comment box for permalinks @@ -274,7 +278,6 @@ def GET_comments(self, article, comment, context, sort, limit, depth): # Used in template_helpers c.previous_visits = previous_visits - # finally add the comment listing displayPane.append(CommentPane(article, CommentSortMenu.operator(sort), comment, context, num, **kw)) @@ -569,7 +572,7 @@ def GET_search_reddits(self, query, reverse, after, count, num): simple=True).render() return res - verify_langs_regex = re.compile(r"^[a-z][a-z](,[a-z][a-z])*$") + verify_langs_regex = re.compile(r"\A[a-z][a-z](,[a-z][a-z])*\Z") @base_listing @validate(query = nop('q'), sort = VMenu('sort', SearchSortMenu, remember=False), @@ -587,20 +590,37 @@ def GET_search(self, query, num, reverse, after, count, sort, restrict_sr): site = c.site try: - q = IndextankQuery(query, site, sort) - - num, t, spane = self._search(q, num = num, after = after, reverse = reverse, - count = count) + cleanup_message = None + try: + q = IndextankQuery(query, site, sort) + num, t, spane = self._search(q, num=num, after=after, + reverse = reverse, count = count) + except InvalidIndextankQuery: + # delete special characters from the query and run again + special_characters = '+-&|!(){}[]^"~*?:\\' + translation = dict((ord(char), None) + for char in list(special_characters)) + cleaned = query.translate(translation) + + q = IndextankQuery(cleaned, site, sort) + num, t, spane = self._search(q, num=num, after=after, + reverse = reverse, count = count) + cleanup_message = _('I couldn\'t understand your query, ' + + 'so I simplified it and searched for ' + + '"%(clean_query)s" instead.') % { + 'clean_query': cleaned } + res = SearchPage(_('search results'), query, t, num, content=spane, nav_menus = [SearchSortMenu(default=sort)], - search_params = dict(sort = sort), - simple=False, site=c.site, restrict_sr=restrict_sr).render() + search_params = dict(sort = sort), + infotext=cleanup_message, + 
simple=False, site=c.site, + restrict_sr=restrict_sr).render() return res except (IndextankException, socket.error), e: return self.search_fail(e) - def _search(self, query_obj, num, after, reverse, count=0): """Helper function for interfacing with search. Basically a thin wrapper for SearchBuilder.""" @@ -983,3 +1003,64 @@ def GET_try_compact(self, dest): def GET_thanks(self, secret): """The page to claim reddit gold trophies""" return BoringPage(_("thanks"), content=Thanks(secret)).render() + + @validate(VUser(), + goldtype = VOneOf("goldtype", + ("autorenew", "onetime", "creddits", "gift")), + period = VOneOf("period", ("monthly", "yearly")), + months = VInt("months"), + # variables below are just for gifts + signed = VBoolean("signed"), + recipient_name = VPrintable("recipient", max_length = 50), + giftmessage = VLength("giftmessage", 10000)) + def GET_gold(self, goldtype, period, months, + signed, recipient_name, giftmessage): + start_over = False + recipient = None + if goldtype == "autorenew": + if period is None: + start_over = True + elif goldtype in ("onetime", "creddits"): + if months is None or months < 1: + start_over = True + elif goldtype == "gift": + if months is None or months < 1: + start_over = True + try: + recipient = Account._by_name(recipient_name or "") + except NotFound: + start_over = True + else: + goldtype = "" + start_over = True + + if start_over: + return BoringPage(_("reddit gold"), + show_sidebar = False, + content=Gold(goldtype, period, months, signed, + recipient, recipient_name)).render() + else: + payment_blob = dict(goldtype = goldtype, + account_id = c.user._id, + account_name = c.user.name, + status = "initialized") + + if goldtype == "gift": + payment_blob["signed"] = signed + payment_blob["recipient"] = recipient_name + payment_blob["giftmessage"] = giftmessage + + passthrough = randstr(15) + + g.hardcache.set("payment_blob-" + passthrough, + payment_blob, 86400 * 30) + + g.log.info("just set payment_blob-%s" % passthrough) + + return BoringPage(_("reddit gold"), + show_sidebar = False, + content=GoldPayment(goldtype, period, months, + signed, recipient, + giftmessage, passthrough) + ).render() + diff --git a/r2/r2/controllers/ipn.py b/r2/r2/controllers/ipn.py new file mode 100644 index 0000000000..cac5306c3b --- /dev/null +++ b/r2/r2/controllers/ipn.py @@ -0,0 +1,476 @@ +from xml.dom.minidom import Document +from httplib import HTTPSConnection +from urlparse import urlparse +import base64 + +from pylons.controllers.util import abort +from pylons import c, g, response +from pylons.i18n import _ + +from validator import * +from r2.models import * + +from reddit_base import RedditController + +def get_blob(code): + key = "payment_blob-" + code + with g.make_lock("payment_blob_lock-" + code): + blob = g.hardcache.get(key) + if not blob: + raise NotFound("No payment_blob-" + code) + if blob.get('status', None) != 'initialized': + raise ValueError("payment_blob %s has status = %s" % + (code, blob.get('status', None))) + blob['status'] = "locked" + g.hardcache.set(key, blob, 86400 * 30) + return key, blob + +def dump_parameters(parameters): + for k, v in parameters.iteritems(): + g.log.info("IPN: %r = %r" % (k, v)) + +def check_payment_status(payment_status): + if payment_status is None: + payment_status = '' + + psl = payment_status.lower() + + if psl == 'completed': + return (None, psl) + elif psl == 'refunded': + log_text("refund", "Just got notice of a refund.", "info") + # TODO: something useful when this happens -- and don't + # forget to verify 
first + return ("Ok", psl) + elif psl == 'pending': + log_text("pending", + "Just got notice of a Pending, whatever that is.", "info") + # TODO: something useful when this happens -- and don't + # forget to verify first + return ("Ok", psl) + elif psl == 'reversed': + log_text("reversal", + "Just got notice of a PayPal reversal.", "info") + # TODO: something useful when this happens -- and don't + # forget to verify first + return ("Ok", psl) + elif psl == 'canceled_reversal': + log_text("canceled_reversal", + "Just got notice of a PayPal 'canceled reversal'.", "info") + return ("Ok", psl) + elif psl == '': + return (None, psl) + else: + raise ValueError("Unknown IPN status: %r" % payment_status) + +def check_txn_type(txn_type, psl): + if txn_type == 'subscr_signup': + return ("Ok", None) + elif txn_type == 'subscr_cancel': + return ("Ok", "cancel") + elif txn_type == 'subscr_eot': + return ("Ok", None) + elif txn_type == 'subscr_failed': + log_text("failed_subscription", + "Just got notice of a failed PayPal resub.", "info") + return ("Ok", None) + elif txn_type == 'subscr_modify': + log_text("modified_subscription", + "Just got notice of a modified PayPal sub.", "info") + return ("Ok", None) + elif txn_type == 'send_money': + return ("Ok", None) + elif txn_type in ('new_case', + 'recurring_payment_suspended_due_to_max_failed_payment'): + return ("Ok", None) + elif txn_type == 'subscr_payment' and psl == 'completed': + return (None, "new") + elif txn_type == 'web_accept' and psl == 'completed': + return (None, None) + else: + raise ValueError("Unknown IPN txn_type / psl %r" % + ((txn_type, psl),)) + + +def verify_ipn(parameters): + paraemeters['cmd'] = '_notify-validate' + try: + safer = dict([k, v.encode('utf-8')] for k, v in parameters.items()) + params = urllib.urlencode(safer) + except UnicodeEncodeError: + g.log.error("problem urlencoding %r" % (parameters,)) + raise + req = urllib2.Request(g.PAYPAL_URL, params) + req.add_header("Content-type", "application/x-www-form-urlencoded") + + response = urllib2.urlopen(req) + status = response.read() + + if status != "VERIFIED": + raise ValueError("Invalid IPN response: %r" % status) + + +def existing_subscription(subscr_id): + account_id = accountid_from_paypalsubscription(subscr_id) + + if account_id is None: + return None + + try: + account = Account._byID(account_id) + except NotFound: + g.log.info("Just got IPN renewal for deleted account #%d" + % account_id) + return "deleted account" + + return account + +def months_and_days_from_pennies(pennies): + if pennies >= 2999: + months = 12 * (pennies / 2999) + days = 366 * (pennies / 2999) + else: + months = pennies / 399 + days = 31 * months + return (months, days) + +def send_gift(buyer, recipient, months, days, signed, giftmessage): + admintools.engolden(recipient, days) + if signed: + sender = buyer.name + md_sender = "[%s](/user/%s)" % (sender, sender) + else: + sender = "someone" + md_sender = "An anonymous redditor" + + create_gift_gold (buyer._id, recipient._id, days, c.start_time, signed) + if months == 1: + amount = "a month" + else: + amount = "%d months" % months + + subject = sender + " just sent you reddit gold!" 
+ message = strings.youve_got_gold % dict(sender=md_sender, amount=amount) + + if giftmessage and giftmessage.strip(): + message += "\n\n" + strings.giftgold_note + giftmessage + + send_system_message(recipient, subject, message) + + g.log.info("%s gifted %s to %s" % (buyer.name, amount, recipient.name)) + +def _google_ordernum_request(ordernums): + d = Document() + n = d.createElement("notification-history-request") + n.setAttribute("xmlns", "http://checkout.google.com/schema/2") + d.appendChild(n) + + on = d.createElement("order-numbers") + n.appendChild(on) + + for num in tup(ordernums): + gon = d.createElement('google-order-number') + gon.appendChild(d.createTextNode("%s" % num)) + on.appendChild(gon) + + return _google_checkout_post(g.GOOGLE_REPORT_URL, d.toxml("UTF-8")) + +def _google_charge_and_ship(ordernum): + d = Document() + n = d.createElement("charge-and-ship-order") + n.setAttribute("xmlns", "http://checkout.google.com/schema/2") + n.setAttribute("google-order-number", ordernum) + + d.appendChild(n) + + return _google_checkout_post(g.GOOGLE_REQUEST_URL, d.toxml("UTF-8")) + + +def _google_checkout_post(url, params): + u = urlparse("%s%s" % (url, g.GOOGLE_ID)) + conn = HTTPSConnection(u.hostname, u.port) + auth = base64.encodestring('%s:%s' % (g.GOOGLE_ID, g.GOOGLE_KEY))[:-1] + headers = {"Authorization": "Basic %s" % auth, + "Content-type": "text/xml; charset=\"UTF-8\""} + + conn.request("POST", u.path, params, headers) + response = conn.getresponse().read() + conn.close() + + return BeautifulStoneSoup(response) + +class IpnController(RedditController): + # Used when buying gold with creddits + @validatedForm(VUser(), + months = VInt("months"), + passthrough = VPrintable("passthrough", max_length=50)) + def POST_spendcreddits(self, form, jquery, months, passthrough): + if months is None or months < 1: + form.set_html(".status", _("nice try.")) + return + + days = months * 31 + + if not passthrough: + raise ValueError("/spendcreddits got no passthrough?") + + blob_key, payment_blob = get_blob(passthrough) + if payment_blob["goldtype"] != "gift": + raise ValueError("/spendcreddits payment_blob %s has goldtype %s" % + (passthrough, payment_blob["goldtype"])) + + signed = payment_blob["signed"] + giftmessage = payment_blob["giftmessage"] + recipient_name = payment_blob["recipient"] + + if payment_blob["account_id"] != c.user._id: + fmt = ("/spendcreddits payment_blob %s has userid %d " + + "but c.user._id is %d") + raise ValueError(fmt % passthrough, + payment_blob["account_id"], + c.user._id) + + try: + recipient = Account._by_name(recipient_name) + except NotFound: + raise ValueError("Invalid username %s in spendcreddits, buyer = %s" + % (recipient_name, c.user.name)) + + if not c.user_is_admin: + if months > c.user.gold_creddits: + raise ValueError("%s is trying to sneak around the creddit check" + % c.user.name) + + c.user.gold_creddits -= months + c.user.gold_creddit_escrow += months + c.user._commit() + + send_gift(c.user, recipient, months, days, signed, giftmessage) + + if not c.user_is_admin: + c.user.gold_creddit_escrow -= months + c.user._commit() + + payment_blob["status"] = "processed" + g.hardcache.set(blob_key, payment_blob, 86400 * 30) + + form.set_html(".status", _("the gold has been delivered!")) + jquery("button").hide() + + @textresponse(full_sn = VLength('serial-number', 100)) + def POST_gcheckout(self, full_sn): + if full_sn: + short_sn = full_sn.split('-')[0] + g.log.error( "GOOGLE CHECKOUT: %s" % short_sn) + trans = _google_ordernum_request(short_sn) + + # 
get the financial details + auth = trans.find("authorization-amount-notification") + + if not auth: + # see if the payment was declinded + status = trans.findAll('financial-order-state') + if 'PAYMENT_DECLINED' in [x.contents[0] for x in status]: + g.log.error("google declined transaction found: '%s'" % + short_sn) + elif 'REVIEWING' not in [x.contents[0] for x in status]: + g.log.error(("google transaction not found: " + + "'%s', status: %s") + % (short_sn, [x.contents[0] for x in status])) + else: + g.log.error(("google transaction status: " + + "'%s', status: %s") + % (short_sn, [x.contents[0] for x in status])) + elif auth.find("financial-order-state" + ).contents[0] == "CHARGEABLE": + email = str(auth.find("email").contents[0]) + payer_id = str(auth.find('buyer-id').contents[0]) + # get the "secret" + custom = None + cart = trans.find("shopping-cart") + if cart: + for item in cart.findAll("merchant-private-item-data"): + custom = str(item.contents[0]) + break + if custom: + days = None + try: + pennies = int(float(trans.find("order-total" + ).contents[0])*100) + months, days = months_and_days_from_pennies(pennies) + charged = trans.find("charge-amount-notification") + if not charged: + _google_charge_and_ship(short_sn) + + parameters = request.POST.copy() + self.finish(parameters, "g%s" % short_sn, + email, payer_id, None, + custom, pennies, months, days) + except ValueError, e: + g.log.error(e) + else: + raise ValueError("Got no custom blob for %s" % short_sn) + + return (('') % full_sn) + else: + g.log.error("GOOGLE CHCEKOUT: didn't work") + g.log.error(repr(list(request.POST.iteritems()))) + + @textresponse(paypal_secret = VPrintable('secret', 50), + payment_status = VPrintable('payment_status', 20), + txn_id = VPrintable('txn_id', 20), + paying_id = VPrintable('payer_id', 50), + payer_email = VPrintable('payer_email', 250), + mc_currency = VPrintable('mc_currency', 20), + mc_gross = VFloat('mc_gross'), + custom = VPrintable('custom', 50)) + def POST_ipn(self, paypal_secret, payment_status, txn_id, paying_id, + payer_email, mc_currency, mc_gross, custom): + + parameters = request.POST.copy() + + # Make sure it's really PayPal + if paypal_secret != g.PAYPAL_SECRET: + log_text("invalid IPN secret", + "%s guessed the wrong IPN secret" % request.ip, + "warning") + raise ValueError + + # Return early if it's an IPN class we don't care about + response, psl = check_payment_status(payment_status) + if response: + return response + + # Return early if it's a txn_type we don't care about + response, subscription = check_txn_type(parameters['txn_type'], psl) + if subscription is None: + subscr_id = None + elif subscription == "new": + subscr_id = parameters['subscr_id'] + elif subscription == "cancel": + cancel_subscription(parameters['subscr_id']) + else: + raise ValueError("Weird subscription: %r" % subscription) + + if response: + return response + + # Check for the debug flag, and if so, dump the IPN dict + if g.cache.get("ipn-debug"): + g.cache.delete("ipn-debug") + dump_parameters(parameters) + + # More sanity checks... 
+ if False: # TODO: remove this line + verify_ipn(parameters) + + if mc_currency != 'USD': + raise ValueError("Somehow got non-USD IPN %r" % mc_currency) + + if not (txn_id and paying_id and payer_email and mc_gross): + dump_parameters(parameters) + raise ValueError("Got incomplete IPN") + + pennies = int(mc_gross * 100) + months, days = months_and_days_from_pennies(pennies) + + # Special case: autorenewal payment + existing = existing_subscription(subscr_id) + if existing: + if existing != "deleted account": + create_claimed_gold ("P" + txn_id, payer_email, paying_id, + pennies, days, None, existing._id, + c.start_time, subscr_id) + admintools.engolden(existing, days) + + g.log.info("Just applied IPN renewal for %s, %d days" % + (existing.name, days)) + return "Ok" + elif subscr_id: + g.log.warning("IPN subscription %s is not associated with anyone" + % subscr_id) + + # More sanity checks that all non-autorenewals should pass: + + if not custom: + dump_parameters(parameters) + raise ValueError("Got IPN with txn_id=%s and no custom" + % txn_id) + + self.finish(parameters, "P" + txn_id, + payer_email, paying_id, subscr_id, + custom, pennies, months, days) + + def finish(self, parameters, txn_id, + payer_email, paying_id, subscr_id, + custom, pennies, months, days): + + blob_key, payment_blob = get_blob(custom) + + buyer_id = payment_blob.get('account_id', None) + if not buyer_id: + dump_parameters(parameters) + raise ValueError("No buyer_id in IPN/GC with custom='%s'" % custom) + try: + buyer = Account._byID(buyer_id) + except NotFound: + dump_parameters(parameters) + raise ValueError("Invalid buyer_id %d in IPN/GC with custom='%s'" + % (buyer_id, custom)) + + if subscr_id: + buyer.gold_subscr_id = subscr_id + + instagift = False + if payment_blob['goldtype'] in ('autorenew', 'onetime'): + admintools.engolden(buyer, days) + + subject = _("thanks for buying reddit gold!") + + if g.lounge_reddit: + lounge_url = "/r/" + g.lounge_reddit + message = strings.lounge_msg % dict(link=lounge_url) + else: + message = ":)" + elif payment_blob['goldtype'] == 'creddits': + buyer._incr("gold_creddits", months) + buyer._commit() + subject = _("thanks for buying creddits!") + message = _("To spend them, visit [/gold](/gold) or your favorite person's userpage.") + elif payment_blob['goldtype'] == 'gift': + recipient_name = payment_blob.get('recipient', None) + try: + recipient = Account._by_name(recipient_name) + except NotFound: + dump_parameters(parameters) + raise ValueError("Invalid recipient_name %s in IPN/GC with custom='%s'" + % (recipient_name, custom)) + signed = payment_blob.get("signed", False) + giftmessage = payment_blob.get("giftmessage", False) + send_gift(buyer, recipient, months, days, signed, giftmessage) + instagift = True + subject = _("thanks for giving reddit gold!") + message = _("Your gift to %s has been delivered." 
% recipient.name) + else: + dump_parameters(parameters) + raise ValueError("Got status '%s' in IPN/GC" % payment_blob['status']) + + # Reuse the old "secret" column as a place to record the goldtype + # and "custom", just in case we need to debug it later or something + secret = payment_blob['goldtype'] + "-" + custom + + if instagift: + status="instagift" + else: + status="processed" + + create_claimed_gold(txn_id, payer_email, paying_id, pennies, days, + secret, buyer_id, c.start_time, + subscr_id, status=status) + + send_system_message(buyer, subject, message) + + payment_blob["status"] = "processed" + g.hardcache.set(blob_key, payment_blob, 86400 * 30) diff --git a/r2/r2/controllers/listingcontroller.py b/r2/r2/controllers/listingcontroller.py index 176e501787..181256a37c 100644 --- a/r2/r2/controllers/listingcontroller.py +++ b/r2/r2/controllers/listingcontroller.py @@ -147,6 +147,8 @@ def keep(item): wouldkeep = item.keep_item(item) if getattr(item, "promoted", None) is not None: return False + if item._deleted and not c.user_is_admin: + return False return wouldkeep return keep @@ -398,7 +400,7 @@ def keep(item): @property def menus(self): return [ControversyTimeMenu(default = self.time)] - + def query(self): return c.site.get_links(self.sort, self.time) @@ -453,14 +455,14 @@ def GET_listing(self, links, **env): #class RecommendedController(ListingController): # where = 'recommended' # title_text = _('recommended for you') -# +# # @property # def menus(self): # return [RecSortMenu(default = self.sort)] -# +# # def query(self): # return get_recommended(c.user._id, sort = self.sort) -# +# # @validate(VUser(), # sort = VMenu("controller", RecSortMenu)) # def GET_listing(self, sort, **env): @@ -497,7 +499,8 @@ def keep_fn(self): # keep promotions off of profile pages. 
def keep(item): wouldkeep = True - if item._deleted: + # TODO: Consider a flag to disable this (and see below plus builder.py) + if item._deleted and not c.user_is_admin: return False if self.time != 'all': wouldkeep = (item._date > utils.timeago('1 %s' % str(self.time))) @@ -618,25 +621,26 @@ def title(self): def keep_fn(self): def keep(item): wouldkeep = item.keep_item(item) - if item._deleted or item._spam: + # TODO: Consider a flag to disable this (and see above plus builder.py) + if (item._deleted or item._spam) and not c.user_is_admin: return False # don't show user their own unread stuff if ((self.where == 'unread' or self.subwhere == 'unread') and (item.author_id == c.user._id or not item.new)): return False + return wouldkeep return keep @staticmethod def builder_wrapper(thing): if isinstance(thing, Comment): - p = thing.make_permalink_slow() f = thing._fullname w = Wrapped(thing) w.render_class = Message w.to_id = c.user._id w.was_comment = True - w.permalink, w._fullname = p, f + w._fullname = f else: w = ListingController.builder_wrapper(thing) diff --git a/r2/r2/controllers/post.py b/r2/r2/controllers/post.py index c061e4d825..4ef8b0f1f7 100644 --- a/r2/r2/controllers/post.py +++ b/r2/r2/controllers/post.py @@ -97,6 +97,7 @@ def POST_unlogged_options(self, all_langs, pref_lang): pref_private_feeds = VBoolean("private_feeds"), pref_show_adbox = VBoolean("show_adbox"), pref_show_sponsors = VBoolean("show_sponsors"), + pref_show_sponsorships = VBoolean("show_sponsorships"), pref_highlight_new_comments = VBoolean("highlight_new_comments"), all_langs = nop('all-langs', default = 'all')) def POST_options(self, all_langs, pref_lang, **kw): @@ -117,6 +118,7 @@ def POST_options(self, all_langs, pref_lang, **kw): if not c.user.gold: kw['pref_show_adbox'] = True kw['pref_show_sponsors'] = True + kw['pref_show_sponsorships'] = True self.set_options(all_langs, pref_lang, **kw) u = UrlParser(c.site.path + "prefs") diff --git a/r2/r2/controllers/promotecontroller.py b/r2/r2/controllers/promotecontroller.py index 45f27a8532..097ca76655 100644 --- a/r2/r2/controllers/promotecontroller.py +++ b/r2/r2/controllers/promotecontroller.py @@ -406,10 +406,10 @@ def POST_rm_traffic_viewer(self, form, jquery, iuser, thing): customer_id = VInt("customer_id", min = 0), pay_id = VInt("account", min = 0), edit = VBoolean("edit"), - address = ValidAddress(["firstName", "lastName", - "company", "address", - "city", "state", "zip", - "country", "phoneNumber"]), + address = ValidAddress( + ["firstName", "lastName", "company", "address", + "city", "state", "zip", "country", "phoneNumber"], + allowed_countries = g.allowed_pay_countries), creditcard = ValidCard(["cardNumber", "expirationDate", "cardCode"])) def POST_update_pay(self, form, jquery, link, indx, customer_id, pay_id, @@ -423,13 +423,18 @@ def POST_update_pay(self, form, jquery, link, indx, customer_id, pay_id, form.has_errors(["cardNumber", "expirationDate", "cardCode"], errors.BAD_CARD)): pass - else: + elif g.authorizenetapi: pay_id = edit_profile(c.user, address, creditcard, pay_id) + else: + pay_id = 1 # if link is in use or finished, don't make a change if pay_id: # valid bid and created or existing bid id. 
# check if already a transaction - success, reason = promote.auth_campaign(link, indx, c.user, pay_id) + if g.authorizenetapi: + success, reason = promote.auth_campaign(link, indx, c.user, pay_id) + else: + success = True if success: form.redirect(promote.promo_edit_url(link)) else: @@ -449,10 +454,14 @@ def GET_pay(self, article, indx): if indx not in getattr(article, "campaigns", {}): return self.abort404() - data = get_account_info(c.user) - content = PaymentForm(article, indx, - customer_id = data.customerProfileId, - profiles = data.paymentProfiles) + if g.authorizenetapi: + data = get_account_info(c.user) + content = PaymentForm(article, indx, + customer_id = data.customerProfileId, + profiles = data.paymentProfiles) + else: + content = PaymentForm(article, 0, customer_id = 0, + profiles = []) res = LinkInfoPage(link = article, content = content, show_sidebar = False) diff --git a/r2/r2/controllers/reddit_base.py b/r2/r2/controllers/reddit_base.py index 9bf7c2f026..35e6ae3bed 100644 --- a/r2/r2/controllers/reddit_base.py +++ b/r2/r2/controllers/reddit_base.py @@ -50,6 +50,7 @@ from pylons import Response NEVER = 'Thu, 31 Dec 2037 23:59:59 GMT' +DELETE = 'Thu, 01-Jan-1970 00:00:01 GMT' cache_affecting_cookies = ('reddit_first','over18','_options') @@ -240,6 +241,8 @@ def set_subreddit(): sr_name = request.environ.get("subreddit", request.POST.get('r')) domain = request.environ.get("domain") + can_stale = request.method.upper() in ('GET','HEAD') + default_sr = DefaultSR() c.site = default_sr if not sr_name: @@ -256,7 +259,7 @@ def set_subreddit(): srs = set() sr_names = sr_name.split('+') real_path = sr_name - srs = Subreddit._by_name(sr_names).values() + srs = Subreddit._by_name(sr_names, stale=can_stale).values() if len(srs) != len(sr_names): abort(404) elif any(isinstance(sr, FakeSubreddit) @@ -271,7 +274,7 @@ def set_subreddit(): sr_ids = [sr._id for sr in srs] c.site = MultiReddit(sr_ids, real_path) else: - c.site = Subreddit._by_name(sr_name) + c.site = Subreddit._by_name(sr_name, stale=can_stale) except NotFound: sr_name = chksrname(sr_name) if sr_name: @@ -414,7 +417,7 @@ def set_recent_reddits(): def set_colors(): theme_rx = re.compile(r'') - color_rx = re.compile(r'^([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?$') + color_rx = re.compile(r'\A([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?\Z') c.theme = None if color_rx.match(request.get.get('bgcolor') or ''): c.bgcolor = request.get.get('bgcolor') @@ -492,7 +495,7 @@ def request_key(self): c.over18, c.firsttime, c.extension, - c.render_style, + c.render_style, cookies_key) def cached_response(self): @@ -503,7 +506,7 @@ def pre(self): c.start_time = datetime.now(g.tz) g.reset_caches() - c.domain_prefix = request.environ.get("reddit-domain-prefix", + c.domain_prefix = request.environ.get("reddit-domain-prefix", g.domain_prefix) #check if user-agent needs a dose of rate-limiting if not c.error_page: @@ -522,9 +525,9 @@ def pre(self): def try_pagecache(self): #check content cache - if not c.user_is_loggedin: + if request.method.upper() == 'GET' and not c.user_is_loggedin: r = g.rendercache.get(self.request_key()) - if r and request.method == 'GET': + if r: r, c.cookies = r response = c.response response.headers = r.headers @@ -570,7 +573,7 @@ def post(self): #return #set content cache if (g.page_cache_time - and request.method == 'GET' + and request.method.upper() == 'GET' and (not c.user_is_loggedin or c.allow_loggedin_cache) and not c.used_cache and response.status_code != 503 @@ -610,6 +613,12 @@ def post(self): sampling_rate = g.usage_sampling, 
action = action) + # this thread is probably going to be reused, but it could be + # a while before it is. So we might as well dump the cache in + # the mean time so that we don't have dead objects hanging + # around taking up memory + g.reset_caches() + def abort404(self): abort(404, "not found") @@ -634,7 +643,7 @@ def update_qstring(self, dict): def api_wrapper(self, kw): data = simplejson.dumps(kw) - if request.method == "GET" and request.GET.get("callback"): + if request.method.upper() == "GET" and request.GET.get("callback"): return "%s(%s)" % (websafe_json(request.GET.get("callback")), websafe_json(data)) return self.sendstring(data) @@ -647,10 +656,10 @@ class RedditController(MinimalController): def login(user, admin = False, rem = False): c.cookies[g.login_cookie] = Cookie(value = user.make_cookie(admin = admin), expires = NEVER if rem else None) - + @staticmethod def logout(admin = False): - c.cookies[g.login_cookie] = Cookie(value='') + c.cookies[g.login_cookie] = Cookie(value='', expires=DELETE) def pre(self): c.response_wrappers = [] @@ -699,7 +708,7 @@ def pre(self): if not c.user._loaded: c.user._load() c.modhash = c.user.modhash() - if request.method.lower() == 'get': + if request.method.upper() == 'GET': read_mod_cookie() if hasattr(c.user, 'msgtime') and c.user.msgtime: c.have_messages = c.user.msgtime @@ -739,13 +748,13 @@ def pre(self): # check that the site is available: - if c.site._spam and not c.user_is_admin and not c.error_page: + if c.site.spammy() and not c.user_is_admin and not c.error_page: abort(404, "not found") # check if the user has access to this subreddit if not c.site.can_view(c.user) and not c.error_page: abort(403, "forbidden") - + #check over 18 if (c.site.over_18 and not c.over18 and request.path not in ("/frame", "/over18") diff --git a/r2/r2/controllers/toolbar.py b/r2/r2/controllers/toolbar.py index 304a717abd..1e5ac9f27a 100644 --- a/r2/r2/controllers/toolbar.py +++ b/r2/r2/controllers/toolbar.py @@ -34,11 +34,12 @@ import string # strips /r/foo/, /s/, or both -strip_sr = re.compile('^/r/[a-zA-Z0-9_-]+') -strip_s_path = re.compile('^/s/') -leading_slash = re.compile('^/+') -has_protocol = re.compile('^https?:') -need_insert_slash = re.compile('^https?:/[^/]') +strip_sr = re.compile('\A/r/[a-zA-Z0-9_-]+') +strip_s_path = re.compile('\A/s/') +leading_slash = re.compile('\A/+') +has_protocol = re.compile('\A[a-zA-Z_-]+:') +allowed_protocol = re.compile('\Ahttps?:') +need_insert_slash = re.compile('\Ahttps?:/[^/]') def demangle_url(path): # there's often some URL mangling done by the stack above us, so # let's clean up the URL before looking it up @@ -46,7 +47,10 @@ def demangle_url(path): path = strip_s_path.sub('', path) path = leading_slash.sub("", path) - if not has_protocol.match(path): + if has_protocol.match(path): + if not allowed_protocol.match(path): + return None + else: path = 'http://%s' % path if need_insert_slash.match(path): @@ -203,12 +207,16 @@ def GET_toolbar(self, link, url): if link: res = link[0] elif url: + url = demangle_url(url) + if not url: # also check for validity + return self.abort404() + res = FrameToolbar(link = None, title = None, url = url, expanded = False) else: - self.abort404() + return self.abort404() return spaceCompress(res.render()) @validate(link = VByName('id')) diff --git a/r2/r2/controllers/validator/validator.py b/r2/r2/controllers/validator/validator.py index f950de13c4..f6ccb722bb 100644 --- a/r2/r2/controllers/validator/validator.py +++ b/r2/r2/controllers/validator/validator.py @@ -373,11 +373,11 
@@ def run(self, limit): return min(max(i, 1), 100) class VCssMeasure(Validator): - measure = re.compile(r"^\s*[\d\.]+\w{0,3}\s*$") + measure = re.compile(r"\A\s*[\d\.]+\w{0,3}\s*\Z") def run(self, value): return value if value and self.measure.match(value) else '' -subreddit_rx = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_]{2,20}$") +subreddit_rx = re.compile(r"\A[A-Za-z0-9][A-Za-z0-9_]{2,20}\Z") def chksrname(x): #notice the space before reddit.com @@ -391,7 +391,7 @@ def chksrname(x): class VLength(Validator): - only_whitespace = re.compile(r"^\s*$", re.UNICODE) + only_whitespace = re.compile(r"\A\s*\Z", re.UNICODE) def __init__(self, param, max_length, empty_error = errors.NO_TEXT, @@ -514,7 +514,7 @@ def fullname_regex(thing_cls = None, multiple = False): pattern += r"_[0-9a-z]+" if multiple: pattern = r"(%s *,? *)+" % pattern - return re.compile(r"^" + pattern + r"$") + return re.compile(r"\A" + pattern + r"\Z") class VByName(Validator): splitter = re.compile('[ ,]+') @@ -750,7 +750,7 @@ def run(self, sr_name, link_type = None): return sr -pass_rx = re.compile(r"^.{3,20}$") +pass_rx = re.compile(r"\A.{3,20}\Z") def chkpass(x): return x if x and pass_rx.match(x) else None @@ -764,10 +764,14 @@ def run(self, password, verify): else: return password.encode('utf8') -user_rx = re.compile(r"^[\w-]{3,20}$", re.UNICODE) +user_rx = re.compile(r"\A[\w-]{3,20}\Z", re.UNICODE) def chkuser(x): + if x is None: + return None try: + if any(ch.isspace() for ch in x): + return None return str(x) if user_rx.match(x) else None except TypeError: return None @@ -879,6 +883,8 @@ def run(self, name): s = Subreddit._by_name(name.strip('#')) if isinstance(s, FakeSubreddit): raise NotFound, "fake subreddit" + if s._spam: + raise NotFound, "banned community" return s except NotFound: self.set_error(errors.SUBREDDIT_NOEXIST) @@ -977,7 +983,7 @@ class VCssName(Validator): returns a name iff it consists of alphanumeric characters and possibly "-", and is below the length limit. 
""" - r_css_name = re.compile(r"^[a-zA-Z0-9\-]{1,100}$") + r_css_name = re.compile(r"\A[a-zA-Z0-9\-]{1,100}\Z") def run(self, name): if name and self.r_css_name.match(name): return name @@ -1058,6 +1064,55 @@ def ratelimit(self, rate_user = False, rate_ip = False, prefix = "rate_", to_set['ip' + str(request.ip)] = expire_time g.cache.set_multi(to_set, prefix = prefix, time = seconds) +class VDelay(Validator): + def __init__(self, category, *a, **kw): + self.category = category + Validator.__init__(self, *a, **kw) + + def run (self): + key = "VDelay-%s-%s" % (self.category, request.ip) + prev_violations = g.cache.get(key) + if prev_violations: + time = utils.timeuntil(prev_violations["expire_time"]) + if prev_violations["expire_time"] > datetime.now(g.tz): + self.set_error(errors.RATELIMIT, {'time': time}, + field='vdelay') + + @classmethod + def record_violation(self, category, seconds = None, growfast=False): + if seconds is None: + seconds = g.RATELIMIT*60 + + key = "VDelay-%s-%s" % (category, request.ip) + prev_violations = g.memcache.get(key) + if prev_violations is None: + prev_violations = dict(count=0) + + num_violations = prev_violations["count"] + + if growfast: + multiplier = 3 ** num_violations + else: + multiplier = 1 + + max_duration = 8 * 3600 + duration = min(seconds * multiplier, max_duration) + + expire_time = (datetime.now(g.tz) + + timedelta(seconds = duration)) + + prev_violations["expire_time"] = expire_time + prev_violations["duration"] = duration + prev_violations["count"] += 1 + + with g.make_lock("lock-" + key, timeout=5, verbose=False): + existing = g.memcache.get(key) + if existing and existing["count"] > prev_violations["count"]: + g.log.warning("Tried to set %s to count=%d, but found existing=%d" + % (key, prev_violations["count"], existing["count"])) + else: + g.cache.set(key, prev_violations, max_duration) + class VCommentIDs(Validator): #id_str is a comma separated list of id36's def run(self, id_str): @@ -1182,7 +1237,7 @@ def run(self, emails0): class VCnameDomain(Validator): - domain_re = re.compile(r'^([\w\-_]+\.)+[\w]+$') + domain_re = re.compile(r'\A([\w\-_]+\.)+[\w]+\Z') def run(self, domain): if (domain @@ -1316,8 +1371,8 @@ def run(self, dest): return "/" class ValidAddress(Validator): - def __init__(self, param, usa_only = True): - self.usa_only = usa_only + def __init__(self, param, allowed_countries = ["United States"]): + self.allowed_countries = allowed_countries Validator.__init__(self, param) def set_error(self, msg, field): @@ -1338,20 +1393,18 @@ def run(self, firstName, lastName, company, address, self.set_error(_("please provide your state"), "state") elif not zipCode: self.set_error(_("please provide your zip or post code"), "zip") - elif (not self.usa_only and - (not country or not pycountry.countries.get(alpha2=country))): + elif not country: self.set_error(_("please pick a country"), "country") else: - if self.usa_only: - country = 'United States' - else: - country = pycountry.countries.get(alpha2=country).name + country = pycountry.countries.get(alpha2=country) + if country.name not in self.allowed_countries: + self.set_error(_("Our ToS don't cover your country (yet). 
Sorry."), "country") return Address(firstName = firstName, lastName = lastName, company = company or "", address = address, city = city, state = state, - zip = zipCode, country = country, + zip = zipCode, country = country.name, phoneNumber = phoneNumber or "") class ValidCard(Validator): @@ -1377,7 +1430,7 @@ def run(self, cardNumber, expirationDate, cardCode): cardCode = cardCode) class VTarget(Validator): - target_re = re.compile("^[\w_-]{3,20}$") + target_re = re.compile("\A[\w_-]{3,20}\Z") def run(self, name): if name and self.target_re.match(name): return name diff --git a/r2/r2/lib/amqp.py b/r2/r2/lib/amqp.py index 8aef88e6cf..5f882376b5 100644 --- a/r2/r2/lib/amqp.py +++ b/r2/r2/lib/amqp.py @@ -181,6 +181,8 @@ def consume_items(queue, callback, verbose=True): single items at a time. This is more efficient than handle_items when the queue is likely to be occasionally empty or if batching the received messages is not necessary.""" + from pylons import c + chan = connection_manager.get_channel() def _callback(msg): @@ -194,6 +196,8 @@ def _callback(msg): print "%s: 1 item %s" % (queue, count_str) g.reset_caches() + c.use_write_db = {} + ret = callback(msg) msg.channel.basic_ack(msg.delivery_tag) sys.stdout.flush() @@ -217,6 +221,7 @@ def handle_items(queue, callback, ack = True, limit = 1, drain = False, """Call callback() on every item in a particular queue. If the connection to the queue is lost, it will die. Intended to be used as a long-running process.""" + from pylons import c chan = connection_manager.get_channel() countdown = None @@ -238,6 +243,7 @@ def handle_items(queue, callback, ack = True, limit = 1, drain = False, countdown = 1 + msg.delivery_info['message_count'] g.reset_caches() + c.use_write_db = {} items = [] diff --git a/r2/r2/lib/app_globals.py b/r2/r2/lib/app_globals.py index 8278fa1d9f..723e085de8 100644 --- a/r2/r2/lib/app_globals.py +++ b/r2/r2/lib/app_globals.py @@ -24,11 +24,11 @@ import pytz, os, logging, sys, socket, re, subprocess, random import signal from datetime import timedelta, datetime -import pycassa +from pycassa.pool import ConnectionPool as PycassaConnectionPool from r2.lib.cache import LocalCache, SelfEmptyingCache -from r2.lib.cache import CMemcache +from r2.lib.cache import CMemcache, StaleCacheChain from r2.lib.cache import HardCache, MemcacheChain, MemcacheChain, HardcacheChain -from r2.lib.cache import CassandraCache, CassandraCacheChain, CacheChain, CL_ONE, CL_QUORUM, CL_ZERO +from r2.lib.cache import CassandraCache, CassandraCacheChain, CacheChain, CL_ONE, CL_QUORUM from r2.lib.utils import thread_dump from r2.lib.db.stats import QueryStats from r2.lib.translation import get_active_langs @@ -37,7 +37,9 @@ class Globals(object): - int_props = ['page_cache_time', + int_props = ['db_pool_size', + 'db_pool_overflow_size', + 'page_cache_time', 'solr_cache_time', 'num_mc_clients', 'MIN_DOWN_LINK', @@ -81,12 +83,13 @@ class Globals(object): 'exception_logging', 'amqp_logging', 'read_only_mode', + 'frontpage_dart', ] - tuple_props = ['memcaches', + tuple_props = ['stalecaches', + 'memcaches', 'permacache_memcaches', 'rendercaches', - 'local_rendercache', 'servicecaches', 'cassandra_seeds', 'admins', @@ -97,13 +100,12 @@ class Globals(object): 'allowed_css_linked_domains', 'authorized_cnames', 'hardcache_categories', - 'proxy_addr'] + 'proxy_addr', + 'allowed_pay_countries'] - choice_props = {'cassandra_rcl': {'ZERO': CL_ZERO, - 'ONE': CL_ONE, + choice_props = {'cassandra_rcl': {'ONE': CL_ONE, 'QUORUM': CL_QUORUM}, - 'cassandra_wcl': {'ZERO': 
CL_ZERO, - 'ONE': CL_ONE, + 'cassandra_wcl': {'ONE': CL_ONE, 'QUORUM': CL_QUORUM}, } @@ -173,34 +175,33 @@ def __init__(self, global_conf, app_conf, paths, **extra): if not self.cassandra_seeds: raise ValueError("cassandra_seeds not set in the .ini") - self.cassandra_seeds = list(self.cassandra_seeds) - random.shuffle(self.cassandra_seeds) - self.cassandra = pycassa.connect_thread_local(self.cassandra_seeds) + self.cassandra = PycassaConnectionPool('reddit', + server_list = self.cassandra_seeds, + # TODO: .ini setting + timeout=15, max_retries=3, + prefill=False) perma_memcache = (CMemcache(self.permacache_memcaches, num_clients = num_mc_clients) if self.permacache_memcaches else None) - self.permacache = self.init_cass_cache('permacache', 'permacache', - self.cassandra, - self.make_lock, - memcache = perma_memcache, - read_consistency_level = self.cassandra_rcl, - write_consistency_level = self.cassandra_wcl, - localcache_cls = localcache_cls) + self.permacache = CassandraCacheChain(localcache_cls(), + CassandraCache('permacache', + self.cassandra, + read_consistency_level = self.cassandra_rcl, + write_consistency_level = self.cassandra_wcl), + memcache = perma_memcache, + lock_factory = self.make_lock) + self.cache_chains.append(self.permacache) - self.urlcache = self.init_cass_cache('permacache', 'urls', - self.cassandra, - self.make_lock, - # TODO: increase this to QUORUM - # once we switch to live - read_consistency_level = self.cassandra_rcl, - write_consistency_level = CL_ONE, - localcache_cls = localcache_cls) - self.cache_chains.append(self.urlcache) # hardcache is done after the db info is loaded, and then the # chains are reset to use the appropriate initial entries - self.cache = MemcacheChain((localcache_cls(), self.memcache)) + if self.stalecaches: + self.cache = StaleCacheChain(localcache_cls(), + CMemcache(self.stalecaches, num_clients=num_mc_clients), + self.memcache) + else: + self.cache = MemcacheChain((localcache_cls(), self.memcache)) self.cache_chains.append(self.cache) self.rendercache = MemcacheChain((localcache_cls(), @@ -327,6 +328,13 @@ def reset_caches(): self.reddit_host = socket.gethostname() self.reddit_pid = os.getpid() + for arg in sys.argv: + tokens = arg.split("=") + if len(tokens) == 2: + k, v = tokens + self.log.debug("Overriding g.%s to %s" % (k, v)) + setattr(self, k, v) + #the shutdown toggle self.shutdown = False @@ -357,20 +365,6 @@ def reset_caches(): (self.reddit_host, self.reddit_pid, self.short_version, datetime.now())) - def init_cass_cache(self, keyspace, column_family, cassandra_client, - lock_factory, - memcache = None, - read_consistency_level = CL_ONE, - write_consistency_level = CL_ONE, - localcache_cls = LocalCache): - return CassandraCacheChain(localcache_cls(), - CassandraCache(keyspace, column_family, - cassandra_client, - read_consistency_level = read_consistency_level, - write_consistency_level = write_consistency_level), - memcache = memcache, - lock_factory = lock_factory) - @staticmethod def to_bool(x): return (x.lower() == 'true') if x else None @@ -388,7 +382,7 @@ def load_db_params(self, gc): return dbm = db_manager.db_manager() - db_param_names = ('name', 'db_host', 'db_user', 'db_pass', + db_param_names = ('name', 'db_host', 'db_user', 'db_pass', 'db_port', 'pool_size', 'max_overflow') for db_name in self.databases: conf_params = self.to_iter(gc[db_name + '_db']) @@ -397,6 +391,14 @@ def load_db_params(self, gc): params['db_user'] = self.db_user if params['db_pass'] == "*": params['db_pass'] = self.db_pass + if 
params['db_port'] == "*": + params['db_port'] = self.db_port + + if params['pool_size'] == "*": + params['pool_size'] = self.db_pool_size + if params['max_overflow'] == "*": + params['max_overflow'] = self.db_pool_overflow_size + ip = params['db_host'] ip_loads = get_db_load(self.servicecache, ip) if ip not in ip_loads or ip_loads[ip][0] < 1000: diff --git a/r2/r2/lib/authorize/api.py b/r2/r2/lib/authorize/api.py index f871d10c27..ba2fe27349 100644 --- a/r2/r2/lib/authorize/api.py +++ b/r2/r2/lib/authorize/api.py @@ -227,7 +227,8 @@ def is_error_code(self, res, code): def process_error(self, res): - raise AuthorizeNetException, res + msg = "Response %r from request %r" % (res, self.toXML()) + raise AuthorizeNetException(msg) _autoclose_re = re.compile("<([^/]+)/>") def _autoclose_handler(self, m): diff --git a/r2/r2/lib/base.py b/r2/r2/lib/base.py index fd41c1dc5f..70b8784eca 100644 --- a/r2/r2/lib/base.py +++ b/r2/r2/lib/base.py @@ -30,7 +30,7 @@ from utils import storify, string2js, read_http_date from r2.lib.log import log_exception -import re, md5 +import re, hashlib from urllib import quote import urllib2 import sys @@ -61,7 +61,7 @@ def __call__(self, environ, start_response): if (g.ip_hash and true_client_ip and ip_hash - and md5.new(true_client_ip + g.ip_hash).hexdigest() \ + and hashlib.md5(true_client_ip + g.ip_hash).hexdigest() \ == ip_hash.lower()): request.ip = true_client_ip elif remote_addr in g.proxy_addr and forwarded_for: @@ -133,17 +133,20 @@ def format_output_url(cls, url, **kw): # make sure to pass the port along if not 80 if not kw.has_key('port'): kw['port'] = request.port - + # disentagle the cname (for urls that would have # cnameframe=1 in them) u.mk_cname(**kw) - + # make sure the extensions agree with the current page if c.extension: u.set_extension(c.extension) # unparse and encode it un utf8 - return _force_unicode(u.unparse()).encode('utf8') + rv = _force_unicode(u.unparse()).encode('utf8') + if any(ch.isspace() for ch in rv): + raise ValueError("Space characters in redirect URL: [%r]" % rv) + return rv @classmethod @@ -158,11 +161,11 @@ def intermediate_redirect(cls, form_path): params = dict(dest = cls.format_output_url(request.fullpath)) if c.extension == "widget" and request.GET.get("callback"): params['callback'] = request.GET.get("callback") - + path = add_sr(cls.format_output_url(form_path) + query_string(params)) return cls.redirect(path) - + @classmethod def redirect(cls, dest, code = 302): """ diff --git a/r2/r2/lib/cache.py b/r2/r2/lib/cache.py index 3068160c2a..a127fd365f 100644 --- a/r2/r2/lib/cache.py +++ b/r2/r2/lib/cache.py @@ -27,11 +27,12 @@ import pylibmc from _pylibmc import MemcachedError -import pycassa -import cassandra.ttypes +from pycassa import ColumnFamily +from pycassa.cassandra.ttypes import ConsistencyLevel +from pycassa.cassandra.ttypes import NotFoundException as CassandraNotFound from r2.lib.contrib import memcache -from r2.lib.utils import in_chunks, prefix_keys +from r2.lib.utils import in_chunks, prefix_keys, trace from r2.lib.hardcachebackend import HardCacheBackend from r2.lib.sgm import sgm # get this into our namespace so that it's @@ -51,8 +52,8 @@ def add_multi(self, keys, prefix='', time=0): for k,v in keys.iteritems(): self.add(prefix+str(k), v, time = time) - def get_multi(self, keys, prefix=''): - return prefix_keys(keys, prefix, self.simple_get_multi) + def get_multi(self, keys, prefix='', **kw): + return prefix_keys(keys, prefix, lambda k: self.simple_get_multi(k, **kw)) class PyMemcache(CacheUtils, 
memcache.Client): """We still use our patched python-memcache to talk to the @@ -382,11 +383,11 @@ def get(self, key, default = None, allow_local = True): return default - def get_multi(self, keys, prefix='', allow_local = True): - l = lambda ks: self.simple_get_multi(ks, allow_local = allow_local) + def get_multi(self, keys, prefix='', allow_local = True, **kw): + l = lambda ks: self.simple_get_multi(ks, allow_local = allow_local, **kw) return prefix_keys(keys, prefix, l) - def simple_get_multi(self, keys, allow_local = True): + def simple_get_multi(self, keys, allow_local = True, stale=None): out = {} need = set(keys) for c in self.caches: @@ -462,15 +463,95 @@ def accrue(self, key, time=0, delta=1): for c in self.caches: c.set(key, auth_value, time=time) + return auth_value + @property def backend(self): # the hardcache is always the last item in a HardCacheChain return self.caches[-1].backend -CL_ZERO = cassandra.ttypes.ConsistencyLevel.ZERO -CL_ONE = cassandra.ttypes.ConsistencyLevel.ONE -CL_QUORUM = cassandra.ttypes.ConsistencyLevel.QUORUM -CL_ALL = cassandra.ttypes.ConsistencyLevel.ALL +class StaleCacheChain(CacheChain): + """A cache chain of two cache chains. When allowed by `stale`, + answers may be returned by a "closer" but potentially older + cache. Probably doesn't play well with NoneResult cacheing""" + staleness = 30 + + def __init__(self, localcache, stalecache, realcache): + self.localcache = localcache + self.stalecache = stalecache + self.realcache = realcache + self.caches = (localcache, realcache) # for the other + # CacheChain machinery + + def get(self, key, default=None, stale = False, **kw): + if kw.get('allow_local', True) and key in self.caches[0]: + return self.caches[0][key] + + if stale: + stale_value = self._getstale([key]).get(key, None) + if stale_value is not None: + return stale_value # never return stale data into the + # LocalCache, or people that didn't + # say they'll take stale data may + # get it + + value = CacheChain.get(self, key, **kw) + if value is None: + return default + + if value is not None and stale: + self.stalecache.set(key, value, time=self.staleness) + + return value + + def simple_get_multi(self, keys, stale = False, **kw): + if not isinstance(keys, set): + keys = set(keys) + + ret = {} + + if kw.get('allow_local'): + for k in list(keys): + if k in self.localcache: + ret[k] = self.localcache[k] + keys.remove(k) + + if keys and stale: + stale_values = self._getstale(keys) + # never put stale data into the localcache + for k, v in stale_values.iteritems(): + ret[k] = v + keys.remove(k) + + if keys: + values = self.realcache.simple_get_multi(keys) + if values and stale: + self.stalecache.set_multi(values, time=self.staleness) + self.localcache.update(values) + ret.update(values) + + return ret + + def _getstale(self, keys): + # this is only in its own function to make tapping it for + # debugging easier + return self.stalecache.simple_get_multi(keys) + + def reset(self): + newcache = self.localcache.__class__() + self.localcache = newcache + self.caches = (newcache,) + self.caches[1:] + if isinstance(self.realcache, CacheChain): + assert isinstance(self.realcache.caches[0], LocalCache) + self.realcache.caches = (newcache,) + self.realcache.caches[1:] + + def __repr__(self): + return '<%s %r>' % (self.__class__.__name__, + (self.localcache, self.stalecache, self.realcache)) + +CL_ONE = ConsistencyLevel.ONE +CL_QUORUM = ConsistencyLevel.QUORUM +CL_ALL = ConsistencyLevel.ALL class CassandraCacheChain(CacheChain): def __init__(self, 
localcache, cassa, lock_factory, memcache=None, **kw): @@ -484,7 +565,7 @@ def __init__(self, localcache, cassa, lock_factory, memcache=None, **kw): self.make_lock = lock_factory CacheChain.__init__(self, caches, **kw) - def mutate(self, key, mutation_fn, default = None): + def mutate(self, key, mutation_fn, default = None, willread=True): """Mutate a Cassandra key as atomically as possible""" with self.make_lock('mutate_%s' % key): # we have to do some of the the work of the cache chain @@ -500,28 +581,29 @@ def mutate(self, key, mutation_fn, default = None): # which would require some more row-cache performace # testing) rcl = wcl = self.cassa.write_consistency_level - if rcl == CL_ZERO: - rcl = CL_ONE - try: + if willread: + try: + value = None + if self.memcache: + value = self.memcache.get(key) + if value is None: + value = self.cassa.get(key, + read_consistency_level = rcl) + except cassandra.ttypes.NotFoundException: + value = default + + # due to an old bug in NoneResult caching, we still + # have some of these around + if value == NoneResult: + value = default + + else: value = None - if self.memcache: - value = self.memcache.get(key) - if value is None: - value = self.cassa.get(key, - read_consistency_level = rcl) - except cassandra.ttypes.NotFoundException: - value = default - - # due to an old bug in NoneResult caching, we still have - # some of these around - if value == NoneResult: - value = default - - new_value = mutation_fn(copy(value)) # send in a copy in - # case they mutate it - # in-place - - if value != new_value: + + # send in a copy in case they mutate it in-place + new_value = mutation_fn(copy(value)) + + if not willread or value != new_value: self.cassa.set(key, new_value, write_consistency_level = wcl) for ca in self.caches[:-1]: @@ -549,20 +631,19 @@ def bulk_load(self, start='', end='', chunk_size = 100): class CassandraCache(CacheUtils): - """A cache that uses a Cassandra cluster. Uses a single keyspace - and column family and only the column-name 'value'""" - def __init__(self, keyspace, column_family, client, + """A cache that uses a Cassandra ColumnFamily. 
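
The StaleCacheChain above trades a little freshness for fewer trips to the authoritative cache. A simplified standalone model of its read path (plain dicts standing in for the local, stale, and memcached tiers; not the actual reddit classes):

    local, stalecache, real = {}, {}, {}

    def stale_get(key, stale=False):
        if key in local:                  # per-request LocalCache always wins
            return local[key]
        if stale and key in stalecache:   # stale answers never go into the LocalCache
            return stalecache[key]
        value = real.get(key)             # authoritative tier
        if value is not None and stale:
            stalecache[key] = value       # refresh the stale tier (the real chain uses a short TTL)
        return value

    real['foo'] = 'baz'
    stalecache['foo'] = 'bar'
    stale_get('foo', stale=True)          # 'bar' -- may be up to `staleness` (30 seconds) old
    stale_get('foo', stale=False)         # 'baz'
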
Uses only the + column-name 'value'""" + def __init__(self, column_family, client, read_consistency_level = CL_ONE, write_consistency_level = CL_QUORUM): - self.keyspace = keyspace self.column_family = column_family self.client = client self.read_consistency_level = read_consistency_level self.write_consistency_level = write_consistency_level - self.cf = pycassa.ColumnFamily(self.client, self.keyspace, - self.column_family, - read_consistency_level = read_consistency_level, - write_consistency_level = write_consistency_level) + self.cf = ColumnFamily(self.client, + self.column_family, + read_consistency_level = read_consistency_level, + write_consistency_level = write_consistency_level) def _rcl(self, alternative): return (alternative if alternative is not None @@ -578,7 +659,7 @@ def get(self, key, default = None, read_consistency_level = None): row = self.cf.get(key, columns=['value'], read_consistency_level = rcl) return pickle.loads(row['value']) - except (cassandra.ttypes.NotFoundException, KeyError): + except (CassandraNotFound, KeyError): return default def simple_get_multi(self, keys, read_consistency_level = None): @@ -590,29 +671,36 @@ def simple_get_multi(self, keys, read_consistency_level = None): for (key, row) in rows.iteritems()) def set(self, key, val, - write_consistency_level = None, time = None): + write_consistency_level = None, + time = None): if val == NoneResult: # NoneResult caching is for other parts of the chain return wcl = self._wcl(write_consistency_level) ret = self.cf.insert(key, {'value': pickle.dumps(val)}, - write_consistency_level = wcl) + write_consistency_level = wcl, + ttl = time) self._warm([key]) return ret def set_multi(self, keys, prefix='', - write_consistency_level = None, time = None): + write_consistency_level = None, + time = None): if not isinstance(keys, dict): + # allow iterables yielding tuples keys = dict(keys) - keys = dict(('%s%s' % (prefix, key), val) - for (key, val) in keys.iteritems()) + wcl = self._wcl(write_consistency_level) ret = {} - for key, val in keys.iteritems(): - if val != NoneResult: - ret[key] = self.cf.insert(key, {'value': pickle.dumps(val)}, - write_consistency_level = wcl) + + with self.cf.batch(write_consistency_level = wcl): + for key, val in keys.iteritems(): + if val != NoneResult: + ret[key] = self.cf.insert('%s%s' % (prefix, key), + {'value': pickle.dumps(val)}, + ttl = time) + self._warm(keys.keys()) return ret @@ -739,3 +827,25 @@ def _conv(s): h.update(_conv(kw)) return '%s(%s)' % (iden, h.hexdigest()) + +def test_stale(): + from pylons import g + ca = g.cache + assert isinstance(ca, StaleCacheChain) + + ca.localcache.clear() + + ca.stalecache.set('foo', 'bar', time=ca.staleness) + assert ca.stalecache.get('foo') == 'bar' + ca.realcache.set('foo', 'baz') + assert ca.realcache.get('foo') == 'baz' + + assert ca.get('foo', stale=True) == 'bar' + ca.localcache.clear() + assert ca.get('foo', stale=False) == 'baz' + ca.localcache.clear() + + assert ca.get_multi(['foo'], stale=True) == {'foo': 'bar'} + assert len(ca.localcache) == 0 + assert ca.get_multi(['foo'], stale=False) == {'foo': 'baz'} + ca.localcache.clear() diff --git a/r2/r2/lib/comment_tree.py b/r2/r2/lib/comment_tree.py index 98f3c6d181..f1e20d16e8 100644 --- a/r2/r2/lib/comment_tree.py +++ b/r2/r2/lib/comment_tree.py @@ -25,6 +25,8 @@ from r2.lib.db.sorts import epoch_seconds from r2.lib.cache import sgm +MAX_ITERATIONS = 20000 + def comments_key(link_id): return 'comments_' + str(link_id) @@ -308,10 +310,14 @@ def _load_link_comments(link_id): for 
cm_id in cids: num = 0 todo = [cm_id] + iteration_count = 0 while todo: + if iteration_count > MAX_ITERATIONS: + raise Exception("bad comment tree for link %s" % link_id) more = comment_tree.get(todo.pop(0), ()) num += len(more) todo.extend(more) + iteration_count += 1 num_children[cm_id] = num return cids, comment_tree, depth, num_children diff --git a/r2/r2/lib/contrib/discount/Makefile b/r2/r2/lib/contrib/discount/Makefile index 1d81e559a3..923302b348 100644 --- a/r2/r2/lib/contrib/discount/Makefile +++ b/r2/r2/lib/contrib/discount/Makefile @@ -13,7 +13,8 @@ SAMPLE_PGMS+= theme MKDLIB=libmarkdown.a OBJS=mkdio.o markdown.o dumptree.o generate.o \ resource.o docheader.o version.o toc.o css.o \ - xml.o Csio.o xmlpage.o basename.o emmatch.o + xml.o Csio.o xmlpage.o basename.o emmatch.o \ + tags.o html5.o MAN3PAGES=mkd-callbacks.3 mkd-functions.3 markdown.3 mkd-line.3 @@ -28,11 +29,11 @@ install.everything: install install.samples install.man install.samples: $(SAMPLE_PGMS) install /usr/bin/install -s -m 755 $(SAMPLE_PGMS) $(DESTDIR)/$(BINDIR) - /home/raldi/reddit/r2/r2/lib/contrib/discount/config.md $(DESTDIR)/$(MANDIR)/man1 + /tmp/discount-1.6.8/config.md $(DESTDIR)/$(MANDIR)/man1 /usr/bin/install -m 444 theme.1 $(DESTDIR)/$(MANDIR)/man1 install.man: - /home/raldi/reddit/r2/r2/lib/contrib/discount/config.md $(DESTDIR)/$(MANDIR)/man3 + /tmp/discount-1.6.8/config.md $(DESTDIR)/$(MANDIR)/man3 /usr/bin/install -m 444 $(MAN3PAGES) $(DESTDIR)/$(MANDIR)/man3 for x in mkd_line mkd_generateline; do \ ( echo '.\"' ; echo ".so man3/mkd-line.3" ) > $(DESTDIR)/$(MANDIR)/man3/$$x.3;\ @@ -43,9 +44,9 @@ install.man: for x in mkd_compile mkd_css mkd_generatecss mkd_generatehtml mkd_cleanup mkd_doc_title mkd_doc_author mkd_doc_date; do \ ( echo '.\"' ; echo ".so man3/mkd-functions.3" ) > $(DESTDIR)/$(MANDIR)/man3/$$x.3; \ done - /home/raldi/reddit/r2/r2/lib/contrib/discount/config.md $(DESTDIR)/$(MANDIR)/man7 + /tmp/discount-1.6.8/config.md $(DESTDIR)/$(MANDIR)/man7 /usr/bin/install -m 444 markdown.7 mkd-extensions.7 $(DESTDIR)/$(MANDIR)/man7 - /home/raldi/reddit/r2/r2/lib/contrib/discount/config.md $(DESTDIR)/$(MANDIR)/man1 + /tmp/discount-1.6.8/config.md $(DESTDIR)/$(MANDIR)/man1 /usr/bin/install -m 444 markdown.1 $(DESTDIR)/$(MANDIR)/man1 install.everything: install install.man @@ -79,9 +80,9 @@ test: $(PGMS) echo cols sh $$x || exit 1; \ done -cols: tools/cols.c +cols: tools/cols.c config.h $(CC) -o cols tools/cols.c -echo: tools/echo.c +echo: tools/echo.c config.h $(CC) -o echo tools/echo.c clean: diff --git a/r2/r2/lib/contrib/discount/Makefile.in b/r2/r2/lib/contrib/discount/Makefile.in index c4d5d562fd..44a5895dbd 100644 --- a/r2/r2/lib/contrib/discount/Makefile.in +++ b/r2/r2/lib/contrib/discount/Makefile.in @@ -13,7 +13,8 @@ SAMPLE_PGMS=mkd2html makepage MKDLIB=libmarkdown.a OBJS=mkdio.o markdown.o dumptree.o generate.o \ resource.o docheader.o version.o toc.o css.o \ - xml.o Csio.o xmlpage.o basename.o emmatch.o @AMALLOC@ + xml.o Csio.o xmlpage.o basename.o emmatch.o \ + tags.o html5.o @AMALLOC@ MAN3PAGES=mkd-callbacks.3 mkd-functions.3 markdown.3 mkd-line.3 @@ -79,9 +80,9 @@ test: $(PGMS) echo cols sh $$x || exit 1; \ done -cols: tools/cols.c +cols: tools/cols.c config.h $(CC) -o cols tools/cols.c -echo: tools/echo.c +echo: tools/echo.c config.h $(CC) -o echo tools/echo.c clean: diff --git a/r2/r2/lib/contrib/discount/Plan9/mkfile b/r2/r2/lib/contrib/discount/Plan9/mkfile index 189d7e9287..f15f987836 100644 --- a/r2/r2/lib/contrib/discount/Plan9/mkfile +++ 
b/r2/r2/lib/contrib/discount/Plan9/mkfile @@ -1,5 +1,5 @@ BIN=/$objtype/bin -CC='cc -D_BSD_EXTENSION' +CC='cc -D_BSD_EXTENSION -D_C99_SNPRINTF_EXTENSION' markdown: ape/psh -c 'cd .. && make' diff --git a/r2/r2/lib/contrib/discount/VERSION b/r2/r2/lib/contrib/discount/VERSION index 9edc58bb1d..d8c5e721a7 100644 --- a/r2/r2/lib/contrib/discount/VERSION +++ b/r2/r2/lib/contrib/discount/VERSION @@ -1 +1 @@ -1.6.4 +1.6.8 diff --git a/r2/r2/lib/contrib/discount/config.cmd b/r2/r2/lib/contrib/discount/config.cmd index ed15bbbbda..59920908c2 100755 --- a/r2/r2/lib/contrib/discount/config.cmd +++ b/r2/r2/lib/contrib/discount/config.cmd @@ -1,2 +1,2 @@ #! /bin/sh - ./configure.sh + ./configure.sh '--relaxed-emphasis' '--enable-superscript' diff --git a/r2/r2/lib/contrib/discount/config.h b/r2/r2/lib/contrib/discount/config.h index ec9f2e6ebd..80ccf973f9 100644 --- a/r2/r2/lib/contrib/discount/config.h +++ b/r2/r2/lib/contrib/discount/config.h @@ -1,5 +1,5 @@ /* - * configuration for markdown, generated Mon May 17 13:43:31 PDT 2010 + * configuration for markdown, generated Mon Oct 18 16:39:05 PDT 2010 * by raldi@zork */ #ifndef __AC_MARKDOWN_D @@ -20,6 +20,8 @@ #define HAVE_STRNCASECMP 1 #define HAVE_FCHDIR 1 #define TABSTOP 4 +#define SUPERSCRIPT 1 +#define RELAXED_EMPHASIS 1 #define HAVE_MALLOC_H 1 #define PATH_SED "/bin/sed" diff --git a/r2/r2/lib/contrib/discount/config.log b/r2/r2/lib/contrib/discount/config.log index 1231807308..5b1c680093 100644 --- a/r2/r2/lib/contrib/discount/config.log +++ b/r2/r2/lib/contrib/discount/config.log @@ -8,19 +8,19 @@ checking out the C compiler checking for "volatile" keyword checking for "const" keyword defining WORD & DWORD scalar types -/tmp/pd26167.c: In function 'main': -/tmp/pd26167.c:29: warning: incompatible implicit declaration of built-in function 'exit' -/tmp/pd26167.c:16: warning: return type of 'main' is not 'int' -/tmp/ngc26167.c: In function 'main': -/tmp/ngc26167.c:5: warning: initialization makes pointer from integer without a cast -/tmp/ngc26167.c:6: warning: initialization makes pointer from integer without a cast +/tmp/pd16169.c: In function 'main': +/tmp/pd16169.c:29: warning: incompatible implicit declaration of built-in function 'exit' +/tmp/pd16169.c:16: warning: return type of 'main' is not 'int' +/tmp/ngc16169.c: In function 'main': +/tmp/ngc16169.c:5: warning: initialization makes pointer from integer without a cast +/tmp/ngc16169.c:6: warning: initialization makes pointer from integer without a cast looking for header libgen.h looking for header pwd.h looking for the getpwuid function looking for the srandom function looking for the bzero function -/tmp/ngc26167.c: In function 'main': -/tmp/ngc26167.c:4: warning: incompatible implicit declaration of built-in function 'bzero' +/tmp/ngc16169.c: In function 'main': +/tmp/ngc16169.c:4: warning: incompatible implicit declaration of built-in function 'bzero' looking for the random function looking for the strcasecmp function looking for the strncasecmp function diff --git a/r2/r2/lib/contrib/discount/config.md b/r2/r2/lib/contrib/discount/config.md index 7e9437fb90..856b259597 100755 --- a/r2/r2/lib/contrib/discount/config.md +++ b/r2/r2/lib/contrib/discount/config.md @@ -1,5 +1,5 @@ #! 
/bin/sh -# script generated Mon May 17 13:43:31 PDT 2010 by configure.sh +# script generated Mon Oct 18 16:39:05 PDT 2010 by configure.sh test -d "$1" || mkdir -p "$1" exit 0 diff --git a/r2/r2/lib/contrib/discount/config.sub b/r2/r2/lib/contrib/discount/config.sub index 6fb1d363ea..39e1e4e639 100644 --- a/r2/r2/lib/contrib/discount/config.sub +++ b/r2/r2/lib/contrib/discount/config.sub @@ -3,7 +3,7 @@ s;@CPPFLAGS@;;g s;@INSTALL@;/usr/bin/install;g s;@INSTALL_PROGRAM@;/usr/bin/install -s -m 755;g s;@INSTALL_DATA@;/usr/bin/install -m 444;g -s;@INSTALL_DIR@;/home/raldi/reddit/r2/r2/lib/contrib/discount/config.md;g +s;@INSTALL_DIR@;/tmp/discount-1.6.8/config.md;g s;@CC@;cc;g s;@AR@;/usr/bin/ar;g s;@RANLIB@;/usr/bin/ranlib;g @@ -13,13 +13,13 @@ s:@BYTE@:unsigned char:g s;@THEME@;;g s;@TABSTOP@;4;g s;@AMALLOC@;;g -s;@STRICT@;.\";g +s;@STRICT@;;g s;@LIBS@;;g s;@CONFIGURE_FILES@;config.cmd config.sub config.h config.mak config.log config.md;g s;@GENERATED_FILES@;Makefile version.c markdown.1;g s;@CFLAGS@;-g;g s;@LDFLAGS@;-g;g -s;@srcdir@;/home/raldi/reddit/r2/r2/lib/contrib/discount;g +s;@srcdir@;/tmp/discount-1.6.8;g s;@prefix@;/usr/local;g s;@exedir@;/usr/local/bin;g s;@sbindir@;/usr/local/sbin;g diff --git a/r2/r2/lib/contrib/discount/cstring.h b/r2/r2/lib/contrib/discount/cstring.h index 164e75bb67..86755e8a9e 100644 --- a/r2/r2/lib/contrib/discount/cstring.h +++ b/r2/r2/lib/contrib/discount/cstring.h @@ -10,7 +10,9 @@ #include #include -#include "amalloc.h" +#ifndef __WITHOUT_AMALLOC +# include "amalloc.h" +#endif /* expandable Pascal-style string. */ diff --git a/r2/r2/lib/contrib/discount/dumptree.c b/r2/r2/lib/contrib/discount/dumptree.c index ecace98aa0..068084880d 100644 --- a/r2/r2/lib/contrib/discount/dumptree.c +++ b/r2/r2/lib/contrib/discount/dumptree.c @@ -33,6 +33,7 @@ Pptype(int typ) case HR : return "hr"; case TABLE : return "table"; case SOURCE : return "source"; + case STYLE : return "style"; default : return "mystery node!"; } } diff --git a/r2/r2/lib/contrib/discount/generate.c b/r2/r2/lib/contrib/discount/generate.c index 075a203bb0..7fe12e9861 100644 --- a/r2/r2/lib/contrib/discount/generate.c +++ b/r2/r2/lib/contrib/discount/generate.c @@ -18,7 +18,7 @@ #include "amalloc.h" typedef int (*stfu)(const void*,const void*); - +typedef void (*spanhandler)(MMIOT*,int); /* forward declarations */ static void text(MMIOT *f); @@ -164,16 +164,6 @@ Qprintf(MMIOT *f, char *fmt, ...) } -/* Qcopy() - */ -static void -Qcopy(int count, MMIOT *f) -{ - while ( count-- > 0 ) - Qchar(pull(f), f); -} - - /* Qem() */ static void @@ -272,12 +262,12 @@ parenthetical(int in, int out, MMIOT *f) for ( indent=1,size=0; indent; size++ ) { if ( (c = pull(f)) == EOF ) return EOF; - else if ( c == in ) - ++indent; - else if ( (c == '\\') && (peek(f,1) == out) ) { + else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) { ++size; pull(f); } + else if ( c == in ) + ++indent; else if ( c == out ) --indent; } @@ -664,11 +654,11 @@ mangle(char *s, int len, MMIOT *f) /* nrticks() -- count up a row of tick marks */ static int -nrticks(int offset, MMIOT *f) +nrticks(int offset, int tickchar, MMIOT *f) { int tick = 0; - while ( peek(f, offset+tick) == '`' ) tick++; + while ( peek(f, offset+tick) == tickchar ) tick++; return tick; } /* nrticks */ @@ -677,36 +667,34 @@ nrticks(int offset, MMIOT *f) /* matchticks() -- match a certain # of ticks, and if that fails * match the largest subset of those ticks. 
* - * if a subset was matched, modify the passed in - * # of ticks so that the caller (text()) can - * appropriately process the horrible thing. + * if a subset was matched, return the # of ticks + * that were matched. */ static int -matchticks(MMIOT *f, int *ticks) +matchticks(MMIOT *f, int tickchar, int ticks, int *endticks) { - int size, tick, c; + int size, count, c; int subsize=0, subtick=0; - for (size = *ticks; (c=peek(f,size)) != EOF; ) { - if ( c == '`' ) - if ( (tick=nrticks(size,f)) == *ticks ) + *endticks = ticks; + for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) { + if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) { + if ( count == ticks ) return size; - else { - if ( tick > subtick ) { + else if ( count ) { + if ( (count > subtick) && (count < ticks) ) { subsize = size; - subtick = tick; + subtick = count; } - size += tick; + size += count; } - else - size++; + } } if ( subsize ) { - *ticks = subtick; + *endticks = subtick; return subsize; } return 0; - } /* matchticks */ @@ -727,13 +715,24 @@ code(MMIOT *f, char *s, int length) } /* code */ +/* delspan() -- write out a chunk of text, blocking with ... + */ +static void +delspan(MMIOT *f, int size) +{ + Qstring("", f); + ___mkd_reparse(cursor(f)-1, size, 0, f); + Qstring("", f); +} + + /* codespan() -- write out a chunk of text as code, trimming one * space off the front and/or back as appropriate. */ static void codespan(MMIOT *f, int size) { - int i=0, c; + int i=0; if ( size > 1 && peek(f, size-1) == ' ' ) --size; if ( peek(f,i) == ' ' ) ++i, --size; @@ -1058,6 +1057,30 @@ smartypants(int c, int *flags, MMIOT *f) } /* smartypants */ +/* process a body of text encased in some sort of tick marks. If it + * works, generate the output and return 1, otherwise just return 0 and + * let the caller figure it out. + */ +static int +tickhandler(MMIOT *f, int tickchar, int minticks, spanhandler spanner) +{ + int endticks, size; + int tick = nrticks(0, tickchar, f); + + if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) { + if ( endticks < tick ) { + size += (tick - endticks); + tick = endticks; + } + + shift(f, tick); + (*spanner)(f,size); + shift(f, size+tick-1); + return 1; + } + return 0; +} + #define tag_text(f) (f->flags & INSIDE_TAG) @@ -1151,21 +1174,12 @@ text(MMIOT *f) } break; - case '`': if ( tag_text(f) ) + case '~': if ( (f->flags & (NOSTRIKETHROUGH|INSIDE_TAG|STRICT)) || !tickhandler(f,c,2,delspan) ) Qchar(c, f); - else { - int size, tick = nrticks(0, f); + break; - if ( size = matchticks(f, &tick) ) { - shift(f, tick); - codespan(f, size-tick); - shift(f, size-1); - } - else { - Qchar(c, f); - Qcopy(tick-1, f); - } - } + case '`': if ( tag_text(f) || !tickhandler(f,c,1,codespan) ) + Qchar(c, f); break; case '\\': switch ( c = pull(f) ) { @@ -1333,8 +1347,8 @@ static int printblock(Paragraph *pp, MMIOT *f) { Line *t = pp->text; - static char *Begin[] = { "", "

", "

" }; - static char *End[] = { "", "

","
" }; + static char *Begin[] = { "", "

", "

" }; + static char *End[] = { "", "

","

" }; while (t) { if ( S(t->text) ) { diff --git a/r2/r2/lib/contrib/discount/html5.c b/r2/r2/lib/contrib/discount/html5.c new file mode 100644 index 0000000000..8b869885aa --- /dev/null +++ b/r2/r2/lib/contrib/discount/html5.c @@ -0,0 +1,24 @@ +/* block-level tags for passing html5 blocks through the blender + */ +#include "tags.h" + +void +mkd_with_html5_tags() +{ + static int populated = 0; + + if ( populated ) return; + populated = 1; + + mkd_prepare_tags(); + + mkd_define_tag("ASIDE", 0); + mkd_define_tag("FOOTER", 0); + mkd_define_tag("HEADER", 0); + mkd_define_tag("HGROUP", 0); + mkd_define_tag("NAV", 0); + mkd_define_tag("SECTION", 0); + mkd_define_tag("ARTICLE", 0); + + mkd_sort_tags(); +} diff --git a/r2/r2/lib/contrib/discount/main.c b/r2/r2/lib/contrib/discount/main.c index 3d6e502603..fcde68a2fc 100644 --- a/r2/r2/lib/contrib/discount/main.c +++ b/r2/r2/lib/contrib/discount/main.c @@ -56,6 +56,8 @@ static struct { { "toc", 0, MKD_TOC }, { "autolink",0, MKD_AUTOLINK }, { "safelink",0, MKD_SAFELINK }, + { "del", 1, MKD_NOSTRIKETHROUGH }, + { "strikethrough", 1, MKD_NOSTRIKETHROUGH }, { "1.0", 0, MKD_1_COMPAT }, } ; @@ -113,6 +115,7 @@ main(int argc, char **argv) int flags = 0; int debug = 0; int toc = 0; + int with_html5 = 0; int use_mkd_line = 0; char *urlflags = 0; char *text = 0; @@ -127,13 +130,16 @@ main(int argc, char **argv) pgm = basename(argv[0]); opterr = 1; - while ( (opt=getopt(argc, argv, "b:df:E:F:o:s:t:TV")) != EOF ) { + while ( (opt=getopt(argc, argv, "5b:df:E:F:o:s:t:TV")) != EOF ) { switch (opt) { + case '5': with_html5 = 1; + break; case 'b': urlbase = optarg; break; case 'd': debug = 1; break; - case 'V': printf("%s: discount %s\n", pgm, markdown_version); + case 'V': printf("%s: discount %s%s\n", pgm, markdown_version, + with_html5 ? " +html5":""); exit(0); case 'E': urlflags = optarg; break; @@ -167,6 +173,9 @@ main(int argc, char **argv) argc -= optind; argv += optind; + if ( with_html5 ) + mkd_with_html5_tags(); + if ( use_mkd_line ) rc = mkd_generateline( text, strlen(text), stdout, flags); else { diff --git a/r2/r2/lib/contrib/discount/markdown.1 b/r2/r2/lib/contrib/discount/markdown.1 index 8f5ea3ee7d..af97649314 100644 --- a/r2/r2/lib/contrib/discount/markdown.1 +++ b/r2/r2/lib/contrib/discount/markdown.1 @@ -59,10 +59,10 @@ Do not process pandoc headers. Do not process Markdown Extra-style tables. .It Ar tabstops Use markdown-standard 4-space tabstops. -.".It Ar strict -."Disable superscript and relaxed emphasis. -.".It Ar relax -."Enable superscript and relaxed emphasis (this is the default.) +.It Ar strict +Disable superscript and relaxed emphasis. +.It Ar relax +Enable superscript and relaxed emphasis (this is the default.) .It Ar toc Enable table-of-contents support .It Ar 1.0 diff --git a/r2/r2/lib/contrib/discount/markdown.c b/r2/r2/lib/contrib/discount/markdown.c index 95b402cd8b..dc7deea6d0 100644 --- a/r2/r2/lib/contrib/discount/markdown.c +++ b/r2/r2/lib/contrib/discount/markdown.c @@ -4,6 +4,8 @@ * The redistribution terms are provided in the COPYRIGHT file that must * be distributed with this source code. 
*/ +#include "config.h" + #include #include #include @@ -11,51 +13,15 @@ #include #include -#include "config.h" - #include "cstring.h" #include "markdown.h" #include "amalloc.h" - -/* block-level tags for passing html blocks through the blender - */ -struct kw { - char *id; - int size; - int selfclose; -} ; - -#define KW(x) { x, sizeof(x)-1, 0 } -#define SC(x) { x, sizeof(x)-1, 1 } - -static struct kw blocktags[] = { KW("!--"), KW("STYLE"), KW("SCRIPT"), - KW("ADDRESS"), KW("BDO"), KW("BLOCKQUOTE"), - KW("CENTER"), KW("DFN"), KW("DIV"), KW("H1"), - KW("H2"), KW("H3"), KW("H4"), KW("H5"), - KW("H6"), KW("LISTING"), KW("NOBR"), - KW("UL"), KW("P"), KW("OL"), KW("DL"), - KW("PLAINTEXT"), KW("PRE"), KW("TABLE"), - KW("WBR"), KW("XMP"), SC("HR"), SC("BR"), - KW("IFRAME"), KW("MAP") }; -#define SZTAGS (sizeof blocktags / sizeof blocktags[0]) -#define MAXTAG 11 /* sizeof "BLOCKQUOTE" */ +#include "tags.h" typedef int (*stfu)(const void*,const void*); typedef ANCHOR(Paragraph) ParagraphRoot; - -/* case insensitive string sort (for qsort() and bsearch() of block tags) - */ -static int -casort(struct kw *a, struct kw *b) -{ - if ( a->size != b->size ) - return a->size - b->size; - return strncasecmp(a->id, b->id, b->size); -} - - /* case insensitive string sort for Footnote tags. */ int @@ -135,19 +101,28 @@ ___mkd_tidy(Cstring *t) } +static struct kw comment = { "!--", 3, 0 }; + static struct kw * isopentag(Line *p) { int i=0, len; - struct kw key, *ret; + char *line; if ( !p ) return 0; + line = T(p->text); len = S(p->text); - if ( len < 3 || T(p->text)[0] != '<' ) + if ( len < 3 || line[0] != '<' ) return 0; + if ( line[1] == '!' && line[2] == '-' && line[3] == '-' ) + /* comments need special case handling, because + * the !-- doesn't need to end in a whitespace + */ + return &comment; + /* find how long the tag is so we can check to see if * it's a block-level tag */ @@ -156,13 +131,8 @@ isopentag(Line *p) && !isspace(T(p->text)[i]); ++i ) ; - key.id = T(p->text)+1; - key.size = i-1; - - if ( ret = bsearch(&key, blocktags, SZTAGS, sizeof key, (stfu)casort)) - return ret; - return 0; + return mkd_search_tags(T(p->text)+1, i-1); } @@ -204,6 +174,25 @@ splitline(Line *t, int cutpoint) } +static Line * +commentblock(Paragraph *p) +{ + Line *t, *ret; + char *end; + + for ( t = p->text; t ; t = t->next) { + if ( end = strstr(T(t->text), "-->") ) { + splitline(t, 3 + (end - T(t->text)) ); + ret = t->next; + t->next = 0; + return ret; + } + } + return t; + +} + + static Line * htmlblock(Paragraph *p, struct kw *tag) { @@ -212,7 +201,10 @@ htmlblock(Paragraph *p, struct kw *tag) int c; int i, closing, depth=0; - if ( tag->selfclose || (tag->size >= MAXTAG) ) { + if ( tag == &comment ) + return commentblock(p); + + if ( tag->selfclose ) { ret = f.t->next; f.t->next = 0; return ret; @@ -263,25 +255,6 @@ htmlblock(Paragraph *p, struct kw *tag) } -static Line * -comment(Paragraph *p) -{ - Line *t, *ret; - char *end; - - for ( t = p->text; t ; t = t->next) { - if ( end = strstr(T(t->text), "-->") ) { - splitline(t, 3 + (end - T(t->text)) ); - ret = t->next; - t->next = 0; - return ret; - } - } - return t; - -} - - /* tables look like * header|header{|header} * ------|------{|......} @@ -384,26 +357,9 @@ ishr(Line *t) static int -ishdr(Line *t, int *htyp) +issetext(Line *t, int *htyp) { int i; - - - /* first check for etx-style ###HEADER### - */ - - /* leading run of `#`'s ? 
- */ - for ( i=0; T(t->text)[i] == '#'; ++i) - ; - - /* ANY leading `#`'s make this into an ETX header - */ - if ( i && (i < S(t->text) || i > 1) ) { - *htyp = ETX; - return 1; - } - /* then check for setext-style HEADER * ====== */ @@ -428,6 +384,31 @@ ishdr(Line *t, int *htyp) } +static int +ishdr(Line *t, int *htyp) +{ + int i; + + + /* first check for etx-style ###HEADER### + */ + + /* leading run of `#`'s ? + */ + for ( i=0; T(t->text)[i] == '#'; ++i) + ; + + /* ANY leading `#`'s make this into an ETX header + */ + if ( i && (i < S(t->text) || i > 1) ) { + *htyp = ETX; + return 1; + } + + return issetext(t, htyp); +} + + static int isdefinition(Line *t) { @@ -762,11 +743,12 @@ listitem(Paragraph *p, int indent) t->next = 0; return q; } - /* indent as far as the initial line was indented. */ - indent = clip; + /* indent at least 2, and at most as + * as far as the initial line was indented. */ + indent = clip ? clip : 2; } - if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !ishdr(q,&z) ) { + if ( (q->dle < indent) && (ishr(q) || islist(q,&z)) && !issetext(q,&z) ) { q = t->next; t->next = 0; return q; @@ -967,10 +949,7 @@ compile_document(Line *ptr, MMIOT *f) T(source) = E(source) = 0; } p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML); - if ( strcmp(tag->id, "!--") == 0 ) - ptr = comment(p); - else - ptr = htmlblock(p, tag); + ptr = htmlblock(p, tag); } else if ( isfootnote(ptr) ) { /* footnotes, like cats, sleep anywhere; pull them @@ -1073,15 +1052,15 @@ compile(Line *ptr, int toplevel, MMIOT *f) } -static void -initialize() +void +mkd_initialize() { static int first = 1; if ( first-- > 0 ) { first = 0; INITRNG(time(0)); - qsort(blocktags, SZTAGS, sizeof blocktags[0], (stfu)casort); + mkd_prepare_tags(); } } @@ -1111,7 +1090,7 @@ mkd_compile(Document *doc, int flags) doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]); CREATE(*doc->ctx->footnotes); - initialize(); + mkd_initialize(); doc->code = compile_document(T(doc->content), doc->ctx); qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes), diff --git a/r2/r2/lib/contrib/discount/markdown.h b/r2/r2/lib/contrib/discount/markdown.h index 26d9269fda..c481e57d22 100644 --- a/r2/r2/lib/contrib/discount/markdown.h +++ b/r2/r2/lib/contrib/discount/markdown.h @@ -86,6 +86,7 @@ typedef struct mmiot { #define NO_PSEUDO_PROTO 0x0040 #define CDATA_OUTPUT 0x0080 #define NOTABLES 0x0400 +#define NOSTRIKETHROUGH 0x0800 #define TOC 0x1000 #define MKD_1_COMPAT 0x2000 #define AUTOLINK 0x4000 @@ -104,6 +105,8 @@ typedef struct mmiot { * root of the linked list of Lines. */ typedef struct document { + int magic; /* "I AM VALID" magic number */ +#define VALID_DOCUMENT 0x19600731 Line *headers; /* title -> author(s) -> date */ ANCHOR(Line) content; /* uncompiled text, not valid after compile() */ Paragraph *code; /* intermediate code generated by compile() */ diff --git a/r2/r2/lib/contrib/discount/mkd-extensions.7 b/r2/r2/lib/contrib/discount/mkd-extensions.7 index 7e96d3a40d..1683748bf2 100644 --- a/r2/r2/lib/contrib/discount/mkd-extensions.7 +++ b/r2/r2/lib/contrib/discount/mkd-extensions.7 @@ -20,7 +20,7 @@ The new image syntax is ![alt text](image =/height/x/width/ "title") .fi .Ss pseudo-protocols -Three pseudo-protocols have been added to links +Five pseudo-protocols have been added to links .Bl -tag -width XXXXX .It Ar id: The @@ -48,9 +48,16 @@ is discarded. The .Ar "alt text" is marked up and written to the output, wrapped with -.Em " +.Em "" and .Em "" . 
+.It Ar lang: +The +.Ar "alt text" +s marked up and written to the output, wrapped with +.Em "" +and +.Em "" . .El .Ss Pandoc headers If markdown was configured with @@ -163,6 +170,18 @@ is at the start of a column, it tells to align the cell contents to the left; if it's at the end, it aligns right, and if there's one at the start and at the end, it centers. +.Ss strikethrough +A strikethrough syntax is supported in much the same way that +.Ar ` +is used to define a section of code. If you enclose text with +two or more tildes, such as +.Em ~~erased text~~ +it will be written as +.Em "erased text" . +Like code sections, you may use as many +.Ar ~ +as you want, but there must be as many starting tildes as closing +tildes. .Sh AUTHOR David Parsons .%T http://www.pell.portland.or.us/~orc/ diff --git a/r2/r2/lib/contrib/discount/mkd-functions.3 b/r2/r2/lib/contrib/discount/mkd-functions.3 index 2bbaf2af51..fb49c058e6 100644 --- a/r2/r2/lib/contrib/discount/mkd-functions.3 +++ b/r2/r2/lib/contrib/discount/mkd-functions.3 @@ -127,7 +127,7 @@ accepts the same flags that and .Fn mkd_string do; -.Bl -tag -width MKD_NOIMAGE -compact +.Bl -tag -width MKD_NOSTRIKETHROUGH -compact .It Ar MKD_NOIMAGE Do not process `![]' and remove @@ -159,6 +159,8 @@ function. .It Ar MKD_1_COMPAT MarkdownTest_1.0 compatability flag; trim trailing spaces from the first line of code blocks and disable implicit reference links. +.It Ar MKD_NOSTRIKETHROUGH +Disable strikethrough support. .El .Sh RETURN VALUES The functions diff --git a/r2/r2/lib/contrib/discount/mkdio.c b/r2/r2/lib/contrib/discount/mkdio.c index 0e930450ed..324329959e 100644 --- a/r2/r2/lib/contrib/discount/mkdio.c +++ b/r2/r2/lib/contrib/discount/mkdio.c @@ -24,8 +24,10 @@ new_Document() Document *ret = calloc(sizeof(Document), 1); if ( ret ) { - if (( ret->ctx = calloc(sizeof(MMIOT), 1) )) + if (( ret->ctx = calloc(sizeof(MMIOT), 1) )) { + ret->magic = VALID_DOCUMENT; return ret; + } free(ret); } return 0; diff --git a/r2/r2/lib/contrib/discount/mkdio.h b/r2/r2/lib/contrib/discount/mkdio.h index 3e30c6fb7e..0c26f110ad 100644 --- a/r2/r2/lib/contrib/discount/mkdio.h +++ b/r2/r2/lib/contrib/discount/mkdio.h @@ -74,6 +74,7 @@ extern char markdown_version[]; #define MKD_NO_EXT 0x0040 /* don't allow pseudo-protocols */ #define MKD_CDATA 0x0080 /* generate code for xml ![CDATA[...]] */ #define MKD_NOTABLES 0x0400 /* disallow tables */ +#define MKD_NOSTRIKETHROUGH 0x0800/* forbid ~~strikethrough~~ */ #define MKD_TOC 0x1000 /* do table-of-contents processing */ #define MKD_1_COMPAT 0x2000 /* compatability with MarkdownTest_1.0 */ #define MKD_AUTOLINK 0x4000 /* make http://foo.com link even without <>s */ diff --git a/r2/r2/lib/contrib/discount/resource.c b/r2/r2/lib/contrib/discount/resource.c index 3e5628a963..7f1bc2e1ad 100644 --- a/r2/r2/lib/contrib/discount/resource.c +++ b/r2/r2/lib/contrib/discount/resource.c @@ -140,7 +140,7 @@ ___mkd_freeLineRange(Line *anchor, Line *stop) void mkd_cleanup(Document *doc) { - if ( doc ) { + if ( doc && (doc->magic == VALID_DOCUMENT) ) { if ( doc->ctx ) { ___mkd_freemmiot(doc->ctx, 0); free(doc->ctx); diff --git a/r2/r2/lib/contrib/discount/tags.c b/r2/r2/lib/contrib/discount/tags.c new file mode 100644 index 0000000000..3821699dfb --- /dev/null +++ b/r2/r2/lib/contrib/discount/tags.c @@ -0,0 +1,110 @@ +/* block-level tags for passing html blocks through the blender + */ +#define __WITHOUT_AMALLOC 1 +#include "cstring.h" +#include "tags.h" + +STRING(struct kw) blocktags; + + +/* define a html block tag + */ +void 
+mkd_define_tag(char *id, int selfclose) +{ + struct kw *p = &EXPAND(blocktags); + + p->id = id; + p->size = strlen(id); + p->selfclose = selfclose; +} + + +/* case insensitive string sort (for qsort() and bsearch() of block tags) + */ +static int +casort(struct kw *a, struct kw *b) +{ + if ( a->size != b->size ) + return a->size - b->size; + return strncasecmp(a->id, b->id, b->size); +} + + +/* stupid cast to make gcc shut up about the function types being + * passed into qsort() and bsearch() + */ +typedef int (*stfu)(const void*,const void*); + + +/* sort the list of html block tags for later searching + */ +void +mkd_sort_tags() +{ + qsort(T(blocktags), S(blocktags), sizeof(struct kw), (stfu)casort); +} + + + +/* look for a token in the html block tag list + */ +struct kw* +mkd_search_tags(char *pat, int len) +{ + struct kw key; + + key.id = pat; + key.size = len; + + return bsearch(&key, T(blocktags), S(blocktags), sizeof key, (stfu)casort); +} + + +/* load in the standard collection of html tags that markdown supports + */ +void +mkd_prepare_tags() +{ + +#define KW(x) mkd_define_tag(x, 0) +#define SC(x) mkd_define_tag(x, 1) + + static int populated = 0; + + if ( populated ) return; + populated = 1; + + KW("STYLE"); + KW("SCRIPT"); + KW("ADDRESS"); + KW("BDO"); + KW("BLOCKQUOTE"); + KW("CENTER"); + KW("DFN"); + KW("DIV"); + KW("OBJECT"); + KW("H1"); + KW("H2"); + KW("H3"); + KW("H4"); + KW("H5"); + KW("H6"); + KW("LISTING"); + KW("NOBR"); + KW("UL"); + KW("P"); + KW("OL"); + KW("DL"); + KW("PLAINTEXT"); + KW("PRE"); + KW("TABLE"); + KW("WBR"); + KW("XMP"); + SC("HR"); + SC("BR"); + KW("IFRAME"); + KW("MAP"); + + mkd_sort_tags(); +} /* mkd_prepare_tags */ diff --git a/r2/r2/lib/contrib/discount/tags.h b/r2/r2/lib/contrib/discount/tags.h new file mode 100644 index 0000000000..b5bddb3b2b --- /dev/null +++ b/r2/r2/lib/contrib/discount/tags.h @@ -0,0 +1,18 @@ +/* block-level tags for passing html blocks through the blender + */ +#ifndef _TAGS_D +#define _TAGS_D + +struct kw { + char *id; + int size; + int selfclose; +} ; + + +struct kw* mkd_search_tags(char *, int); +void mkd_prepare_tags(); +void mkd_sort_tags(); +void mkd_define_tag(char *, int); + +#endif diff --git a/r2/r2/lib/contrib/discount/tests/code.t b/r2/r2/lib/contrib/discount/tests/code.t index 3227ecf32a..8e8f20c074 100644 --- a/r2/r2/lib/contrib/discount/tests/code.t +++ b/r2/r2/lib/contrib/discount/tests/code.t @@ -12,8 +12,12 @@ try 'format for code block html' \ code ' +try 'mismatched backticks' '```tick``' '
<p><code>`tick</code></p>'
+try 'mismatched backticks(2)' '``tick```' '<p>``tick```</p>'
 try 'unclosed single backtick' '`hi there' '<p>`hi there</p>'
 try 'unclosed double backtick' '``hi there' '<p>``hi there</p>'
+try 'triple backticks' '```hi there```' '<p><code>hi there</code></p>'
+try 'quadruple backticks' '````hi there````' '<p><code>hi there</code></p>'
 try 'remove space around code' '`` hi there ``' '<p><code>hi there</code></p>'
 try 'code containing backticks' '`` a```b ``' '<p><code>a```b</code></p>'
 try 'backslash before backtick' '`a\`' '<p><code>a\</code></p>
' diff --git a/r2/r2/lib/contrib/discount/tests/html.t b/r2/r2/lib/contrib/discount/tests/html.t index f233451747..7266f1a67e 100644 --- a/r2/r2/lib/contrib/discount/tests/html.t +++ b/r2/r2/lib/contrib/discount/tests/html.t @@ -133,5 +133,7 @@ try 'two adjacent comments' "$COMMENTS" \

line 2

' +try 'comment, no white space' '' '' + summary $0 exit $rc diff --git a/r2/r2/lib/contrib/discount/tests/html5.t b/r2/r2/lib/contrib/discount/tests/html5.t new file mode 100644 index 0000000000..0056f010ec --- /dev/null +++ b/r2/r2/lib/contrib/discount/tests/html5.t @@ -0,0 +1,17 @@ +. tests/functions.sh + +title "html5 blocks (mkd_with_html5_tags)" + +rc=0 +MARKDOWN_FLAGS= + +try -5 'html5 block elements enabled' \ + '' \ + '' + +try 'html5 block elements disabled' \ + '' \ + '

' + +summary $0 +exit $rc diff --git a/r2/r2/lib/contrib/discount/tests/linkylinky.t b/r2/r2/lib/contrib/discount/tests/linkylinky.t index 3a81db36a8..0d3b2ffdad 100644 --- a/r2/r2/lib/contrib/discount/tests/linkylinky.t +++ b/r2/r2/lib/contrib/discount/tests/linkylinky.t @@ -10,6 +10,7 @@ try 'url contains +' '[hehehe](u+rl)' '

hehehe

' try 'url contains "' '[hehehe](u"rl)' '

hehehe

' try 'url contains <' '[hehehe](uhehehe

' try 'url contains whitespace' '[ha](r u)' '

ha

' +try 'label contains escaped []s' '[a\[b\]c](d)' '

a[b]c

' try 'url contains whitespace & title' \ '[hehehe](r u "there")' \ diff --git a/r2/r2/lib/contrib/discount/tests/peculiarities.t b/r2/r2/lib/contrib/discount/tests/peculiarities.t index 3f3b63b584..f97597f23d 100644 --- a/r2/r2/lib/contrib/discount/tests/peculiarities.t +++ b/r2/r2/lib/contrib/discount/tests/peculiarities.t @@ -35,6 +35,35 @@ try 'ol with mixed item prefixes' \
  • B
  • ' +try 'nested lists and a header' \ + '- A list item +That goes over multiple lines + + and paragraphs + +- Another list item + + + with a + + sublist + +## AND THEN A HEADER' \ +'
      +
    • A list item +That goes over multiple lines

      + +

      and paragraphs

    • +
    • Another list item

      + +
        +
      • with a
      • +
      • sublist
      • +
      +
    • +
    + + +

    AND THEN A HEADER

    ' + try 'forcing a
    ' 'this is' '

    this
    is

'
diff --git a/r2/r2/lib/contrib/discount/tests/strikethrough.t b/r2/r2/lib/contrib/discount/tests/strikethrough.t
new file mode 100644
index 0000000000..9016a5c863
--- /dev/null
+++ b/r2/r2/lib/contrib/discount/tests/strikethrough.t
@@ -0,0 +1,15 @@
+. tests/functions.sh
+
+title "strikethrough"
+
+rc=0
+MARKDOWN_FLAGS=
+
+try 'strikethrough' '~~deleted~~' '<p><del>deleted</del></p>'
+try -fnodel '... with -fnodel' '~~deleted~~' '<p>~~deleted~~</p>'
+try 'mismatched tildes' '~~~tick~~' '<p><del>~tick</del></p>'
+try 'mismatched tildes(2)' '~~tick~~~' '<p>~~tick~~~</p>'
+try 'single tildes' '~tick~' '<p>~tick~</p>
    ' + +summary $0 +exit $rc diff --git a/r2/r2/lib/contrib/discount/theme.c b/r2/r2/lib/contrib/discount/theme.c index 97f401aae7..75a093a117 100644 --- a/r2/r2/lib/contrib/discount/theme.c +++ b/r2/r2/lib/contrib/discount/theme.c @@ -554,6 +554,9 @@ char **argv; if ( q ) *q = 0; + else + q = output + strlen(output); + strcat(q, ".html"); } } diff --git a/r2/r2/lib/count.py b/r2/r2/lib/count.py index f62038382b..38a260325b 100644 --- a/r2/r2/lib/count.py +++ b/r2/r2/lib/count.py @@ -31,18 +31,18 @@ def incr_counts(wrapped): pass +def incr_sr_count(sr): + pass + def get_link_counts(period = count_period): links = Link._query(Link.c._date >= utils.timeago(period), limit=50, data = True) return dict((l._fullname, (0, l.sr_id)) for l in links) -def get_sr_counts(period = count_period): - srs = Subreddit._query() +def get_sr_counts(): + srs = utils.fetch_things2(Subreddit._query()) - return dict((l._fullname, (0, l.sr_id)) for l in links) - -def clear_sr_counts(names): - pass + return dict((sr._fullname, sr._ups) for sr in srs) try: from r2admin.lib.count import * diff --git a/r2/r2/lib/cssfilter.py b/r2/r2/lib/cssfilter.py index 18d83bef75..a2d2259bbe 100644 --- a/r2/r2/lib/cssfilter.py +++ b/r2/r2/lib/cssfilter.py @@ -89,6 +89,11 @@ '-webkit-border-top-right-radius': r'(({length}|{percentage}){w}){1,2}', '-webkit-border-bottom-left-radius': r'(({length}|{percentage}){w}){1,2}', '-webkit-border-bottom-right-radius': r'(({length}|{percentage}){w}){1,2}', + 'border-radius': r'(({length}|{percentage}){w}){1,2}', + 'border-radius-topleft': r'(({length}|{percentage}){w}){1,2}', + 'border-radius-topright': r'(({length}|{percentage}){w}){1,2}', + 'border-radius-bottomleft': r'(({length}|{percentage}){w}){1,2}', + 'border-radius-bottomright': r'(({length}|{percentage}){w}){1,2}', # http://www.w3.org/TR/css3-text/#text-shadow 'text-shadow': r'none|({single-text-shadow}{w},{w})*{single-text-shadow}', @@ -112,7 +117,7 @@ def macro_value(m): def _compile_regexes(tokdict): """ Compile all regular expressions into callable objects """ for key, value in tokdict.items(): - tokdict[key] = re.compile('^(?:%s)$' % value, re.I).match + tokdict[key] = re.compile('\A(?:%s)\Z' % value, re.I).match return tokdict _compile_regexes(_expand_macros(custom_values,custom_macros)) @@ -161,7 +166,7 @@ def __str__(self): return "ValidationError%s: %s (%s)" % (line, self.message, obj) # local urls should be in the static directory -local_urls = re.compile(r'^/static/[a-z./-]+$') +local_urls = re.compile(r'\A/static/[a-z./-]+\Z') # substitutable urls will be css-valid labels surrounded by "%%" custom_img_urls = re.compile(r'%%([a-zA-Z0-9\-]+)%%') def valid_url(prop,value,report): @@ -250,8 +255,8 @@ def valid_value(prop,value,report): if value.primitiveType == CSSPrimitiveValue.CSS_URI: valid_url(prop,value,report) -error_message_extract_re = re.compile('.*\\[([0-9]+):[0-9]*:.*\\]$') -only_whitespace = re.compile('^\s*$') +error_message_extract_re = re.compile('.*\\[([0-9]+):[0-9]*:.*\\]\Z') +only_whitespace = re.compile('\A\s*\Z') def validate_css(string): p = CSSParser(raiseExceptions = True) diff --git a/r2/r2/lib/db/queries.py b/r2/r2/lib/db/queries.py index d7fd795f30..d4ad59336b 100644 --- a/r2/r2/lib/db/queries.py +++ b/r2/r2/lib/db/queries.py @@ -132,8 +132,8 @@ def can_delete(self): "True if a item can be removed from the listing, always true for now." 
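# Illustrative sketch only (the 'none' value is just an example) of why the
# cssfilter.py hunk above moves its validators from '^...$' to '\A...\Z':
# in Python's re module, '$' also matches just before a trailing newline, so
# a property value ending in '\n' would still pass the whitelist, while '\Z'
# only matches at the true end of the string.
#
#     import re
#     loose  = re.compile('^(?:none)$', re.I).match
#     strict = re.compile('\A(?:none)\Z', re.I).match
#     loose('none\n')     # -> match object; the trailing newline sneaks by
#     strict('none\n')    # -> None; rejected as intended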
return True - def _mutate(self, fn): - self.data = query_cache.mutate(self.iden, fn, default=[]) + def _mutate(self, fn, willread=True): + self.data = query_cache.mutate(self.iden, fn, default=[], willread=willread) self._fetched=True def insert(self, items): @@ -142,7 +142,6 @@ def insert(self, items): self._insert_tuples([self.make_item_tuple(item) for item in tup(items)]) def _insert_tuples(self, t): - def _mutate(data): data = data or [] @@ -185,7 +184,7 @@ def _replace(self, tuples): private API""" def _mutate(data): return tuples - self._mutate(_mutate) + self._mutate(_mutate, willread=False) def update(self): """Runs the query and stores the result in the cache. This is @@ -243,6 +242,10 @@ def __iter__(self): for x in self.data: yield x[0] + def update(self): + for x in self.cached_results: + x.update() + def make_results(query, filter = filter_identity): if g.use_query_cache: return CachedResults(query, filter) @@ -264,8 +267,11 @@ def merge_results(*results): return m def get_links(sr, sort, time): + return _get_links(sr._id, sort, time) + +def _get_links(sr_id, sort, time): """General link query for a subreddit.""" - q = Link._query(Link.c.sr_id == sr._id, + q = Link._query(Link.c.sr_id == sr_id, sort = db_sort(sort), data = True) @@ -403,9 +409,9 @@ def get_domain_links(domain, sort, time): return make_results(q) -def user_query(kind, user, sort, time): +def user_query(kind, user_id, sort, time): """General profile-page query.""" - q = kind._query(kind.c.author_id == user._id, + q = kind._query(kind.c.author_id == user_id, kind.c._spam == (True, False), sort = db_sort(sort)) if time != 'all': @@ -418,16 +424,25 @@ def get_all_comments(): return make_results(q) def get_sr_comments(sr): + return _get_sr_comments(sr._id) + +def _get_sr_comments(sr_id): """the subreddit /r/foo/comments page""" - q = Comment._query(Comment.c.sr_id == sr._id, + q = Comment._query(Comment.c.sr_id == sr_id, sort = desc('_date')) return make_results(q) +def _get_comments(user_id, sort, time): + return user_query(Comment, user_id, sort, time) + def get_comments(user, sort, time): - return user_query(Comment, user, sort, time) + return _get_comments(user._id, sort, time) + +def _get_submitted(user_id, sort, time): + return user_query(Link, user_id, sort, time) def get_submitted(user, sort, time): - return user_query(Link, user, sort, time) + return _get_submitted(user._id, sort, time) def get_overview(user, sort, time): return merge_results(get_comments(user, sort, time), @@ -574,32 +589,49 @@ def new_link(link): def new_comment(comment, inbox_rels): author = Account._byID(comment.author_id) - job = [get_comments(author, 'new', 'all')] + job = [get_comments(author, 'new', 'all'), + get_comments(author, 'top', 'all'), + get_comments(author, 'controversial', 'all')] + + sr = Subreddit._byID(comment.sr_id) + if comment._deleted: + job_key = "delete_items" + job.append(get_sr_comments(sr)) job.append(get_all_comments()) - add_queries(job, delete_items = comment) else: - sr = Subreddit._byID(comment.sr_id) + job_key = "insert_items" if comment._spam: job.append(get_spam_comments(sr)) - add_queries(job, insert_items = comment) amqp.add_item('new_comment', comment._fullname) if not g.amqp_host: add_comment_tree([comment]) + job_dict = { job_key: comment } + add_queries(job, **job_dict) + # note that get_all_comments() is updated by the amqp process # r2.lib.db.queries.run_new_comments (to minimise lock contention) if inbox_rels: for inbox_rel in tup(inbox_rels): inbox_owner = inbox_rel._thing1 + job_dict = { 
job_key: inbox_rel } if inbox_rel._name == "inbox": - add_queries([get_inbox_comments(inbox_owner)], - insert_items = inbox_rel) + inbox_func = get_inbox_comments + unread_func = get_unread_comments + elif inbox_rel._name == "selfreply": + inbox_func = get_inbox_selfreply + unread_func = get_unread_selfreply + else: + raise ValueError("wtf is " + inbox_rel._name) + + add_queries([inbox_func(inbox_owner)], **job_dict) + + if comment._deleted: + add_queries([unread_func(inbox_owner)], **job_dict) else: - add_queries([get_inbox_selfreply(inbox_owner)], - insert_items = inbox_rel) - set_unread(comment, inbox_owner, True) + set_unread(comment, inbox_owner, True) def new_subreddit(sr): @@ -611,12 +643,13 @@ def new_vote(vote, foreground=False): user = vote._thing1 item = vote._thing2 - if not isinstance(item, Link): + if not isinstance(item, (Link, Comment)): return if vote.valid_thing and not item._spam and not item._deleted: sr = item.subreddit_slow results = [] + author = Account._byID(item.author_id) for sort in ('hot', 'top', 'controversial', 'new'): if isinstance(item, Link): @@ -624,33 +657,35 @@ def new_vote(vote, foreground=False): if isinstance(item, Comment): results.append(get_comments(author, sort, 'all')) + if isinstance(item, Link): + # don't do 'new', because that was done by new_link, and + # the time-filtered versions of top/controversial will be + # done by mr_top + results.extend([get_links(sr, 'hot', 'all'), + get_links(sr, 'top', 'all'), + get_links(sr, 'controversial', 'all'), + ]) - # don't do 'new', because that was done by new_link, and the - # time-filtered versions of top/controversial will be done by - # mr_top - results.extend([get_links(sr, 'hot', 'all'), - get_links(sr, 'top', 'all'), - get_links(sr, 'controversial', 'all'), - ]) - - for domain in utils.UrlParser(item.url).domain_permutations(): - for sort in ("hot", "top", "controversial"): - results.append(get_domain_links(domain, sort, "all")) + for domain in utils.UrlParser(item.url).domain_permutations(): + for sort in ("hot", "top", "controversial"): + results.append(get_domain_links(domain, sort, "all")) add_queries(results, insert_items = item, foreground=foreground) vote._fast_query_timestamp_touch(user) - #must update both because we don't know if it's a changed vote - if vote._name == '1': - add_queries([get_liked(user)], insert_items = vote) - add_queries([get_disliked(user)], delete_items = vote) - elif vote._name == '-1': - add_queries([get_liked(user)], delete_items = vote) - add_queries([get_disliked(user)], insert_items = vote) - else: - add_queries([get_liked(user)], delete_items = vote) - add_queries([get_disliked(user)], delete_items = vote) + if isinstance(item, Link): + # must update both because we don't know if it's a changed + # vote + if vote._name == '1': + add_queries([get_liked(user)], insert_items = vote, foreground = foreground) + add_queries([get_disliked(user)], delete_items = vote, foreground = foreground) + elif vote._name == '-1': + add_queries([get_liked(user)], delete_items = vote, foreground = foreground) + add_queries([get_disliked(user)], insert_items = vote, foreground = foreground) + else: + add_queries([get_liked(user)], delete_items = vote, foreground = foreground) + add_queries([get_disliked(user)], delete_items = vote, foreground = foreground) def new_message(message, inbox_rels): from r2.lib.comment_tree import add_message @@ -712,7 +747,7 @@ def changed(things, boost_only=False): message_id = thing._fullname, delivery_mode = amqp.DELIVERY_TRANSIENT) -def 
_by_srid(things): +def _by_srid(things,srs=True): """Takes a list of things and returns them in a dict separated by sr_id, in addition to the looked-up subreddits""" ret = {} @@ -721,9 +756,11 @@ def _by_srid(things): if getattr(thing, 'sr_id', None) is not None: ret.setdefault(thing.sr_id, []).append(thing) - srs = Subreddit._byID(ret.keys(), return_dict=True) if ret else {} - - return ret, srs + if srs: + _srs = Subreddit._byID(ret.keys(), return_dict=True) if ret else {} + return ret, _srs + else: + return ret def ban(things): del_or_ban(things, "ban") @@ -876,21 +913,24 @@ def add_comment_tree(comments): # amqp queue processing functions -def run_new_comments(): +def run_new_comments(limit=1000): """Add new incoming comments to the /comments page""" # this is done as a queue because otherwise the contention for the # lock on the query would be very high - def _run_new_comment(msg): - fname = msg.body - comment = Comment._by_fullname(fname,data=True) - sr = Subreddit._byID(comment.sr_id) + def _run_new_comments(msgs, chan): + fnames = [msg.body for msg in msgs] - add_queries([get_all_comments(), - get_sr_comments(sr)], - insert_items = [comment]) + comments = Comment._by_fullname(fnames, data=True, return_dict=False) + add_queries([get_all_comments()], + insert_items=comments) - amqp.consume_items('newcomments_q', _run_new_comment) + bysrid = _by_srid(comments, False) + for srid, sr_comments in bysrid.iteritems(): + add_queries([_get_sr_comments(srid)], + insert_items=sr_comments) + + amqp.handle_items('newcomments_q', _run_new_comments, limit=limit) def run_commentstree(limit=100): """Add new incoming comments to their respective comments trees""" @@ -904,6 +944,9 @@ def _run_commentstree(msgs, chan): amqp.handle_items('commentstree_q', _run_commentstree, limit = limit) +vote_link_q = 'vote_link_q' +vote_comment_q = 'vote_comment_q' + def queue_vote(user, thing, dir, ip, organic = False, cheater = False, store = True): # set the vote in memcached so the UI gets updated immediately @@ -912,7 +955,12 @@ def queue_vote(user, thing, dir, ip, organic = False, # queue the vote to be stored unless told not to if store: if g.amqp_host: - amqp.add_item('register_vote_q', + if isinstance(thing, Link): + qname = vote_link_q + elif isinstance(thing, Comment): + qname = vote_comment_q + + amqp.add_item(qname, pickle.dumps((user._id, thing._fullname, dir, ip, organic, cheater))) else: @@ -924,26 +972,46 @@ def prequeued_vote_key(user, item): def get_likes(user, items): if not user or not items: return {} - keys = {} + res = {} - keys = dict((prequeued_vote_key(user, item), (user,item)) - for item in items) - r = g.cache.get_multi(keys.keys()) - # populate the result set based on what we fetched from the cache first - for k, v in r.iteritems(): - res[keys[k]] = v + # check the prequeued_vote_keys + keys = {} + for item in items: + if (user, item) in res: + continue + + key = prequeued_vote_key(user, item) + keys[key] = (user, item) + if keys: + r = g.cache.get_multi(keys.keys()) + for key, v in r.iteritems(): + res[keys[key]] = (True if v == '1' + else False if v == '-1' + else None) + + # avoid requesting items that they can't have voted on (we're + # still using the tdb_sql Thing API for this). 
TODO: we should do + # this before the prequeued_vote_keys lookup, so that in extreme + # cases we can avoid hitting the cache for them at all, but in the + # current state that precludes brand new comments from appearing + # to have been voted on + for item in items: + if (user, item) in res: + continue + + # we can only vote on links and comments + if isinstance(item, (Link, Comment)): + rel = Vote.rel(user.__class__, item.__class__) + if rel._can_skip_lookup(user, item): + res[(user, item)] = None + else: + res[(user, item)] = None - # now hit the vote db with the remainder + # now hit Cassandra with the remainder likes = Vote.likes(user, [i for i in items if (user, i) not in res]) - for k, v in likes.iteritems(): - res[k] = v._name - - # lastly, translate into boolean: - for k in res.keys(): - res[k] = (True if res[k] == '1' - else False if res[k] == '-1' else None) + res.update(likes) return res @@ -956,19 +1024,10 @@ def handle_vote(user, thing, dir, ip, organic, cheater=False, foreground=False): g.log.error("duplicate vote for: %s" % str((user, thing, dir))) return - # keep track of upvotes in the hard cache by subreddit - #sr_id = getattr(thing, "sr_id", None) - #if (sr_id and dir > 0 and getattr(thing, "author_id", None) != user._id - # and v.valid_thing): - # now = datetime.now(g.tz).strftime("%Y/%m/%d") - # g.hardcache.add("subreddit_vote-%s_%s_%s" % (now, sr_id, user._id), - # sr_id, time = 86400 * 7) # 1 week for now - if isinstance(thing, Link): new_vote(v, foreground=foreground) #update the modified flags - set_last_modified(user, 'liked') if user._id == thing.author_id: set_last_modified(user, 'overview') set_last_modified(user, 'submitted') @@ -977,8 +1036,10 @@ def handle_vote(user, thing, dir, ip, organic, cheater=False, foreground=False): #update sup listings if dir: + set_last_modified(user, 'liked') sup.add_update(user, 'liked') elif dir is False: + set_last_modified(user, 'disliked') sup.add_update(user, 'disliked') elif isinstance(thing, Comment): @@ -990,7 +1051,7 @@ def handle_vote(user, thing, dir, ip, organic, cheater=False, foreground=False): sup.add_update(user, 'commented') -def process_votes_single(**kw): +def process_votes_single(qname, limit=0): # limit is taken but ignored for backwards compatibility def _handle_vote(msg): @@ -1008,11 +1069,11 @@ def _handle_vote(msg): if isinstance(votee, (Link, Comment)): print (voter, votee, dir, ip, organic, cheater) handle_vote(voter, votee, dir, ip, organic, - cheater = cheater, foreground=False) + cheater = cheater, foreground=True) - amqp.consume_items('register_vote_q', _handle_vote, verbose = False) + amqp.consume_items(qname, _handle_vote, verbose = False) -def process_votes_multi(limit=100): +def process_votes_multi(qname, limit=100): # limit is taken but ignored for backwards compatibility def _handle_vote(msgs, chan): comments = [] @@ -1042,19 +1103,10 @@ def _handle_vote(msgs, chan): update_comment_votes(comments) - amqp.handle_items('register_vote_q', _handle_vote, limit = limit) + amqp.handle_items(qname, _handle_vote, limit = limit) process_votes = process_votes_single -def process_comment_sorts(limit=500): - def _handle_sort(msgs, chan): - cids = list(set(int(msg.body) for msg in msgs)) - comments = Comment._byID(cids, data = True, return_dict = False) - print comments - update_comment_votes(comments) - - amqp.handle_items('commentsort_q', _handle_sort, limit = limit) - try: from r2admin.lib.admin_queries import * except ImportError: diff --git a/r2/r2/lib/db/tdb_cassandra.py 
b/r2/r2/lib/db/tdb_cassandra.py index f70f1fa527..2a02a8233e 100644 --- a/r2/r2/lib/db/tdb_cassandra.py +++ b/r2/r2/lib/db/tdb_cassandra.py @@ -24,9 +24,9 @@ from pylons import g -import pycassa -import cassandra.ttypes -from cassandra.ttypes import ConsistencyLevel +from pycassa import ColumnFamily +from pycassa.cassandra.ttypes import ConsistencyLevel, NotFoundException +from pycassa.system_manager import SystemManager, UTF8_TYPE from r2.lib.utils import tup, Storage from r2.lib.db.sorts import epoch_seconds @@ -36,20 +36,21 @@ cassandra = g.cassandra thing_cache = g.thing_cache +seeds = g.cassandra_seeds keyspace = 'reddit' disallow_db_writes = g.disallow_db_writes tz = g.tz +log = g.log read_consistency_level = g.cassandra_rcl write_consistency_level = g.cassandra_wcl - -# descriptions of the CFs available on boot. -boot_cfs = cassandra.describe_keyspace(keyspace) +debug = g.debug +make_lock = g.make_lock +db_create_tables = g.db_create_tables thing_types = {} # The available consistency levels -CL = Storage(ZERO = ConsistencyLevel.ZERO, - ANY = ConsistencyLevel.ANY, +CL = Storage(ANY = ConsistencyLevel.ANY, ONE = ConsistencyLevel.ONE, QUORUM = ConsistencyLevel.QUORUM, ALL = ConsistencyLevel.ALL) @@ -77,21 +78,29 @@ class TdbException(CassandraException): """Exceptions caused by bugs in our callers or subclasses""" pass -class NotFound(CassandraException): +class NotFound(CassandraException, NotFoundException): """Someone asked us for an ID that isn't stored in the DB at all. This is probably an end-user's fault.""" pass -def will_write(): - if disallow_db_writes: - raise CassandraException("Not so fast! DB writes have been disabled") +def will_write(fn): + """Decorator to indicate that a given function intends to write + out to Cassandra""" + def _fn(*a, **kw): + if disallow_db_writes: + raise CassandraException("Not so fast! DB writes have been disabled") + return fn(*a, **kw) + return _fn + +def get_manager(): + # n.b. does not retry against multiple servers + server = seeds[0] + return SystemManager(server) class ThingMeta(type): def __init__(cls, name, bases, dct): type.__init__(cls, name, bases, dct) - global boot_cfs - if cls._use_db: if cls._type_prefix is None: # default to the class name @@ -102,33 +111,39 @@ def __init__(cls, name, bases, dct): % (cls._type_prefix, name)) if cls._type_prefix in thing_types: - raise InvariantException("Redefining type #%s?" % (cls._type_prefix)) + raise InvariantException("Redefining type %r?" 
% (cls._type_prefix)) + # if we weren't given a specific _cf_name, we can use the + # classes's name cf_name = cls._cf_name or name - # make sure the CF for this type exists, or refuse to - # start - if cf_name not in boot_cfs: - # do another lookup in case both this class and the CF - # were created after boot (this may have the effect of - # doubling the connection load on the seed node(s) if - # someone rolls a patch without first creating the - # appropriate CFs if that drives reddit into a restart - # loop; if that happens often just remove the next two - # lines) - boot_cfs = cassandra.describe_keyspace(keyspace) - if name not in boot_cfs: - raise ConfigurationException("ColumnFamily %r does not exist" % (name,)) - thing_types[cls._type_prefix] = cls cls._read_consistency_level = read_consistency_level cls._write_consistency_level = write_consistency_level - cls._cf = pycassa.ColumnFamily(cassandra, keyspace, - cf_name, - read_consistency_level = read_consistency_level, - write_consistency_level = write_consistency_level) + try: + cls._cf = ColumnFamily(cassandra, + cf_name, + read_consistency_level = read_consistency_level, + write_consistency_level = write_consistency_level) + except NotFoundException: + if not db_create_tables: + raise + + manager = get_manager() + + log.warning("Creating Cassandra Column Family %s" % (cf_name,)) + with make_lock('cassandra_schema'): + manager.create_column_family(keyspace, cf_name, + comparator_type = cls._compare_with) + log.warning("Created Cassandra Column Family %s" % (cf_name,)) + + # try again to look it up + cls._cf = ColumnFamily(cassandra, + cf_name, + read_consistency_level = read_consistency_level, + write_consistency_level = write_consistency_level) cls._kind = name @@ -136,7 +151,7 @@ def __repr__(cls): return '' % cls.__name__ class ThingBase(object): - # base class for Things and Relation + # base class for Thing and Relation __metaclass__ = ThingMeta @@ -149,6 +164,11 @@ class ThingBase(object): _use_db = False + # the Cassandra column-comparator (internally orders column + # names). In real life you can't change this without some changes + # to tdb_cassandra to support other attr types + _compare_with = UTF8_TYPE + _value_type = None # if set, overrides all of the _props types # below. Used for Views. One of 'int', 'float', # 'bool', 'pickle', 'date', 'bytes', 'str' @@ -169,11 +189,38 @@ class ThingBase(object): # also have to set it in _bool_props _defaults = {} + # The default TTL in seconds to add to all columns. Note: if an + # entire object is expected to have a TTL, it should be considered + # immutable! (You don't want to write out an object with an author + # and date, then go update author or add a new column, then have + # the original columns expire. Then when you go to look it up, the + # inherent properties author and/or date will be gone and only the + # updated columns will be present.) This is an expected convention + # and is not enforced. + _ttl = None + + # A per-class dictionary of default TTLs that new columns of this + # class should have + _default_ttls = {} + + # A per-instance property defining the TTL of individual columns + # (that must also appear in self._dirties) + _column_ttls = {} + # a timestamp property that will automatically be added to newly # created Things (disable by setting to None) _timestamp_prop = None - def __init__(self, _id = None, _committed = False, **kw): + # a per-instance property indicating that this object was + # partially loaded: i.e. 
only some properties were requested from + # the DB + _partial = None + + # a per-instance property that specifies that the columns backing + # these attributes are to be removed on _commit() + _deletes = set() + + def __init__(self, _id = None, _committed = False, _partial = None, **kw): # things that have changed self._dirties = kw.copy() @@ -186,6 +233,11 @@ def __init__(self, _id = None, _committed = False, **kw): # whether this item has ever been created self._committed = _committed + self._partial = None if _partial is None else frozenset(_partial) + + self._deletes = set() + self._column_ttls = {} + # our row key self._id = _id @@ -193,33 +245,73 @@ def __init__(self, _id = None, _committed = False, **kw): raise TdbException("Cannot make instances of %r" % (self.__class__,)) @classmethod - def _byID(cls, ids): + def _byID(cls, ids, properties=None): ids, is_single = tup(ids, True) + if properties is not None: + asked_properties = frozenset(properties) + willask_properties = set(properties) + if not len(ids): if is_single: raise InvariantException("whastis?") - else: - return {} + return {} # all keys must be strings or directly convertable to strings assert all(isinstance(_id, basestring) and str(_id) for _id in ids) + def reject_bad_partials(cached, still_need): + # tell sgm that the match it found in the cache isn't good + # enough if it's a partial that doesn't include our + # properties. we still need to look those items up to get + # the properties that we're after + stillfind = set() + + for k, v in cached.iteritems(): + if properties is None: + if v._partial is not None: + # there's a partial in the cache but we're not + # looking for partials + stillfind.add(k) + elif v._partial is not None and not asked_properties.issubset(v._partial): + # we asked for a partial, and this is a partial, + # but it doesn't have all of the properties that + # we need + stillfind.add(k) + + # other callers in our request are now expecting + # to find the properties that were on that + # partial, so we'll have to preserve them + for prop in v._partial: + willask_properties.add(prop) + + for k in stillfind: + del cached[k] + still_need.add(k) + def lookup(l_ids): # TODO: if we get back max_column_count columns for a # given row, check a flag on the class as to whether to # refetch for more of them. 
This could be important with # large Views, for instance - rows = cls._cf.multiget(l_ids, column_count=max_column_count) + + if properties is None: + rows = cls._cf.multiget(l_ids, column_count=max_column_count) + else: + rows = cls._cf.multiget(l_ids, columns = willask_properties) l_ret = {} for t_id, row in rows.iteritems(): t = cls._from_serialized_columns(t_id, row) + if properties is not None: + # make sure that the item is marked as a _partial + t._partial = willask_properties l_ret[t._id] = t return l_ret - ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix()) + ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix(), + found_fn=reject_bad_partials) if is_single and not ret: raise NotFound("<%s %r>" % (cls.__name__, @@ -358,35 +450,42 @@ def _from_columns(cls, t_id, columns): @property def _dirty(self): - return len(self._dirties) or not self._committed - - def _commit(self): - will_write() + return len(self._dirties) or len(self._deletes) or not self._committed + @will_write + def _commit(self, write_consistency_level = None): if not self._dirty: return if self._id is None: raise TdbException("Can't commit %r without an ID" % (self,)) + if self._committed and self._ttl: + log.warning("Using a full-TTL object %r in a mutable fashion" + % (self,)) + if not self._committed: # if this has never been committed we should also consider # the _orig columns as dirty (but "less dirty" than the # _dirties) upd = self._orig.copy() + self._orig.clear() upd.update(self._dirties) self._dirties = upd - self._orig.clear() # Cassandra values are untyped byte arrays, so we need to - # serialize everything, filtering out anything that's been - # dirtied but doesn't actually differ from what's written out + # serialize everything while filtering out anything that's + # been dirtied but doesn't actually differ from what's already + # in the DB updates = dict((attr, self._serialize_column(attr, val)) for (attr, val) in self._dirties.iteritems() if (attr not in self._orig or val != self._orig[attr])) + # n.b. deleted columns are applied *after* the updates. 
our + # __setattr__/__delitem__ tries to make sure that this always + # works if not self._committed and self._timestamp_prop and self._timestamp_prop not in updates: # auto-create timestamps on classes that request them @@ -404,13 +503,29 @@ def _commit(self): updates[self._timestamp_prop] = s_now self._dirties[self._timestamp_prop] = now - if not updates: + if not updates and not self._deletes: return - self._cf.insert(self._id, updates) + # actually write out the changes to the CF + wcl = self._wcl(write_consistency_level) + with self._cf.batch(write_consistency_level = wcl) as b: + if updates: + for k, v in updates.iteritems(): + b.insert(self._id, + {k: v}, + ttl=self._column_ttls.get(k, self._ttl)) + if self._deletes: + b.remove(self._id, self._deletes) self._orig.update(self._dirties) + self._column_ttls.clear() self._dirties.clear() + for k in self._deletes: + try: + del self._orig[k] + except KeyError: + pass + self._deletes.clear() if not self._committed: self._on_create() @@ -424,6 +539,8 @@ def _revert(self): raise TdbException("Revert to what?") self._dirties.clear() + self._deletes.clear() + self._column_ttls.clear() def __getattr__(self, attr): if attr.startswith('_'): @@ -432,12 +549,16 @@ def __getattr__(self, attr): except KeyError: raise AttributeError, attr - if attr in self._dirties: + if attr in self._deletes: + raise AttributeError("%r has no %r because you deleted it", (self, attr)) + elif attr in self._dirties: return self._dirties[attr] elif attr in self._orig: return self._orig[attr] elif attr in self._defaults: return self._defaults[attr] + elif self._partial is not None and attr not in self._partial: + raise AttributeError("%r has no %r but you didn't request it" % (self, attr)) else: raise AttributeError('%r has no %r' % (self, attr)) @@ -448,12 +569,22 @@ def __setattr__(self, attr, val): if attr.startswith('_'): return object.__setattr__(self, attr, val) + try: + self._deletes.remove(attr) + except KeyError: + pass self._dirties[attr] = val + if attr in self._default_ttls: + self._column_ttls[attr] = self._default_ttls[attr] def __eq__(self, other): - return (self.__class__ == other.__class__ # yes equal, not a subclass - and self._id == other._id - and self._t == other._t) + if self.__class__ != other.__class__: + return False + + if self._partial or other._partial and self._partial != other._partial: + raise ValueError("Can't compare incompatible partials") + + return self._id == other._id and self._t == other._t def __ne__(self, other): return not (self == other) @@ -462,15 +593,23 @@ def __ne__(self, other): def _t(self): """Emulate the _t property from tdb_sql: a dictionary of all values that are or will be stored in the database, (not - including _defaults)""" + including _defaults or unrequested properties on + partials)""" ret = self._orig.copy() ret.update(self._dirties) + for k in self._deletes: + try: + del ret[k] + except KeyError: + pass return ret # allow the dictionary mutation syntax; it makes working some some - # keys a bit easier + # keys a bit easier. 
Go through our regular + # __getattr__/__setattr__ functions where all of the appropriate + # work is done def __getitem__(self, key): - return self.__getattr__(self, attr) + return self.__getattr__(key) def __setitem__(self, key, value): return self.__setattr__(key, value) @@ -479,8 +618,15 @@ def _get(self, key, default = None): try: return self.__getattr__(key) except AttributeError: + if self._partial is not None and key not in self._partial: + raise AttributeError("_get on unrequested key from partial") return default + def _set_ttl(self, key, ttl): + assert key in self._dirties + assert isinstance(ttl, (long, int)) + self._column_ttls[key] = ttl + def _on_create(self): """A hook executed on creation, good for creation of static Views. Subclasses should call their parents' hook(s) as @@ -502,15 +648,23 @@ def __repr__(self): # its error messages id_str = self._id comm_str = '' if self._committed else ' (uncommitted)' - return "<%s %r%s>" % (self.__class__.__name__, + part_str = '' if self._partial is None else ' (partial)' + return "<%s %r%s%s>" % (self.__class__.__name__, id_str, - comm_str) - - def __del__(self): - if not self._committed: - # normally we'd log this with g.log or something, but we - # can't guarantee what thread is destructing us - print "Warning: discarding uncomitted %r; this is usually a bug" % (self,) + comm_str, part_str) + + if debug: + # we only want this with g.debug because overriding __del__ + # can play hell with memory leaks + def __del__(self): + if not self._committed: + # normally we'd log this with g.log or something, + # but we can't guarantee that the thread + # destructing us has access to g + print "Warning: discarding uncomitted %r; this is usually a bug" % (self,) + elif self._dirty: + print ("Warning: discarding dirty %r; this is usually a bug (_dirties=%r, _deletes=%r)" + % (self,self._dirties,self._deletes)) class Thing(ThingBase): _timestamp_prop = 'date' @@ -521,34 +675,62 @@ class Relation(ThingBase): def __init__(self, thing1_id, thing2_id, **kw): # NB! 
When storing relations between postgres-backed Thing # objects, these IDs are actually ID36s - return ThingBase.__init__(self, - _id = '%s_%s' % (thing1_id, thing2_id), - thing1_id=thing1_id, thing2_id=thing2_id, - **kw) + ThingBase.__init__(self, + _id = self._rowkey(thing1_id, thing2_id), + **kw) + self._orig['thing1_id'] = thing1_id + self._orig['thing2_id'] = thing2_id + + @will_write + def _destroy(self, write_consistency_level = None): + # only implemented on relations right now, but at present + # there's no technical reason for this + self._cf.remove(self._id, + write_consistency_level = self._wcl(write_consistency_level)) + self._on_destroy() + thing_cache.delete(self._cache_key()) + + def _on_destroy(self): + """Called *after* the destruction of the Thing on the + destroyed Thing's mortal shell""" + # only implemented on relations right now, but at present + # there's no technical reason for this + pass @classmethod - def _fast_query(cls, thing1_ids, thing2_ids, **kw): + def _fast_query(cls, thing1_ids, thing2_ids, properties = None, **kw): """Find all of the relations of this class between all of the members of thing1_ids and thing2_ids""" thing1_ids, thing1s_is_single = tup(thing1_ids, True) thing2_ids, thing2s_is_single = tup(thing2_ids, True) + if not thing1_ids or not thing2_ids: + # nothing to permute + return {} + + if properties is not None: + properties = set(properties) + + # all relations must load these properties, even if + # unrequested + properties.add('thing1_id') + properties.add('thing2_id') + # permute all of the pairs - ids = set(('%s_%s' % (x, y)) + ids = set(cls._rowkey(x, y) for x in thing1_ids for y in thing2_ids) - rels = cls._byID(ids).values() + rels = cls._byID(ids, properties = properties).values() - # does anybody actually use us this way? 
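# A sketch of how the pair-wise lookup above is meant to be called (the id36
# values are made up; CassandraLinkVote is the relation class referenced by
# the migration helpers later in this patch).  Row keys are the two id36s
# joined by '_', so this multigets rows 'ab_q1' and 'cd_q1' in one round trip
# and returns a dict keyed by (thing1_id, thing2_id) for the pairs that exist:
#
#     rels = CassandraLinkVote._fast_query(['ab', 'cd'], ['q1'],
#                                          properties=['name'])
#     # -> e.g. {('ab', 'q1'): <CassandraLinkVote 'ab_q1' (partial)>}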
if thing1s_is_single and thing2s_is_single: if rels: assert len(rels) == 1 return rels[0] else: - raise NotFound("<%s '%s_%s'>" % (cls.__name__, - thing1_ids[0], - thing2_ids[0])) + raise NotFound("<%s %r>" % (cls.__name__, + cls._rowkey(thing1_ids[0], + thing2_ids[0]))) return dict(((rel.thing1_id, rel.thing2_id), rel) for rel in rels) @@ -559,20 +741,28 @@ def _from_columns(cls, t_id, columns): # throw our toys on the floor if they don't have thing1_id and # thing2_id if not ('thing1_id' in columns and 'thing2_id' in columns - and t_id == ('%s_%s' % (columns['thing1_id'], columns['thing2_id']))): + and t_id == cls._rowkey(columns['thing1_id'], columns['thing2_id'])): raise InvariantException("Looked up %r with unmatched IDs (%r)" % (cls, t_id)) - r = cls(thing1_id=columns['thing1_id'], thing2_id=columns['thing2_id']) - r._orig = columns - assert r._id == t_id - r._committed = True - return r + # if modifying this, check ThingBase._from_columns and see if + # you should change it as well + thing1_id, thing2_id = columns['thing1_id'], columns['thing2_id'] + t = cls(thing1_id = thing1_id, thing2_id = thing2_id) + assert t._id == t_id + t._orig = columns + t._committed = True + return t + + @staticmethod + def _rowkey(thing1_id36, thing2_id36): + assert isinstance(thing1_id36, basestring) and isinstance(thing2_id36, basestring) + return '%s_%s' % (thing1_id36, thing2_id36) - def _commit(self): - assert self._id == '%s_%s' % (self.thing1_id, self.thing2_id) + def _commit(self, *a, **kw): + assert self._id == self._rowkey(self.thing1_id, self.thing2_id) - return ThingBase._commit(self) + return ThingBase._commit(self, *a, **kw) @classmethod def _rel(cls, thing1_cls, thing2_cls): @@ -586,18 +776,21 @@ def _datekey(cls, date): class Query(object): """A query across a CF. Note that while you can query rows from a - CF that has a RandomPartitioner, you won't get them in any sort - of order, which makes 'after' unreliable""" - def __init__(self, cls, after=None, limit=100, chunk_size=100, - _max_column_count = max_column_count): + CF that has a RandomPartitioner, you won't get them in any sort + of order""" + def __init__(self, cls, after=None, properties=None, limit=100, + chunk_size=100, _max_column_count = max_column_count): self.cls = cls self.after = after + self.properties = properties self.limit = limit self.chunk_size = chunk_size self.max_column_count = _max_column_count def __copy__(self): - return Query(self.cls, after=self.after, limit=self.limit, + return Query(self.cls, after=self.after, + properties = self.properties, + limit=self.limit, chunk_size=self.chunk_size, _max_column_count = self.max_column_count) copy = __copy__ @@ -611,10 +804,14 @@ def _dump(self): for col, val in row._t.iteritems(): print '\t%s: %r' % (col, val) + @will_write def _delete_all(self, write_consistency_level = None): + # uncomment to use on purpose raise InvariantException("Nice try, FBI") - will_write() + # TODO: this could use cf.truncate instead and be *way* + # faster, but it wouldn't flush the thing_cache at the same + # time that way q = self.copy() q.after = q.limit = None @@ -630,6 +827,8 @@ def _delete_all(self, write_consistency_level = None): for row in q: print row + + # n.b. we're not calling _on_destroy! 
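# A sketch of the column-limited scans enabled by Query's new `properties`
# argument above (CassandraSave and its 'date' column are the ones written by
# the port_cassasaves migration later in this patch): get_range only fetches
# the named columns for each row.
#
#     for save in Query(CassandraSave, properties=['date'], limit=50):
#         print save._id, save.date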
q.cls._cf.remove(row._id, write_consistency_level = wcl) thing_cache.delete(q.cls._cache_key_id(row._id)) @@ -641,8 +840,13 @@ def __iter__(self): after = '' if self.after is None else self.after._id limit = self.limit - r = self.cls._cf.get_range(start=after, row_count=limit, - column_count = self.max_column_count) + if self.properties is None: + r = self.cls._cf.get_range(start=after, row_count=limit, + column_count = self.max_column_count) + else: + r = self.cls._cf.get_range(start=after, row_count=limit, + columns = self.properties) + for t_id, columns in r: if not columns: # a ghost row @@ -655,12 +859,14 @@ class View(ThingBase): # Views are Things like any other, but may have special key # characteristics + # these default to not having a timestamp column _timestamp_prop = None _value_type = 'str' def _values(self): """Retrieve the entire contents of the view""" + # TODO: at present this only grabs max_column_count columns return self._t @staticmethod @@ -672,29 +878,62 @@ def _gen_uuid(): return uuid1() @classmethod - def _set_values(cls, row_key, col_values, write_consistency_level = None): + @will_write + def _set_values(cls, row_key, col_values, + write_consistency_level = None, + ttl=None): """Set a set of column values in a row of a View without looking up the whole row first""" # col_values =:= dict(col_name -> col_value) - will_write() - updates = dict((col_name, cls._serialize_column(col_name, col_val)) for (col_name, col_val) in col_values.iteritems()) - # with some quick tweaks to pycassa we could have a version - # that takes multiple row-keys too, if that ever becomes a - # problem - cls._cf.insert(row_key, updates, write_consistency_level = cls._wcl(write_consistency_level)) + # if they didn't give us a TTL, use the default TTL for the + # class. This will be further overwritten below per-column + # based on the _default_ttls class dict. Note! There is no way + # to use this API to express that you don't want a TTL if + # there is a default set on either the row or the column + default_ttl = None if ttl is None else self._ttl + + with cls._cf.batch(write_consistency_level = cls._wcl(write_consistency_level)) as b: + # with some quick tweaks we could have a version that + # operates across multiple row keys, but this is not it + for k, v in updates.iteritems(): + ttl = cls._default_ttls.get(k, default_ttl) + b.insert(row_key, + {k: v}, + ttl = cls._default_ttls.get(k, default_ttl)) # can we be smarter here? thing_cache.delete(cls._cache_key_id(row_key)) + def __delitem__(self, key): + # only implemented on Views right now, but at present there's + # no technical reason for this + try: + del self._dirties[key] + except KeyError: + pass + try: + del self._column_ttls[key] + except KeyError: + pass + self._deletes.add(key) + +def schema_report(): + manager = get_manager() + print manager.describe_keyspace(keyspace) + def ring_report(): + # uses a silly algorithm to pick natural endpoints that requires N>=RF+1 + sizes = {} nodes = {} # token -> node - ring = cassandra.describe_ring(keyspace) + manager = get_manager() + + ring = manager.describe_ring(keyspace) ring.sort(key=lambda tr: long(tr.start_token)) for x, tr in enumerate(ring): diff --git a/r2/r2/lib/db/thing.py b/r2/r2/lib/db/thing.py index bed0f140e7..e3a51eea2a 100644 --- a/r2/r2/lib/db/thing.py +++ b/r2/r2/lib/db/thing.py @@ -144,9 +144,9 @@ def __getattr__(self, attr): cl = "???" if self._loaded: - nl = "it IS loaded." + nl = "it IS loaded" else: - nl = "it is NOT loaded." 
+ nl = "it is NOT loaded" # The %d format is nicer since it has no "L" at the # end, but if we can't do that, fall back on %r. @@ -167,7 +167,14 @@ def __getattr__(self, attr): print "Some dumbass forgot a comma." essentials = essentials, - if attr in essentials: + deleted = object.__getattribute__(self, "_deleted") + + if deleted: + nl += " and IS deleted." + else: + nl += " and is NOT deleted." + + if attr in essentials and not deleted: log_text ("essentials-bandaid-reload", "%s not found; %s Forcing reload." % (descr, nl), "warning") @@ -331,7 +338,7 @@ def _fullname(self): #TODO error when something isn't found? @classmethod - def _byID(cls, ids, data=False, return_dict=True, extra_props=None): + def _byID(cls, ids, data=False, return_dict=True, extra_props=None, stale=False): ids, single = tup(ids, True) prefix = thing_prefix(cls.__name__) @@ -345,7 +352,7 @@ def items_db(ids): return items - bases = sgm(cache, ids, items_db, prefix) + bases = sgm(cache, ids, items_db, prefix, stale=stale) #check to see if we found everything we asked for for i in ids: @@ -406,7 +413,7 @@ def _byID36(cls, id36s, return_dict = True, **kw): @classmethod def _by_fullname(cls, names, return_dict = True, - data=False, extra_props=None): + **kw): names, single = tup(names, True) table = {} @@ -431,8 +438,7 @@ def _by_fullname(cls, names, # lookup ids for each type identified = {} for real_type, thing_ids in table.iteritems(): - i = real_type._byID(thing_ids, data = data, - extra_props = extra_props) + i = real_type._byID(thing_ids, **kw) identified[real_type] = i # interleave types in original order of the name @@ -744,6 +750,26 @@ def _delete(self): def _fast_query_timestamp_touch(cls, thing1): thing_utils.set_last_modified_for_cls(thing1, cls._type_name) + @classmethod + def _can_skip_lookup(cls, thing1, thing2): + # we can't possibly have voted on things that were created + # after the last time we voted. for relations that have an + # invariant like this we can avoid doing these lookups as + # long as the relation takes responsibility for keeping + # the timestamp up-to-date + + last_done = thing_utils.get_last_modified_for_cls( + thing1, cls._type_name) + + if not last_done: + return False + + if thing2._date > last_done: + return True + + return False + + @classmethod def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True, thing_data=False, timestamp_optimize = False): @@ -759,26 +785,6 @@ def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True, name = tup(name) - def can_skip_lookup(t1, t2, name): - # we can't possibly have voted on things that were - # created after the last time we voted. 
for relations - # that have an invariant like this we can avoid doing - # these lookups as long as the relation takes - # responsibility for keeping the timestamp up-to-date - thing1 = thing1_dict[t1] - thing2 = thing2_dict[t2] - - last_done = thing_utils.get_last_modified_for_cls( - thing1, cls._type_name) - - if not last_done: - return False - - if thing2._date > last_done: - return True - - return False - # permute all of the pairs pairs = set((x, y, n) for x in thing1_ids @@ -792,7 +798,8 @@ def items_db(pairs): t2_ids = set() names = set() for t1, t2, name in pairs: - if timestamp_optimize and can_skip_lookup(t1, t2, name): + if timestamp_optimize and cls._can_skip_lookup(thing1_dict[t1], + thing2_dict[t2]): continue t1_ids.add(t1) t2_ids.add(t2) @@ -1049,9 +1056,9 @@ def load_things(rels, load_data=False): for rel in rels: t1_ids.add(rel._thing1_id) t2_ids.add(rel._thing2_id) - kind._type1._byID(t1_ids, load_data) + kind._type1._byID(t1_ids, data=load_data) if not kind._gay(): - t2_items = kind._type2._byID(t2_ids, load_data) + t2_items = kind._type2._byID(t2_ids, data=load_data) class Relations(Query): #params are thing1, thing2, name, date diff --git a/r2/r2/lib/emailer.py b/r2/r2/lib/emailer.py index 6b716d7d10..dae698400d 100644 --- a/r2/r2/lib/emailer.py +++ b/r2/r2/lib/emailer.py @@ -138,15 +138,19 @@ def send_queued_mail(test = False): session = smtplib.SMTP(g.smtp_server) def sendmail(email): try: + mimetext = email.to_MIMEText() + if mimetext is None: + print ("Got None mimetext for email from %r and to %r" + % (email.fr_addr, email.to_addr)) if test: - print email.to_MIMEText().as_string() + print mimetext.as_string() else: session.sendmail(email.fr_addr, email.to_addr, - email.to_MIMEText().as_string()) + mimetext.as_string()) email.set_sent(rejected = False) # exception happens only for local recipient that doesn't exist except (smtplib.SMTPRecipientsRefused, smtplib.SMTPSenderRefused, - UnicodeDecodeError): + UnicodeDecodeError, AttributeError): # handle error and print, but don't stall the rest of the queue print "Handled error sending mail (traceback to follow)" traceback.print_exc(file = sys.stdout) @@ -240,9 +244,9 @@ def finished_promo(thing): return _promo_email(thing, Email.Kind.FINISHED_PROMO) -def send_html_email(to_addr, from_addr, subject, html): +def send_html_email(to_addr, from_addr, subject, html, subtype="html"): from r2.lib.filters import _force_utf8 - msg = MIMEText(_force_utf8(html), "html") + msg = MIMEText(_force_utf8(html), subtype) msg["Subject"] = subject msg["From"] = from_addr msg["To"] = to_addr diff --git a/r2/r2/lib/filters.py b/r2/r2/lib/filters.py index 6c32cea543..a54b92771b 100644 --- a/r2/r2/lib/filters.py +++ b/r2/r2/lib/filters.py @@ -27,6 +27,7 @@ from xml.sax.handler import ContentHandler from lxml.sax import saxify import lxml.etree +from BeautifulSoup import BeautifulSoup from pylons import g, c @@ -160,7 +161,7 @@ def startElementNS(self, tagname, qname, attrs): } markdown_boring_tags = ('p', 'em', 'strong', 'br', 'ol', 'ul', 'hr', 'li', 'pre', 'code', 'blockquote', 'center', - 'tbody', 'thead', "tr", + 'tbody', 'thead', 'tr', 'sup', 'del', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',) for bt in markdown_boring_tags: markdown_ok_tags[bt] = () diff --git a/r2/r2/lib/indextank.py b/r2/r2/lib/indextank.py index 2eda569694..8152c6081e 100644 --- a/r2/r2/lib/indextank.py +++ b/r2/r2/lib/indextank.py @@ -26,11 +26,13 @@ from pylons import g, config import cPickle as pickle from time import sleep +from datetime import datetime from r2.models 
import * +from r2.lib.db.sorts import epoch_seconds from r2.lib import amqp from r2.lib.contrib import indextank_clientv1 -from r2.lib.contrib.indextank_clientv1 import HttpException as IndextankException +from r2.lib.contrib.indextank_clientv1 import HttpException as IndextankException, InvalidQuery as InvalidIndextankQuery from r2.lib.utils import in_chunks, progress, get_after, UrlParser from r2.lib.utils import domain, strordict_fullname @@ -78,7 +80,7 @@ def run(self, after=None, reverse=False, num=1000, _update=False): def _req_fs(self, sr_ids, field='sr_id'): if len(sr_ids) == 1: - return '+%s:%d' % (field, sr_ids[0]) + return '+%s:%s' % (field, sr_ids[0]) else: return '+(%s)' % ' OR '.join(('%s:%s' % (field, sr_id)) for sr_id in sr_ids) @@ -96,6 +98,8 @@ def _run(self, start=0, num=1000, _update=False): elif isinstance(self.sr, MultiReddit): q.append(self._req_fs( self.sr.sr_ids)) + elif isinstance(self.sr, DomainSR): + q.append('+site:%s' % (self.sr.domain)) elif self.sr == Friends and c.user_is_loggedin and c.user.friends: friend_ids = c.user.friends[:100] # we're not about to # look up more than 100 @@ -222,10 +226,14 @@ def inject(things, boost_only=False): if update_things: maps = maps_from_things(update_things, boost_only = boost_only) + + indexstart = epoch_seconds(datetime.now(g.tz)) + if update_things: inject_maps(maps, boost_only=boost_only) if delete_things: for thing in delete_things: delete_thing(thing) + return epoch_seconds(datetime.now(g.tz)) - indexstart def rebuild_index(after_id = None, estimate=10000000): cls = Link @@ -253,6 +261,8 @@ def run_changed(drain=False, limit=1000): IndexTank """ def _run_changed(msgs, chan): + start = datetime.now(g.tz) + changed = map(lambda x: strordict_fullname(x.body), msgs) boost = set() @@ -282,18 +292,23 @@ def _run_changed(msgs, chan): things = Thing._by_fullname(boost | add, data=True, return_dict=True) - print ("%d messages: %d docs, %d boosts (%d duplicates, %s remaining)" - % (len(changed), - len(add), - len(boost), - len(changed) - len(things), - msgs[-1].delivery_info.get('message_count', 'unknown'), - )) - + boost_time = add_time = 0.0 if boost: - inject([things[fname] for fname in boost], boost_only=True) + boost_time = inject([things[fname] for fname in boost], boost_only=True) if add: - inject([things[fname] for fname in add]) + add_time = inject([things[fname] for fname in add]) + + totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start) + + print ("%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs (%d duplicates, %s remaining)" + % (start, + len(changed), + len(add), add_time, + len(boost), boost_time, + totaltime, + len(changed) - len(things), + msgs[-1].delivery_info.get('message_count', 'unknown'), + )) amqp.handle_items('indextank_changes', _run_changed, limit=limit, drain=drain, verbose=False) diff --git a/r2/r2/lib/jsontemplates.py b/r2/r2/lib/jsontemplates.py index 4abde1ee94..d20e88950d 100644 --- a/r2/r2/lib/jsontemplates.py +++ b/r2/r2/lib/jsontemplates.py @@ -24,7 +24,7 @@ from mako.template import Template from r2.lib.filters import spaceCompress, safemarkdown import time, pytz -from pylons import c +from pylons import c, g def api_type(subtype = ''): return 'api-' + subtype if subtype else 'api' @@ -184,10 +184,18 @@ class SubredditJsonTemplate(ThingJsonTemplate): _data_attrs_ = ThingJsonTemplate.data_attrs(subscribers = "_ups", title = "title", url = "path", - over18 = "over_18", + over18 = "over_18", description = "description", display_name = "name") + def thing_attr(self, 
thing, attr): + # Don't reveal revenue information via /r/lounge's subscribers + if (attr == "_ups" and g.lounge_reddit + and thing.name == g.lounge_reddit): + return 0 + else: + return ThingJsonTemplate.thing_attr(self, thing, attr) + class AccountJsonTemplate(ThingJsonTemplate): _data_attrs_ = ThingJsonTemplate.data_attrs(name = "name", link_karma = "safe_karma", diff --git a/r2/r2/lib/lock.py b/r2/r2/lib/lock.py index c399e6e7a0..0b89234183 100644 --- a/r2/r2/lib/lock.py +++ b/r2/r2/lib/lock.py @@ -41,7 +41,7 @@ class MemcacheLock(object): attempt to grab a lock by 'adding' the lock name. If the response is True, we have the lock. If it's False, someone else has it.""" - def __init__(self, key, cache, time = 30, timeout = 30): + def __init__(self, key, cache, time = 30, timeout = 30, verbose=True): # get a thread-local set of locks that we own self.locks = locks.locks = getattr(locks, 'locks', set()) @@ -50,6 +50,7 @@ def __init__(self, key, cache, time = 30, timeout = 30): self.time = time self.timeout = timeout self.have_lock = False + self.verbose = verbose def __enter__(self): start = datetime.now() @@ -63,14 +64,17 @@ def __enter__(self): #try and fetch the lock, looping until it's available while not self.cache.add(self.key, my_info, time = self.time): if (datetime.now() - start).seconds > self.timeout: - info = self.cache.get(self.key) - if info: - info = "%s %s\n%s" % info + if self.verbose: + info = self.cache.get(self.key) + if info: + info = "%s %s\n%s" % info + else: + info = "(nonexistent)" + msg = ("\nSome jerk is hogging %s:\n%s" % + (self.key, info)) + msg += "^^^ that was the stack trace of the lock hog, not me." else: - info = "(nonexistent)" - msg = ("\nSome jerk is hogging %s:\n%s" % - (self.key, info)) - msg += "^^^ that was the stack trace of the lock hog, not me." + msg = "Timed out waiting for %s" % self.key raise TimeoutExpired(msg) sleep(.01) diff --git a/r2/r2/lib/manager/db_manager.py b/r2/r2/lib/manager/db_manager.py index b6514d8a7b..b270a19d45 100644 --- a/r2/r2/lib/manager/db_manager.py +++ b/r2/r2/lib/manager/db_manager.py @@ -27,14 +27,16 @@ logger = logging.getLogger('dm_manager') logger.addHandler(logging.StreamHandler()) -def get_engine(name, db_host='', db_user='', db_pass='', +def get_engine(name, db_host='', db_user='', db_pass='', db_port='5432', pool_size = 5, max_overflow = 5): + db_port = int(db_port) + host = db_host if db_host else '' if db_user: if db_pass: - host = "%s:%s@%s" % (db_user, db_pass, db_host) + host = "%s:%s@%s:%s" % (db_user, db_pass, db_host, db_port) else: - host = "%s@%s" % (db_user, db_host) + host = "%s@%s:%s" % (db_user, db_host,db_port) return sa.create_engine('postgres://%s/%s' % (host, name), strategy='threadlocal', pool_size = int(pool_size), diff --git a/r2/r2/lib/media.py b/r2/r2/lib/media.py index 8791e4b3ef..efc4fee161 100644 --- a/r2/r2/lib/media.py +++ b/r2/r2/lib/media.py @@ -93,6 +93,16 @@ def set_media(link, force = False): thumbnail = scraper.thumbnail() media_object = scraper.media_object() + if media_object: + # the scraper should be able to make a media embed out of the + # media object it just gave us. 
if not, null out the media object + # to protect downstream code + res = scraper.media_embed(**media_object) + + if not res: + print "%s made a bad media obj for link %s" % (scraper, link._id36) + media_object = None + if thumbnail: upload_thumb(link, thumbnail) diff --git a/r2/r2/lib/menus.py b/r2/r2/lib/menus.py index 8f9ceddbe9..c163a64484 100644 --- a/r2/r2/lib/menus.py +++ b/r2/r2/lib/menus.py @@ -101,7 +101,7 @@ def __getattr__(self, attr): ad_inq = _("inquire about advertising"), random = _('random'), iphone = _("iPhone app"), - gold = _('gold'), + gold = _('reddit gold'), #preferences options = _('options'), diff --git a/r2/r2/lib/migrate/migrate.py b/r2/r2/lib/migrate/migrate.py index 52eb7959ee..a806530384 100644 --- a/r2/r2/lib/migrate/migrate.py +++ b/r2/r2/lib/migrate/migrate.py @@ -171,8 +171,6 @@ def gen_keys(): for link in fetch_things2(l_q, verbosity): yield comments_key(link._id) yield last_modified_key(link, 'comments') - if not getattr(link, 'is_self', False) and hasattr(link, 'url'): - yield Link.by_url_key(link.url) a_q = Account._query(Account.c._spam == (True, False), sort=desc('_date'), @@ -224,52 +222,6 @@ def gen_keys(): print 'Done %d: %r' % (done, keys[-1]) populate(keys) -def add_byurl_prefix(): - """Run one before the byurl prefix is set, and once after (killing - it after it gets when it started the first time""" - - from datetime import datetime - from r2.models import Link - from r2.lib.filters import _force_utf8 - from pylons import g - from r2.lib.utils import fetch_things2 - from r2.lib.db.operators import desc - from r2.lib.utils import base_url - - now = datetime.now(g.tz) - print 'started at %s' % (now,) - - l_q = Link._query( - Link.c._date < now, - data=True, - sort=desc('_date')) - - # from link.py - def by_url_key(url, prefix=''): - s = _force_utf8(base_url(url.lower())) - return '%s%s' % (prefix, s) - - done = 0 - for links in fetch_things2(l_q, 1000, chunks=True): - done += len(links) - print 'Doing: %r, %s..%s' % (done, links[-1]._date, links[0]._date) - - # only links with actual URLs - links = filter(lambda link: (not getattr(link, 'is_self', False) - and getattr(link, 'url', '')), - links) - - # old key -> new key - translate = dict((by_url_key(link.url), - by_url_key(link.url, prefix='byurl_')) - for link in links) - - old = g.permacache.get_multi(translate.keys()) - new = dict((translate[old_key], value) - for (old_key, value) - in old.iteritems()) - g.permacache.set_multi(new) - # alter table bids DROP constraint bids_pkey; # alter table bids add column campaign integer; # update bids set campaign = 0; @@ -318,88 +270,95 @@ def promote_v2(): else: print "no campaign information: ", l +def port_cassavotes(): + from r2.models import Vote, Account, Link, Comment + from r2.models.vote import CassandraVote, CassandraLinkVote, CassandraCommentVote + from r2.lib.db.tdb_cassandra import CL + from r2.lib.utils import fetch_things2, to36, progress + + ts = [(Vote.rel(Account, Link), CassandraLinkVote), + (Vote.rel(Account, Comment), CassandraCommentVote)] + + dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic']) + + for prel, crel in ts: + vq = prel._query(sort=desc('_date'), + data=True, + eager_load=False) + vq = fetch_things2(vq) + vq = progress(vq, persec=True) + for v in vq: + t1 = to36(v._thing1_id) + t2 = to36(v._thing2_id) + cv = crel(thing1_id = t1, + thing2_id = t2, + date=v._date, + name=v._name) + for dkey, dval in v._t.iteritems(): + if dkey in dataattrs: + setattr(cv, dkey, dval) + + 
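# Editor's sketch: the port_* helpers in this hunk all follow the same backfill shape:
# stream rows out of postgres (fetch_things2 + progress), convert each one, and write it
# to Cassandra at consistency level ONE, batching where possible (as port_cassaurls does
# with LinksByUrl._cf.batch below). A hedged generic version of that write side, assuming
# a pycassa ColumnFamily; the pool, column-family name, and function names are illustrative.
import pycassa

def port_rows(rows, pool, cf_name, chunk_size=500):
    """rows: iterable of (key, columns_dict). Flush to Cassandra in batches at CL.ONE."""
    cf = pycassa.ColumnFamily(pool, cf_name)
    pending = []
    for key, columns in rows:
        pending.append((key, columns))
        if len(pending) >= chunk_size:
            _flush(cf, pending)
            pending = []
    if pending:
        _flush(cf, pending)

def _flush(cf, pending):
    # the batch mutator sends all queued inserts when the 'with' block exits
    with cf.batch(write_consistency_level=pycassa.ConsistencyLevel.ONE) as b:
        for key, columns in pending:
            b.insert(key, columns)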
cv._commit(write_consistency_level=CL.ONE) + +def port_cassasaves(after_id=None, estimate=12489897): + from r2.models import SaveHide, CassandraSave + from r2.lib.db.operators import desc + from r2.lib.db.tdb_cassandra import CL + from r2.lib.utils import fetch_things2, to36, progress -def shorten_byurl_keys(): - """We changed by_url keys from a format like - byurl_google.com... - to: - byurl(1d5920f4b44b27a802bd77c4f0536f5a, google.com...) - so that they would fit in memcache's 251-char limit - """ + q = SaveHide._query( + SaveHide.c._name == 'save', + sort=desc('_date'), + data=False, + eager_load=False) - from datetime import datetime - from hashlib import md5 - from r2.models import Link - from r2.lib.filters import _force_utf8 - from pylons import g - from r2.lib.utils import fetch_things2, in_chunks + if after_id is not None: + q._after(SaveHide._byID(after_id)) + + for sh in progress(fetch_things2(q), estimate=estimate): + + csh = CassandraSave(thing1_id = to36(sh._thing1_id), + thing2_id = to36(sh._thing2_id), + date = sh._date) + csh._commit(write_consistency_level = CL.ONE) + +def port_cassaurls(after_id=None, estimate=15231317): + from r2.models import Link, LinksByUrl + from r2.lib.db import tdb_cassandra from r2.lib.db.operators import desc - from r2.lib.utils import base_url, progress - - # from link.py - def old_by_url_key(url): - prefix='byurl_' - s = _force_utf8(base_url(url.lower())) - return '%s%s' % (prefix, s) - def new_by_url_key(url): - maxlen = 250 - template = 'byurl(%s,%s)' - keyurl = _force_utf8(base_url(url.lower())) - hexdigest = md5(keyurl).hexdigest() - usable_len = maxlen-len(template)-len(hexdigest) - return template % (hexdigest, keyurl[:usable_len]) - - verbosity = 1000 - - l_q = Link._query( - Link.c._spam == (True, False), - data=True, - sort=desc('_date')) - for links in ( - in_chunks( - progress( - fetch_things2(l_q, verbosity), - key = lambda link: link._date, - verbosity=verbosity, - estimate=int(9.9e6), - persec=True, - ), - verbosity)): - # only links with actual URLs - links = filter(lambda link: (not getattr(link, 'is_self', False) - and getattr(link, 'url', '')), - links) - - # old key -> new key - translate = dict((old_by_url_key(link.url), - new_by_url_key(link.url)) - for link in links) - - old = g.permacache.get_multi(translate.keys()) - new = dict((translate[old_key], value) - for (old_key, value) - in old.iteritems()) - g.permacache.set_multi(new) - -def prime_url_cache(f, verbosity = 10000): - import gzip, time - from pylons import g - handle = gzip.open(f, 'rb') - counter = 0 - start_time = time.time() - for line in handle: - try: - tid, key, url, kind = line.split('|') - tid = int(tid) - if url.lower() != "self": - key = Link.by_url_key_new(url) - link_ids = g.urlcache.get(key) or [] - if tid not in link_ids: - link_ids.append(tid) - g.urlcache.set(key, link_ids) - except ValueError: - print "FAIL: %s" % line - counter += 1 - if counter % verbosity == 0: - print "%6d: %s" % (counter, line) - print "--> doing %5.2f / s" % (float(counter) / (time.time() - start_time)) + from r2.lib.db.tdb_cassandra import CL + from r2.lib.utils import fetch_things2, in_chunks, progress + + q = Link._query(Link.c._spam == (True, False), + sort=desc('_date'), data=True) + if after_id: + q._after(Link._byID(after_id,data=True)) + q = fetch_things2(q, chunk_size=500) + q = progress(q, estimate=estimate) + q = (l for l in q + if getattr(l, 'url', 'self') != 'self' + and not getattr(l, 'is_self', False)) + chunks = in_chunks(q, 500) + + for chunk in chunks: + 
with LinksByUrl._cf.batch(write_consistency_level = CL.ONE) as b: + for l in chunk: + k = LinksByUrl._key_from_url(l.url) + if k: + b.insert(k, {l._id36: l._id36}) + +def port_cassahides(): + from r2.models import SaveHide, CassandraHide + from r2.lib.db.tdb_cassandra import CL + from r2.lib.db.operators import desc + from r2.lib.utils import fetch_things2, timeago, progress + + q = SaveHide._query(SaveHide.c._date > timeago('1 week'), + SaveHide.c._name == 'hide', + sort=desc('_date')) + q = fetch_things2(q) + q = progress(q, estimate=1953374) + + for sh in q: + CassandraHide._hide(sh._thing1, sh._thing2, + write_consistency_level=CL.ONE) diff --git a/r2/r2/lib/migrate/mr_permacache.py b/r2/r2/lib/migrate/mr_permacache.py index 92f3b702f8..1c03a6f3ed 100644 --- a/r2/r2/lib/migrate/mr_permacache.py +++ b/r2/r2/lib/migrate/mr_permacache.py @@ -50,7 +50,7 @@ pv reddit_linkvote.dump | ./mr_permacache "linkvote_listings()" | sort -T. -S200m > linkvotes.listings #savehide -psql -F"\t" -A -t -d newreddit -U ri -h $VOTEDBHOST \ +psql -F"\t" -A -t -d newreddit -U ri -h $SAVEHIDEDBHOST \ -c "\\copy (select r.rel_id, 'savehide', r.thing1_id, r.thing2_id, r.name, extract(epoch from r.date) from reddit_rel_savehide r) to 'reddit_savehide.dump'" @@ -65,9 +65,10 @@ """ -import os +import os, os.path, errno import sys import itertools +from hashlib import md5 from r2.lib import mr_tools from r2.lib.mr_tools import dataspec_m_thing, dataspec_m_rel, join_things @@ -77,7 +78,7 @@ from r2.models import * from r2.lib.db.sorts import epoch_seconds, score, controversy, _hot -from r2.lib.utils import fetch_things2, in_chunks, progress, UniqueIterator +from r2.lib.utils import fetch_things2, in_chunks, progress, UniqueIterator, tup from r2.lib import comment_tree from r2.lib.db import queries @@ -192,9 +193,39 @@ def store_keys(key, maxes): def top1k_writefiles(dirname): """Divide up the top 1k of each key into its own file to make - restarting after a failure much easier""" + restarting after a failure much easier. Pairs with + write_permacache_from_dir""" + def hashdir(name, levels = [3]): + # levels is a list of how long each stage if the hashdirname + # should be. So [2,2] would make dirs like + # 'ab/cd/thelisting.txt' (and this function would just return + # the string 'ab/cd', so that you have the dirname that you + # can create before os.path.joining to the filename) + h = md5(name).hexdigest() + + last = 0 + dirs = [] + for l in levels: + dirs.append(h[last:last+l]) + last += l + + return os.path.join(*dirs) + def post(key, maxes): - with open(os.path.join(dirname, key), 'w') as f: + # we're taking a hash like 12345678901234567890123456789012 + # and making a directory name two deep out of the first half + # of the characters. We may want to tweak this as the number + # of listings + + hd = os.path.join(dirname, hashdir(key)) + try: + os.makedirs(hd) + except OSError as e: + if e.errno != errno.EEXIST: + raise + filename = os.path.join(hd, key) + + with open(filename, 'w') as f: for item in maxes: f.write('%s\t' % key) f.write('\t'.join(item)) @@ -209,15 +240,35 @@ def top1k_writepermacache(fd = sys.stdin): fd = fd) def write_permacache_from_dir(dirname): - for fname in progress(os.listdir(dirname), persec=True): + # we want the whole list so that we can display accurate progress + # information. 
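# Editor's sketch: the hashdir() helper above fans the per-key listing files out into
# md5-prefix directories so no single directory accumulates millions of entries. A
# standalone restatement with an example; the function name, levels, and sample key
# are illustrative, not part of the reddit code.
import os
from hashlib import md5

def hashdir(name, levels=(2, 2)):
    """Return e.g. 'ab/cd' for levels (2, 2): consecutive slices of the md5
    hexdigest of `name`, joined into a relative directory path."""
    h = md5(name).hexdigest()
    dirs, last = [], 0
    for l in levels:
        dirs.append(h[last:last + l])
        last += l
    return os.path.join(*dirs)

# e.g. hashdir('sr-top-link-12345') might return '7f/3a'; a caller would
# os.makedirs() that directory (ignoring EEXIST) before writing the listing file,
# which is exactly what top1k_writefiles does with its one-level default.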
If we're operating on more than tens of millions of + # files, we should either bail out or tweak this to not need the + # whole list at once + allfiles = [] + for root, dirs, files in os.walk(dirname): + for f in files: + allfiles.append(os.path.join(root, f)) + + for fname in progress(allfiles, persec=True): try: - fpath = os.path.join(dirname, fname) - write_permacache_from_file(fpath) - os.unlink(fpath) + write_permacache_from_file(fname) + os.unlink(fname) except: mr_tools.status('failed on %r' % fname) raise + mr_tools.status('Removing empty directories') + for root, dirs, files in os.walk(dirname, topdown=False): + for d in dirs: + dname = os.path.join(root, d) + try: + os.rmdir(dname) + except OSError as e: + if e.errno == errno.ENOTEMPTY: + mr_tools.status('%s not empty' % (dname,)) + else: + raise + def write_permacache_from_file(fname): with open(fname) as fd: top1k_writepermacache(fd = fd) diff --git a/r2/r2/lib/migrate/mr_urls.py b/r2/r2/lib/migrate/mr_urls.py deleted file mode 100644 index d6de69ff2c..0000000000 --- a/r2/r2/lib/migrate/mr_urls.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -export LINKDBHOST=prec01 -export USER=ri -export INI=production.ini -cd ~/reddit/r2 -time psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \ - -c "\\copy (select t.thing_id, 'thing', 'link', - t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date) - from reddit_thing_link t - where not t.spam and not t.deleted - ) - to '/scratch/reddit_thing_link.dump'" -time psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \ - -c "\\copy (select d.thing_id, 'data', 'link', - d.key, d.value - from reddit_data_link d - where d.key = 'url' ) to '/scratch/reddit_data_link.dump'" -cat /scratch/reddit_data_link.dump /scratch/reddit_thing_link.dump | sort -T. -S200m | paster --plugin=r2 run $INI r2/lib/migrate/mr_urls.py -c "join_links()" > /scratch/links.joined -cat /scratch/links.joined | paster --plugin=r2 run $INI r2/lib/migrate/mr_urls.py -c "time_listings()" | sort -T. 
-S200m | paster --plugin=r2 run $INI r2/lib/migrate/mr_urls.py -c "write_permacache()" -""" - -import sys -from pylons import g - -from r2.models import Account, Subreddit, Link -from r2.lib import mr_tools - -def join_links(): - mr_tools.join_things(('url',)) - -def listings(): - @mr_tools.dataspec_m_thing(("url", str),) - def process(link): - if link.url: - yield (Link.by_url_key_new(link.url), link.timestamp, - link.thing_id) - - mr_tools.mr_map(process) - - -def store_keys(key, maxes): - if key.startswith('byurl'): - r = set(g.urlcache_new.get(key) or []) - new = set(int(x[-1]) for x in maxes) - r.update(new) - g.urlcache_new.set(key, list(sorted(r))) - -def write_permacache(fd = sys.stdin): - mr_tools.mr_reduce_max_per_key(lambda x: map(float, x[:-1]), num=1000, - post=store_keys, - fd = fd) diff --git a/r2/r2/lib/mr_account.py b/r2/r2/lib/mr_account.py index 6e7826a83a..8546d36418 100644 --- a/r2/r2/lib/mr_account.py +++ b/r2/r2/lib/mr_account.py @@ -47,7 +47,7 @@ """ import sys -from r2.models import Account, Subreddit, Link, Comment +from r2.models import Account, Subreddit, Link, Comment, NotFound from r2.lib.db.sorts import epoch_seconds, score, controversy, _hot from r2.lib.db import queries from r2.lib import mr_tools @@ -59,36 +59,6 @@ def join_links(): mr_tools.join_things(('author_id',)) - -def join_authors(): - """A reducer that joins thing table dumps and data table dumps""" - def process(thing_id, vals): - data = {} - authors = [] - gold = None - - for val in vals: - if ('comment' in val) or ("link" in val): - authors.append(mr_tools.format_dataspec(val, - ['data_type', # e.g. 'data' - 'thing_type', # e.g. 'link' - 'key', # e.g. 'sr_id' - 'tid' - ])) - elif 'account' in val: - gold = mr_tools.format_dataspec(val, - ['data_type', # e.g. 'data' - 'thing_type', # e.g. 'link' - 'key', # e.g. 
'sr_id' - 'value']) - - if gold is not None: - for author in authors: - yield (author.tid, author.data_type, author.thing_type, - author.key, thing_id) - - mr_tools.mr_reduce(process) - def year_listings(): """ With an 'all' dump, generate the top and controversial per user per year @@ -165,14 +135,14 @@ def store_keys(key, maxes): acc_str, sort, time, account_id = key.split('-') account_id = int(account_id) - fn = queries.get_submitted if key.startswith('link-') else queries.get_comments + fn = queries._get_submitted if key.startswith('link-') else queries._get_comments - q = fn(Account._byID(account_id), sort, time) + q = fn(account_id, sort, time) if time == 'all': if sort == 'new': q._insert_tuples([(item[-1], float(item[0])) for item in maxes]) - else: + else: q._insert_tuples([tuple([item[-1]] + map(float, item[:-1])) for item in maxes]) else: diff --git a/r2/r2/lib/mr_tools.py b/r2/r2/lib/mr_tools.py deleted file mode 100755 index f944dedc56..0000000000 --- a/r2/r2/lib/mr_tools.py +++ /dev/null @@ -1,233 +0,0 @@ -import sys - -class LineReader(object): - """A simple class to read lines from a File (like stdin) that - supports pushing lines back into the buffer""" - def __init__(self, stream): - self.stream = stream - self.pushed_back = [] - - def readline(self): - if self.pushed_back: - return self.pushed_back.pop() - else: - return self.stream.readline() - - def push_back(self, line): - self.pushed_back.append(line) - -def in_chunks(it, size=25): - chunk = [] - it = iter(it) - try: - while True: - chunk.append(it.next()) - if len(chunk) >= size: - yield chunk - chunk = [] - except StopIteration: - if chunk: - yield chunk - -def valiter(key, lr, firstline): - line = firstline - while line: - linevals = line.strip('\n').split('\t') - readkey, vals = linevals[0], linevals[1:] - if readkey == key: - yield vals - line = lr.readline() - else: - lr.push_back(line) - line = None - -def keyiter(stream): - lr = LineReader(stream) - - line = lr.readline() - while line: - key = line.strip('\n').split('\t',1)[0] - - vi = valiter(key, lr, line) - yield key, vi - # read the rest of the valueiter before reading any more lines - try: - while vi.next(): - pass - except StopIteration: - pass - - line = lr.readline() - -def status(msg, **opts): - if opts: - msg = msg % opts - sys.stderr.write("%s\n" % msg) - -def emit(vals): - print '\t'.join(str(val) for val in vals) - -def emit_all(vals): - for val in vals: - emit(val) - -class Storage(dict): - def __getattr__(self, attr): - return self[attr] - -def format_dataspec(msg, specs): - # spec() =:= name | (name, fn) - # specs =:= [ spec() ] - ret = Storage() - for val, spec in zip(msg, specs): - if isinstance(spec, basestring): - name = spec - ret[name] = val - else: - name, fn = spec - ret[name] = fn(val) - return Storage(**ret) - -class dataspec_m(object): - def __init__(self, *specs): - self.specs = specs - - def __call__(self, fn): - specs = self.specs - def wrapped_fn(args): - return fn(format_dataspec(args, specs)) - return wrapped_fn - -class dataspec_r(object): - def __init__(self, *specs): - self.specs = specs - - def __call__(self, fn): - specs = self.specs - def wrapped_fn(key, msgs): - return fn(key, ( format_dataspec(msg, specs) - for msg in msgs )) - return wrapped_fn - -def mr_map(process, fd = sys.stdin): - for line in fd: - vals = line.strip('\n').split('\t') - for res in process(vals): - emit(res) - -def mr_reduce(process, fd = sys.stdin): - for key, vals in keyiter(fd): - for res in process(key, vals): - emit(res) - -def 
mr_foldl(process, init, emit = False, fd = sys.stdin): - acc = init - for key, vals in keyiter(fd): - acc = process(key, vals, acc) - - if emit: - emit(acc) - - return acc - -def mr_max(process, idx = 0, num = 10, emit = False, fd = sys.stdin): - """a reducer that, in the process of reduction, only returns the - top N results""" - maxes = [] - for key, vals in keyiter(fd): - for newvals in in_chunks(process(key, vals)): - for val in newvals: - if len(maxes) < num or newval[idx] > maxes[-1][idx]: - maxes.append(newval) - maxes.sort(reverse=True) - maxes = maxes[:num] - - if emit: - emit_all(maxes) - - return maxes - -def mr_reduce_max_per_key(sort_key, post = None, num = 10, fd = sys.stdin): - def process(key, vals): - maxes = [] - for val_chunk in in_chunks(vals, num): - maxes.extend(val_chunk) - maxes.sort(reverse=True, key=sort_key) - maxes = maxes[:num] - if post: - # if we were passed a "post" function, he takes - # responsibility for emitting - post(key, maxes) - else: - for item in maxes: - yield [key] + item - - return mr_reduce(process, fd = fd) - -def join_things(fields, deleted=False, spam=True): - """A reducer that joins thing table dumps and data table dumps""" - def process(thing_id, vals): - data = {} - thing = None - - for val in vals: - if val[0] == 'thing': - thing = format_dataspec(val, - ['data_type', # e.g. 'thing' - 'thing_type', # e.g. 'link' - 'ups', - 'downs', - 'deleted', - 'spam', - 'timestamp']) - elif val[0] == 'data': - val = format_dataspec(val, - ['data_type', # e.g. 'data' - 'thing_type', # e.g. 'link' - 'key', # e.g. 'sr_id' - 'value']) - if val.key in fields: - data[val.key] = val.value - - if ( - # silently ignore if we didn't see the 'thing' row - thing is not None - - # remove spam and deleted as appriopriate - and (deleted or thing.deleted == 'f') - and (spam or thing.spam == 'f') - - # and silently ignore items that don't have all of the - # data that we need - and all(field in data for field in fields)): - - yield ((thing_id, thing.thing_type, thing.ups, thing.downs, - thing.deleted, thing.spam, thing.timestamp) - + tuple(data[field] for field in fields)) - - mr_reduce(process) - -def dataspec_m_rel(*fields): - return dataspec_m(*((('rel_id', int), - 'rel_type', - ('thing1_id', int), - ('thing2_id', int), - 'name', - ('timestamp', float)) - + fields)) - -def dataspec_m_thing(*fields): - return dataspec_m(*((('thing_id', int), - 'thing_type', - ('ups', int), - ('downs', int), - ('deleted', lambda x: x == 't'), - ('spam', lambda x: x == 't'), - ('timestamp', float)) - + fields)) - -if __name__ == '__main__': - for key, vals in keyiter(sys.stdin): - print key, vals - for val in vals: - print '\t', val diff --git a/r2/r2/lib/mr_tools/__init__.py b/r2/r2/lib/mr_tools/__init__.py new file mode 100644 index 0000000000..680add2f2e --- /dev/null +++ b/r2/r2/lib/mr_tools/__init__.py @@ -0,0 +1,2 @@ +from r2.lib.mr_tools._mr_tools import * +from r2.lib.mr_tools.mr_tools import * diff --git a/r2/r2/lib/mr_tools/_mr_tools.pyx b/r2/r2/lib/mr_tools/_mr_tools.pyx new file mode 100644 index 0000000000..fb2b5bbcda --- /dev/null +++ b/r2/r2/lib/mr_tools/_mr_tools.pyx @@ -0,0 +1,192 @@ +import sys +from itertools import imap, groupby + +stdin = sys.stdin +stderr = sys.stderr + +class _Chunker(object): + __slots__ = ('_size', '_done', '_it') + + def __init__(self, it, long size=25): + self._it = iter(it) + self._size = range(size) + self._done = 0 + + def next(self): + if self._done: + raise StopIteration + + cdef list chunk = [] + + for x in self._size: + try: + 
chunk.append(next(self._it)) + except StopIteration: + if chunk: + self._done = 1 + return chunk + else: + raise + return chunk + +cdef class in_chunks(object): + cdef it + cdef int size + + def __init__(self, it, int size=25): + self.it = it + self.size = size + + def __iter__(self): + return _Chunker(self.it, self.size) + +cdef class Storage(dict): + def __getattr__(self, attr): + return self[attr] + +def valiter(grouper): + key, group = grouper + return key, imap(lambda x: x[1:], group) + +cpdef list _keyiter_splitter(str x): + x = x.strip('\n') + return x.split('\t') + +def keyiter(stream=stdin): + lines = imap(_keyiter_splitter, stream) + groups = groupby(lines, lambda x: x[0]) + return imap(valiter, groups) + +def emit(vals): + print '\t'.join(map(str, vals)) + +def emit_all(vals): + for val in vals: + emit(val) + +def status(msg, **opts): + if opts: + msg = msg % opts + stderr.write("%s\n" % msg) + +cpdef Storage format_dataspec(msg, specs): + # spec() =:= name | (name, fn) + # specs =:= [ spec() ] + cdef Storage ret = Storage() + for val, spec in zip(msg, specs): + if isinstance(spec, basestring): + # the spec is just a name + name = spec + ret[name] = val + else: + # the spec is a tuple of the name and the function to pass + # the string through to make the real value + name, fn = spec + ret[name] = fn(val) + return Storage(**ret) + +cdef class dataspec_m(object): + cdef specs + + def __init__(self, *specs): + self.specs = specs + + def __call__(self, fn): + specs = self.specs + def wrapped_fn_m(args): + return fn(format_dataspec(args, specs)) + return wrapped_fn_m + +cdef class dataspec_r(object): + cdef specs + + def __init__(self, *specs): + self.specs = specs + + def __call__(self, fn): + specs = self.specs + def wrapped_fn_r(key, msgs): + return fn(key, imap(lambda msg: format_dataspec(msg, specs), + msgs)) + return wrapped_fn_r + +cpdef mr_map(process, fd = stdin): + for line in fd: + vals = line.strip('\n').split('\t') + for res in process(vals): + emit(res) + +cpdef mr_reduce(process, fd = stdin): + for key, vals in keyiter(fd): + for res in process(key, vals): + emit(res) + +cpdef mr_foldl(process, init, emit = False, fd = stdin): + acc = init + for key, vals in keyiter(fd): + acc = process(key, vals, acc) + + if emit: + emit(acc) + + return acc + +cpdef mr_max(process, int idx = 0, int num = 10, emit = False, fd = stdin): + """a reducer that, in the process of reduction, only returns the + top N results""" + cdef list maxes = [] + for key, vals in keyiter(fd): + for newvals in in_chunks(process(key, vals)): + for val in newvals: + if len(maxes) < num or val[idx] > maxes[-1][idx]: + maxes.append(val) + maxes.sort(reverse=True) + maxes = maxes[:num] + + if emit: + emit_all(maxes) + + return maxes + +cpdef _sbool(str x): + return x == 't' + +def dataspec_m_rel(*fields): + return dataspec_m(*((('rel_id', int), + 'rel_type', + ('thing1_id', int), + ('thing2_id', int), + 'name', + ('timestamp', float)) + + fields)) + +def dataspec_m_thing(*fields): + return dataspec_m(*((('thing_id', int), + 'thing_type', + ('ups', int), + ('downs', int), + ('deleted', _sbool), + ('spam', _sbool), + ('timestamp', float)) + + fields)) + +def mr_reduce_max_per_key(sort_key, post = None, num = 10, fd = sys.stdin): + def process(key, vals): + cdef list maxes = [] + + # pull items out in chunks (of the total number that we want), + # sorting and slicing after each batch + for val_chunk in in_chunks(vals, num): + maxes.extend(val_chunk) + maxes.sort(reverse=True, key=sort_key) + maxes = maxes[:num] 
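# Editor's sketch: the Cython module above implements map/reduce over sorted,
# tab-separated text streams: keyiter() groups consecutive lines by their first column,
# and reducers like mr_reduce_max_per_key keep only the top N values per key. A
# pure-Python illustration of that flow on a tiny in-memory stream (it assumes the
# input is already sorted by key, as the sort(1) steps in these pipelines guarantee;
# the helper name and sample data are made up).
from itertools import groupby
from StringIO import StringIO

def keyed(stream):
    # split each "key\tval1\tval2..." line and group consecutive lines by key
    lines = (line.rstrip('\n').split('\t') for line in stream)
    for key, group in groupby(lines, key=lambda parts: parts[0]):
        yield key, (parts[1:] for parts in group)

sample = StringIO("t3_a\t5\nt3_a\t9\nt3_a\t2\nt3_b\t7\n")
for key, vals in keyed(sample):
    top2 = sorted(vals, key=lambda v: float(v[0]), reverse=True)[:2]
    print key, top2   # prints: t3_a [['9'], ['5']]  then  t3_b [['7']]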
+ + if post: + # if we were passed a "post" function, he takes + # responsibility for emitting + post(key, maxes) + return [] + return [ ([key] + item) + for item in maxes ] + + return mr_reduce(process, fd = fd) diff --git a/r2/r2/lib/mr_tools/mr_tools.py b/r2/r2/lib/mr_tools/mr_tools.py new file mode 100755 index 0000000000..1a0f58da95 --- /dev/null +++ b/r2/r2/lib/mr_tools/mr_tools.py @@ -0,0 +1,53 @@ +import sys +from r2.lib.mr_tools._mr_tools import mr_reduce, format_dataspec + +def join_things(fields, deleted=False, spam=True): + """A reducer that joins thing table dumps and data table dumps""" + def process(thing_id, vals): + data = {} + thing = None + + for val in vals: + if val[0] == 'thing': + thing = format_dataspec(val, + ['data_type', # e.g. 'thing' + 'thing_type', # e.g. 'link' + 'ups', + 'downs', + 'deleted', + 'spam', + 'timestamp']) + elif val[0] == 'data': + val = format_dataspec(val, + ['data_type', # e.g. 'data' + 'thing_type', # e.g. 'link' + 'key', # e.g. 'sr_id' + 'value']) + if val.key in fields: + data[val.key] = val.value + + if ( + # silently ignore if we didn't see the 'thing' row + thing is not None + + # remove spam and deleted as appriopriate + and (deleted or thing.deleted == 'f') + and (spam or thing.spam == 'f') + + # and silently ignore items that don't have all of the + # data that we need + and all(field in data for field in fields)): + + yield ((thing_id, thing.thing_type, thing.ups, thing.downs, + thing.deleted, thing.spam, thing.timestamp) + + tuple(data[field] for field in fields)) + + mr_reduce(process) + +def test(): + from r2.lib.mr_tools._mr_tools import keyiter + + for key, vals in keyiter(): + print key, vals + for val in vals: + print '\t', val diff --git a/r2/r2/lib/mr_top.py b/r2/r2/lib/mr_top.py index d64ac301e1..4b9688244f 100644 --- a/r2/r2/lib/mr_top.py +++ b/r2/r2/lib/mr_top.py @@ -133,8 +133,8 @@ def store_keys(key, maxes): if key.startswith('user-'): acc_str, keytype, account_id = key.split('-') account_id = int(account_id) - fn = queries.get_submitted if keytype == 'submitted' else queries.get_comments - q = fn(Account._byID(account_id), 'new', 'all') + fn = queries._get_submitted if keytype == 'submitted' else queries._get_comments + q = fn(account_id, 'new', 'all') q._replace([(fname, float(timestamp)) for (timestamp, fname) in maxes]) @@ -148,7 +148,7 @@ def store_keys(key, maxes): # it sort = 'controversial' - q = queries.get_links(Subreddit._byID(sr_id), sort, time) + q = queries._get_links(sr_id, sort, time) q._replace([tuple([item[-1]] + map(float, item[:-1])) for item in maxes]) elif key.startswith('domain/'): diff --git a/r2/r2/lib/pages/graph.py b/r2/r2/lib/pages/graph.py index 6af2270a08..af7e459976 100644 --- a/r2/r2/lib/pages/graph.py +++ b/r2/r2/lib/pages/graph.py @@ -39,10 +39,11 @@ def make_date_axis_labels(series): _min = min(series) delta = _max - _min zero = datetime.timedelta(0) - if delta != zero and delta < datetime.timedelta(0, 0.5 * 86400): + has_hour = isinstance(_min, datetime.datetime) + if delta != zero and has_hour and delta < datetime.timedelta(0, 0.5*86400): test = lambda cur, prev: cur.hour != prev.hour and cur.hour % 3 == 0 format = "%H:00" - elif delta != zero and delta < datetime.timedelta(2): + elif delta != zero and has_hour and delta < datetime.timedelta(2): test = lambda cur, prev: cur.hour != prev.hour and cur.hour % 6 == 0 format = "%H:00" elif delta == zero or delta < datetime.timedelta(7): diff --git a/r2/r2/lib/pages/pages.py b/r2/r2/lib/pages/pages.py index e064f07e92..e1eade2781 100644 
--- a/r2/r2/lib/pages/pages.py +++ b/r2/r2/lib/pages/pages.py @@ -114,7 +114,8 @@ class Reddit(Templated): def __init__(self, space_compress = True, nav_menus = None, loginbox = True, infotext = '', content = None, title = '', robots = None, - show_sidebar = True, footer = True, **context): + show_sidebar = True, footer = True, srbar = True, + **context): Templated.__init__(self, **context) self.title = title self.robots = robots @@ -124,7 +125,7 @@ def __init__(self, space_compress = True, nav_menus = None, loginbox = True, self.space_compress = space_compress # instantiate a footer self.footer = RedditFooter() if footer else None - + #put the sort menus at the top self.nav_menu = MenuArea(menus = nav_menus) if nav_menus else None @@ -151,7 +152,7 @@ def __init__(self, space_compress = True, nav_menus = None, loginbox = True, self.infobar = InfoBar(message = infotext) self.srtopbar = None - if not c.cname and not is_api(): + if srbar and not c.cname and not is_api(): self.srtopbar = SubredditTopBar() if c.user_is_loggedin and self.show_sidebar and not is_api(): @@ -162,7 +163,7 @@ def __init__(self, space_compress = True, nav_menus = None, loginbox = True, self.toolbars = self.build_toolbars() def sr_admin_menu(self): - buttons = [NavButton('community_settings', css_class = 'reddit-edit', + buttons = [NavButton(menu.community_settings, css_class = 'reddit-edit', dest = "edit"), NamedButton('modmail', dest = "message/inbox", css_class = 'moderator-mail'), @@ -202,7 +203,8 @@ def rightbox(self): if not c.user_is_loggedin and self.loginbox and not g.read_only_mode: ps.append(LoginFormWide()) - ps.append(SponsorshipBox()) + if c.user.pref_show_sponsorships or not c.user.gold: + ps.append(SponsorshipBox()) no_ads_yet = True if isinstance(c.site, MultiReddit) and c.user_is_loggedin: @@ -221,7 +223,8 @@ def rightbox(self): if self.submit_box: ps.append(SideBox(_('Submit a link'), '/submit', 'submit', - sr_path = True, + sr_path = (isinstance(c.site,DefaultSR) + or not isinstance(c.site, FakeSubreddit)), subtitles = [strings.submit_box_text], show_cover = True)) @@ -233,13 +236,13 @@ def rightbox(self): subtitles = rand_strings.get("create_reddit", 2), show_cover = True, nocname=True)) - if False and not c.user.gold and self.submit_box: - ps.append(SideBox(_('New: Google Checkout!'), - 'http://blog.reddit.com/2010/08/reddit-gold-now-takes-google-checkout.html', + if False and self.submit_box: + ps.append(SideBox(_('reddit gold creddits: now for sale!'), + 'http://www.reddit.com/tb/efpn0', 'gold', sr_path = False, - subtitles = ["reddit gold just got better!", - "(read all about it on the blog)"], + subtitles = ["check it out: you can now give gold", + "subscriptions to deserving redditors"], show_cover = False, nocname = True)) if not isinstance(c.site, FakeSubreddit) and not c.cname: @@ -368,8 +371,6 @@ def cachable_attrs(self): def __init__(self): self.nav = [NavMenu([ - OffsiteButton(_("gold"), nocname=True, - dest = '/help/gold', css_class = "buygold"), OffsiteButton("mobile", "/static/reddit_mobile/index.htm"), OffsiteButton("rss", dest = '/.rss'), @@ -397,7 +398,10 @@ def __init__(self): NamedButton("widget", True)], title = _('reddit tools'), type = 'flat_vert', separator = ''), - NavMenu([NamedButton("blog", False, nocname=True), + NavMenu([ + NamedButton("blog", False, nocname=True), + NamedButton("gold", False, nocname=True, + dest = '/help/gold', css_class = "buygold"), NamedButton("promote", False, nocname=True, dest = '/promoted', css_class = "red"), NamedButton("ad_inq", False, 
nocname=True)], @@ -480,11 +484,11 @@ def __init__(self, site = None): # we want to cache on the number of subscribers self.subscribers = self.sr._ups - + #so the menus cache properly self.path = request.path CachedTemplate.__init__(self) - + def nav(self): buttons = [NavButton(plurals.moderators, 'moderators')] if self.type != 'public': @@ -613,9 +617,12 @@ class MessageCompose(Templated): """Compose message form.""" def __init__(self,to='', subject='', message='', success='', captcha = None): + from r2.models.admintools import admintools + Templated.__init__(self, to = to, subject = subject, - message = message, success = success, - captcha = captcha) + message = message, success = success, + captcha = captcha, + admins = admintools.admin_list()) class BoringPage(Reddit): @@ -631,6 +638,7 @@ def __init__(self, pagename, **context): name = c.site.name or g.default_sr if "title" not in context: context['title'] = "%s: %s" % (name, pagename) + Reddit.__init__(self, **context) def build_toolbars(self): @@ -847,6 +855,9 @@ def rightbox(self): rb.insert(1, LinkInfoBar(a = self.link)) return rb +class LinkCommentSep(Templated): + pass + class CommentPane(Templated): def cache_key(self): num = self.article.num_comments @@ -1002,6 +1013,7 @@ def __init__(self, prev_search = '', num_results = 0, elapsed_time = 0, header = _('search reddits'), search_params = {}, simple=True, + subreddit_search=True ) self.sr_infobar = InfoBar(message = strings.sr_subscribe) @@ -1127,16 +1139,36 @@ def __init__(self, user): self.is_friend = None self.my_fullname = None self.gold_remaining = None + running_out_of_gold = False + if c.user_is_loggedin: - if (user._id == c.user._id or c.user_is_admin) and getattr(user, "gold", None): + if ((user._id == c.user._id or c.user_is_admin) + and getattr(user, "gold", None)): self.gold_expiration = getattr(user, "gold_expiration", None) if self.gold_expiration is None: self.gold_remaining = _("an unknown amount") - elif (self.gold_expiration - datetime.datetime.now(g.tz)).days < 1: - self.gold_remaining = _("less than a day") else: - self.gold_remaining = timeuntil(self.gold_expiration, - precision=60 * 60 * 24 * 30) # months + gold_days_left = (self.gold_expiration - + datetime.datetime.now(g.tz)).days + if gold_days_left < 7: + running_out_of_gold = True + + if gold_days_left < 1: + self.gold_remaining = _("less than a day") + else: + self.gold_remaining = timeuntil(self.gold_expiration, + precision=60 * 60 * 24 * 30) # months + if user._id != c.user._id: + self.goldlink = "/gold?goldtype=gift&recipient=" + user.name + self.giftmsg = _("buy %(user)s a month of reddit gold" % + dict(user=user.name)) + elif running_out_of_gold: + self.goldlink = "/gold" + self.giftmsg = _("renew your reddit gold") + elif not c.user.gold: + self.goldlink = "/gold" + self.giftmsg = _("treat yourself to reddit gold") + self.my_fullname = c.user._fullname self.is_friend = self.user._id in c.user.friends @@ -1163,7 +1195,7 @@ def __init__(self, title, message = None): class Reddit404(BoringPage): site_tracking = False def __init__(self): - ch=random.choice(['a','b','c']) + ch=random.choice(['a','b','c','d']) BoringPage.__init__(self, _("page not found"), loginbox=False, show_sidebar = False, content=UnfoundPage(ch)) @@ -1330,7 +1362,9 @@ def __init__(self,status,img_src, name="", errors = {}, form_id = ""): class Thanks(Templated): """The page to claim reddit gold trophies""" def __init__(self, secret=None): - if g.cache.get("recent-gold-" + c.user.name): + if secret and secret.startswith("cr_"): + 
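# Editor's sketch: the ProfileBar hunk in this region computes how much reddit gold
# time remains and flags accounts within a week of expiry so the sidebar can offer a
# renewal link instead of a gift link. A minimal version of that date arithmetic;
# the function name is illustrative and the 7-day cutoff mirrors the hunk above.
import datetime

def gold_status(expiration, now=None):
    """Return (days_left, running_out): running_out means fewer than 7 days remain."""
    now = now or datetime.datetime.utcnow()
    days_left = (expiration - now).days
    return days_left, days_left < 7

# e.g. gold_status(datetime.datetime(2011, 1, 10), now=datetime.datetime(2011, 1, 7))
# -> (3, True): under a week left, so show "renew your reddit gold".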
status = "creddits" + elif g.cache.get("recent-gold-" + c.user.name): status = "recent" elif c.user.gold: status = "gold" @@ -1347,6 +1381,109 @@ def __init__(self, secret=None): Templated.__init__(self, status=status, secret=secret, lounge_html=lounge_html) +class Gold(Templated): + def __init__(self, goldtype, period, months, signed, + recipient, recipient_name): + + if c.user_is_admin: + user_creddits = 50 + else: + user_creddits = c.user.gold_creddits + + Templated.__init__(self, goldtype = goldtype, period = period, + months = months, signed = signed, + recipient_name = recipient_name, + user_creddits = user_creddits, + bad_recipient = + bool(recipient_name and not recipient)) + +class GoldPayment(Templated): + def __init__(self, goldtype, period, months, signed, + recipient, giftmessage, passthrough): + pay_from_creddits = False + + if period == "monthly" or 1 <= months < 12: + price = 3.99 + else: + price = 29.99 + + if c.user_is_admin: + user_creddits = 50 + else: + user_creddits = c.user.gold_creddits + + if goldtype == "autorenew": + summary = strings.gold_summary_autorenew % dict(user=c.user.name) + if period == "monthly": + paypal_buttonid = g.PAYPAL_BUTTONID_AUTORENEW_BYMONTH + elif period == "yearly": + paypal_buttonid = g.PAYPAL_BUTTONID_AUTORENEW_BYYEAR + + quantity = None + google_id = None + elif goldtype == "onetime": + if months < 12: + paypal_buttonid = g.PAYPAL_BUTTONID_ONETIME_BYMONTH + quantity = months + else: + paypal_buttonid = g.PAYPAL_BUTTONID_ONETIME_BYYEAR + quantity = months / 12 + months = quantity * 12 + + summary = strings.gold_summary_onetime % dict(user=c.user.name, + amount=Score.somethings(months, "month")) + + google_id = g.GOOGLE_ID + else: + if months < 12: + paypal_buttonid = g.PAYPAL_BUTTONID_CREDDITS_BYMONTH + quantity = months + else: + paypal_buttonid = g.PAYPAL_BUTTONID_CREDDITS_BYYEAR + quantity = months / 12 + + if goldtype == "creddits": + months = quantity * 12 + summary = strings.gold_summary_creddits % dict( + amount=Score.somethings(months, "month")) + elif goldtype == "gift": + if signed: + format = strings.gold_summary_signed_gift + else: + format = strings.gold_summary_anonymous_gift + + if months <= user_creddits: + pay_from_creddits = True + elif months >= 12: + # If you're not paying with creddits, you have to either + # buy by month or spend a multiple of 12 months + months = quantity * 12 + + summary = format % dict( + amount=Score.somethings(months, "month"), + recipient = recipient.name) + else: + raise ValueError("wtf is %r" % goldtype) + + google_id = g.GOOGLE_ID + + Templated.__init__(self, goldtype=goldtype, period=period, + months=months, quantity=quantity, price=price, + summary=summary, giftmessage=giftmessage, + pay_from_creddits=pay_from_creddits, + passthrough=passthrough, + google_id=google_id, + paypal_buttonid=paypal_buttonid) + +class GiftGold(Templated): + """The page to gift reddit gold trophies""" + def __init__(self, recipient): + if c.user_is_admin: + gold_creddits = 500 + else: + gold_creddits = c.user.gold_creddits + Templated.__init__(self, recipient=recipient, gold_creddits=gold_creddits) + class Password(Templated): """Form encountered when 'recover password' is clicked in the LoginFormWide.""" def __init__(self, success=False): @@ -1386,7 +1523,7 @@ def __init__(self, comments_url): class PaneStack(Templated): """Utility class for storing and rendering a list of block elements.""" - + def __init__(self, panes=[], div_id = None, css_class=None, div=False, title="", title_buttons = []): div = div or 
div_id or css_class or False @@ -1401,7 +1538,7 @@ def __init__(self, panes=[], div_id = None, css_class=None, div=False, def append(self, item): """Appends an element to the end of the current stack""" self.stack.append(item) - + def push(self, item): """Prepends an element to the top of the current stack""" self.stack.insert(0, item) @@ -1415,10 +1552,12 @@ class SearchForm(Templated): """The simple search form in the header of the page. prev_search is the previous search.""" def __init__(self, prev_search = '', search_params = {}, - site=None, simple=True, restrict_sr=False): + site=None, simple=True, restrict_sr=False, + subreddit_search=False): Templated.__init__(self, prev_search = prev_search, search_params = search_params, site=site, - simple=simple, restrict_sr=restrict_sr) + simple=simple, restrict_sr=restrict_sr, + subreddit_search=subreddit_search) class SearchBar(Templated): @@ -1427,7 +1566,8 @@ class SearchBar(Templated): and num_results if any.""" def __init__(self, num_results = 0, prev_search = '', elapsed_time = 0, search_params = {}, show_feedback=False, - simple=False, restrict_sr=False, site=None, + simple=False, restrict_sr=False, site=None, + subreddit_search=False, **kw): # not listed explicitly in args to ensure it translates properly @@ -1445,7 +1585,7 @@ def __init__(self, num_results = 0, prev_search = '', elapsed_time = 0, Templated.__init__(self, search_params = search_params, simple=simple, restrict_sr=restrict_sr, - site=site) + site=site, subreddit_search=subreddit_search) class SearchFail(Templated): """Search failure page.""" @@ -1472,13 +1612,13 @@ def __init__(self, url='', title='', fullname=None, thumbnail=None): Templated.__init__(self, url = url, title = title, fullname = fullname, thumbnail = thumbnail) -dorks_re = re.compile(r"https?://?([-\w.]*\.)?digg\.com/\w+\.\w+(/|$)") class FrameToolbar(Wrapped): """The reddit voting toolbar used together with Frame.""" cachable = True extension_handling = False cache_ignore = Link.cache_ignore + site_tracking = True def __init__(self, link, title = None, url = None, expanded = False, **kw): if link: self.title = link.title @@ -1496,7 +1636,6 @@ def __init__(self, link, title = None, url = None, expanded = False, **kw): self.site_description = c.site.description self.default_sr = c.default_sr - self.dorks = bool( dorks_re.match(self.url) ) Wrapped.__init__(self, link) if link is None: self.add_props(c.user, [self]) @@ -1749,7 +1888,7 @@ def __init__(self): date_groupings = {} hexkeys_seen = {} - idses = hcb.ids_by_category("error") + idses = hcb.ids_by_category("error", limit=5000) errors = g.hardcache.get_multi(prefix="error-", keys=idses) for ids in idses: @@ -1764,7 +1903,7 @@ def __init__(self): "warning") continue - tpl = (len(d['occurrences']), hexkey, d) + tpl = (d.get('times_seen', 1), hexkey, d) date_groupings.setdefault(date, []).append(tpl) self.nicknames = {} @@ -2478,7 +2617,12 @@ def make_link_child(item): if isinstance(item.media_object, basestring): media_embed = item.media_object else: - media_embed = get_media_embed(item.media_object) + try: + media_embed = get_media_embed(item.media_object) + except TypeError: + g.log.warning("link %s has a bad media object" % item) + media_embed = None + if media_embed: media_embed = MediaEmbed(media_domain = g.media_domain, height = media_embed.height + 10, @@ -2859,7 +3003,8 @@ def __init__(self, **kw): class PaymentForm(Templated): def __init__(self, link, indx, **kw): - self.countries = pycountry.countries + self.countries = 
[pycountry.countries.get(name=n) + for n in g.allowed_pay_countries] self.link = promote.editable_add_props(link) self.campaign = self.link.campaigns[indx] self.indx = indx @@ -3121,11 +3266,12 @@ def render(self, *a, **kw): return unsafe(self.s) class Dart_Ad(CachedTemplate): - def __init__(self, tag = None): + def __init__(self, dartsite, tag): tag = tag or "homepage" tracker_url = AdframeInfo.gen_url(fullname = "dart_" + tag, ip = request.ip) - Templated.__init__(self, tag = tag, tracker_url = tracker_url) + Templated.__init__(self, tag = tag, dartsite = dartsite, + tracker_url = tracker_url) def render(self, *a, **kw): res = CachedTemplate.render(self, *a, **kw) @@ -3147,10 +3293,23 @@ class ComScore(CachedTemplate): def render_ad(reddit_name=None, codename=None): if not reddit_name: reddit_name = g.default_sr + if g.frontpage_dart: + return Dart_Ad("reddit.dart", reddit_name).render() + + try: + sr = Subreddit._by_name(reddit_name) + except NotFound: + return Dart_Ad("reddit.dart", g.default_sr).render() + + if sr.over_18: + dartsite = "reddit.dart.nsfw" + else: + dartsite = "reddit.dart" + if codename: if codename == "DART": - return Dart_Ad(reddit_name).render() + return Dart_Ad(dartsite, reddit_name).render() else: try: ad = Ad._by_codename(codename) @@ -3159,17 +3318,11 @@ def render_ad(reddit_name=None, codename=None): attrs = ad.important_attrs() return HouseAd(**attrs).render() - try: - sr = Subreddit._by_name(reddit_name) - except NotFound: - return Dart_Ad(g.default_sr).render() - ads = {} for adsr in AdSR.by_sr_merged(sr): ad = adsr._thing1 - if not (ad.codename == "DART" and sr.over_18): - ads[ad.codename] = (ad, adsr.weight) + ads[ad.codename] = (ad, adsr.weight) total_weight = sum(t[1] for t in ads.values()) @@ -3185,7 +3338,7 @@ def render_ad(reddit_name=None, codename=None): winner = t[0] if winner.codename == "DART": - return Dart_Ad(reddit_name).render() + return Dart_Ad(dartsite, reddit_name).render() else: attrs = winner.important_attrs() return HouseAd(**attrs).render() @@ -3197,7 +3350,7 @@ def render_ad(reddit_name=None, codename=None): (reddit_name, total_weight), "error") - return Dart_Ad(reddit_name).render() + return Dart_Ad(dartsite, reddit_name).render() class TryCompact(Reddit): def __init__(self, dest, **kw): diff --git a/r2/r2/lib/pages/things.py b/r2/r2/lib/pages/things.py index 9084f803ee..c8c1338674 100644 --- a/r2/r2/lib/pages/things.py +++ b/r2/r2/lib/pages/things.py @@ -146,13 +146,15 @@ class MessageButtons(PrintableButtons): def __init__(self, thing, delete = False, report = True): was_comment = getattr(thing, 'was_comment', False) permalink = thing.permalink + can_reply = c.user_is_loggedin and getattr(thing, "repliable", True) + PrintableButtons.__init__(self, "messagebuttons", thing, profilepage = c.profilepage, permalink = permalink, was_comment = was_comment, unread = thing.new, recipient = thing.recipient, - can_reply = c.user_is_loggedin, + can_reply = can_reply, parent_id = getattr(thing, "parent_id", None), show_report = True, show_delete = False) diff --git a/r2/r2/lib/promote.py b/r2/r2/lib/promote.py index 90e952b551..f0579c0f6d 100644 --- a/r2/r2/lib/promote.py +++ b/r2/r2/lib/promote.py @@ -48,7 +48,7 @@ @memoize("get_promote_srid") def get_promote_srid(name = 'promos'): try: - sr = Subreddit._by_name(name) + sr = Subreddit._by_name(name, stale=True) except NotFound: sr = Subreddit._new(name = name, title = "promoted links", @@ -508,6 +508,9 @@ def accept_promotion(link): promotion_log(link, "requeued") #TODO: smarter would be nice, 
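# Editor's sketch: render_ad() above merges the ads configured for a subreddit, sums
# their weights, and picks a winner in proportion to weight, handing DART picks to
# Dart_Ad and everything else to HouseAd. A hedged sketch of that weighted pick; this
# is not the exact reddit selection code (which is only partially shown in this hunk),
# and the names and sample weights are illustrative.
import random

def pick_ad(weighted_ads):
    """weighted_ads: dict of codename -> weight. Return a codename chosen with
    probability weight / total, or None if nothing is configured."""
    total = sum(weighted_ads.values())
    if total <= 0:
        return None
    roll = random.uniform(0, total)
    for codename, weight in weighted_ads.items():
        roll -= weight
        if roll <= 0:
            return codename
    return codename  # floating-point edge case: fall back to the last candidate

# e.g. pick_ad({'DART': 70, 'house_selfserve': 30}) returns 'DART' roughly 70% of the
# time; with the hunk above, NSFW subreddits would route DART wins to the
# 'reddit.dart.nsfw' site rather than 'reddit.dart'.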
but this will have to do for now make_daily_promotions() + if link._spam: + link._spam = False + link._commit() emailer.accept_promo(link) def reject_promotion(link, reason = None): @@ -691,6 +694,8 @@ def get_promotion_list(user, site): # site is specified, pick an ad from that site if not isinstance(site, FakeSubreddit): srids = set([site._id]) + elif isinstance(site, MultiReddit): + srids = set(site.sr_ids) # site is Fake, user is not. Pick based on their subscriptions. elif user and not isinstance(user, FakeAccount): srids = set(Subreddit.reverse_subscriber_ids(user) + [""]) diff --git a/r2/r2/lib/queues.py b/r2/r2/lib/queues.py index 6732a1f008..b88c7b2b97 100644 --- a/r2/r2/lib/queues.py +++ b/r2/r2/lib/queues.py @@ -70,6 +70,8 @@ def queues(self): # this isn't in use until the spam_q plumbing is #self._q('newpage_q') self._q('register_vote_q', self_refer=True) + self._q('vote_link_q', self_refer=True) + self._q('vote_comment_q', self_refer=True) self._q('log_q', self_refer=True) self._q('usage_q', self_refer=True, durable=False) diff --git a/r2/r2/lib/scraper.py b/r2/r2/lib/scraper.py index 35d0741a96..b725063ec8 100644 --- a/r2/r2/lib/scraper.py +++ b/r2/r2/lib/scraper.py @@ -724,116 +724,563 @@ class EmbedlyOEmbed(OEmbed): ======================= documentation: http://api.embed.ly """ - domains = ['youtube.com', 'veoh.com', 'justin.tv', 'ustream.com', - 'qik.com', 'revision3.com', 'dailymotion.com', 'collegehumor.com', - 'twitvid.com', 'break.com', 'vids.myspace.com', 'metacafe.com', - 'blip.tv', 'video.google.com','revver.com', 'video.yahoo.com', - 'viddler.com', 'liveleak.com', 'animoto.com', 'yfrog.com', - 'tweetphoto.com', 'flickr.com', 'twitpic.com', 'imgur.com', - 'posterous.com', 'twitgoo.com', 'photobucket.com', 'phodroid.com', - 'xkcd.com', 'asofterword.com', 'qwantz.com', '23hq.com', 'hulu.com', - 'movieclips.com', 'crackle.com', 'fancast.com', 'funnyordie.com', - 'vimeo.com', 'ted.com', 'omnisio.com', 'nfb.ca', 'thedailyshow.com', - 'movies.yahoo.com', 'colbertnation.com', 'comedycentral.com', - 'theonion.com', 'wordpress.tv', 'traileraddict.com', 'soundcloud.com', - 'slideshare.net', 'scribd.com', 'screenr.com', '5min.com', - 'howcast.com', 'my.opera.com', 'escapistmagazine.com', ] - - url_re = re.compile('^http://.+\.youtube\.com/watch.+|'+\ - '^http://.+\.youtube\.com/v/.+|'+\ - '^http://youtube\.com/watch.+|'+\ - '^http://youtube\.com/v/.+|'+\ - '^http://youtu\.be/.+|'+\ - '^http://www\.veoh\.com/.*/watch/.+|'+\ - '^http://www\.justin\.tv/clip/.+|'+\ - '^http://www\.justin\.tv/.+|'+\ - '^http://justin\.tv/clip/.+|'+\ - '^http://justin\.tv/.+|'+\ - '^http://www\.ustream\.tv/recorded/.+|'+\ - '^http://www\.ustream\.tv/channel/.+|'+\ - '^http://qik\.com/video/.+|'+\ - '^http://qik\.com/.+|'+\ - '^http://.*revision3\.com/.+|'+\ - '^http://www.dailymotion\.com/video/.+|'+\ - '^http://www.dailymotion\.com/.+/video/.+|'+\ - '^http://dailymotion\.com/video/.+|'+\ - '^http://dailymotion\.com/.+/video/.+|'+\ - '^http://www\.collegehumor\.com/video:.+|'+\ - '^http://www\.twitvid\.com/.+|'+\ - '^http://www\.break\.com/.*/.+|'+\ - '^http://vids\.myspace\.com/index\.cfm\?fuseaction=vids\.individual&videoid.+|'+\ - '^http://www\.myspace\.com/index\.cfm\?fuseaction=.*&videoid.+|'+\ - '^http://www\.metacafe\.com/watch/.+|'+\ - '^http://blip\.tv/file/.+|'+\ - '^http://.+\.blip\.tv/file/.+|'+\ - '^http://video\.google\.com/videoplay\?.+|'+\ - '^http://revver\.com/video/.+|'+\ - '^http://www\.revver\.com/video/.+|'+\ - '^http://video\.yahoo\.com/watch/.*/.+|'+\ - 
'^http://video\.yahoo\.com/network/.+|'+\ - '^http://.*viddler\.com/explore/.*/videos/.+|'+\ - '^http://liveleak\.com/view\?.+|'+\ - '^http://www\.liveleak\.com/view\?.+|'+\ - '^http://animoto\.com/play/.+|'+\ - '^http://yfrog\..*/.+|'+\ - '^http://.+\.yfrog\..*/.+|'+\ - '^http://tweetphoto\.com/.+|'+\ - '^http://www\.flickr\.com/photos/.+|'+\ - '^http://twitpic\.com/.+|'+\ - '^http://.*imgur\.com/.+|'+\ - '^http://.*\.posterous\.com/.+|'+\ - '^http://twitgoo\.com/.+|'+\ - '^http://i.*\.photobucket\.com/albums/.+|'+\ - '^http://gi.*\.photobucket\.com/groups/.+|'+\ - '^http://phodroid\.com/.*/.*/.+|'+\ - '^http://xkcd\.com/.+|'+\ - '^http://www\.asofterworld\.com/index\.php\?id=.+|'+\ - '^http://www\.qwantz\.com/index\.php\?comic=.+|'+\ - '^http://23hq\.com/.*/photo/.+|'+\ - '^http://www\.23hq\.com/.*/photo/.+|'+\ - '^http://www\.hulu\.com/watch/.+|'+\ - '^http://movieclips\.com/watch/.*/.*/|'+\ - '^http://movieclips\.com/watch/.*/.*/.*/.+|'+\ - '^http://.*crackle\.com/c/.+|'+\ - '^http://www\.fancast\.com/.*/videos|'+\ - '^http://www\.funnyordie\.com/videos/.+|'+\ - '^http://www\.vimeo\.com/groups/.*/videos/.+|'+\ - '^http://www\.vimeo\.com/.+|'+\ - '^http://vimeo\.com/groups/.*/videos/.+|'+\ - '^http://vimeo\.com/.+|'+\ - '^http://www\.ted\.com/.+|'+\ - '^http://www\.omnisio\.com/.+|'+\ - '^http://.*nfb\.ca/film/.+|'+\ - '^http://www\.thedailyshow\.com/watch/.+|'+\ - '^http://www\.thedailyshow\.com/full-episodes/.+|'+\ - '^http://www\.thedailyshow\.com/collection/.*/.*/.+|'+\ - '^http://movies\.yahoo\.com/.*movie/.*/video/.+|'+\ - '^http://movies\.yahoo\.com/movie/.*/info|'+\ - '^http://movies\.yahoo\.com/movie/.*/trailer|'+\ - '^http://www\.colbertnation\.com/the-colbert-report-collections/.+|'+\ - '^http://www\.colbertnation\.com/full-episodes/.+|'+\ - '^http://www\.colbertnation\.com/the-colbert-report-videos/.+|'+\ - '^http://www\.comedycentral\.com/videos/index\.jhtml\?.+|'+\ - '^http://www\.theonion\.com/video/.+|'+\ - '^http://theonion\.com/video/.+|'+\ - '^http://wordpress\.tv/.*/.*/.*/.*/|'+\ - '^http://www\.traileraddict\.com/trailer/.+|'+\ - '^http://www\.traileraddict\.com/clip/.+|'+\ - '^http://www\.traileraddict\.com/poster/.+|'+\ - '^http://soundcloud\.com/.+|'+\ - '^http://soundcloud\.com/.*/.+|'+\ - '^http://soundcloud\.com/.*/sets/.+|'+\ - '^http://soundcloud\.com/groups/.+|'+\ - '^http://www\.slideshare\.net/.*/.+|'+\ - '^http://.*\.scribd\.com/doc/.+|'+\ - '^http://screenr\.com/.+|'+\ - '^http://www\.5min\.com/Video/.+|'+\ - '^http://www\.howcast\.com/videos/.+|'+\ - '^http://my\.opera\.com/.*/albums/show\.dml\?id=.+|'+\ - '^http://my\.opera\.com/.*/albums/showpic\.dml\?album=.+&picture=.+|'+\ - '^http://escapistmagazine\.com/videos/.+|'+\ - '^http://www\.escapistmagazine\.com/videos/.+', re.I + domains = ['23hq.com', '5min.com', '99dollarmusicvideos.com', + 'abcnews.go.com', 'achewood.com', 'allthingsd.com', 'amazon.com', + 'aniboom.com', 'animoto.com', 'asofterworld.com', 'atom.com', + 'audioboo.com', 'bambuser.com', 'bandcamp.com', 'barelydigital.com', + 'barelypolitical.com', 'bigthink.com', 'blip.tv', 'boston.com', + 'brainbird.net', 'bravotv.com', 'break.com', 'brizzly.com', + 'cbsnews.com', 'channelfrederator.com', 'chart.ly', 'cl.ly', + 'clikthrough.com', 'clipshack.com', 'cnbc.com', 'cnn.com', + 'colbertnation.com', 'collegehumor.com', 'comedycentral.com', + 'compete.com', 'confreaks.net', 'crackle.com', 'craigslist.org', + 'crocodoc.com', 'crunchbase.com', 'dailybooth.com', 'dailymile.com', + 'dailymotion.com', 'deviantart.com', 'digg.com', 'dipdive.com', + 
'discovery.com', 'dotsub.com', 'dribbble.com', 'edition.cnn.com', + 'emberapp.com', 'entertonement.com', 'escapistmagazine.com', + 'espn.go.com', 'facebook.com', 'fancast.com', 'flickr.com', 'fora.tv', + 'formspring.me', 'fotopedia.com', 'foxsports.com', + 'freemusicarchive.org', 'funnyordie.com', 'gametrailers.com', + 'gist.github.com', 'globalpost.com', 'godtube.com', 'google.com', + 'grindtv.com', 'grooveshark.com', 'guardian.co.uk', 'howcast.com', + 'huffduffer.com', 'hulu.com', 'hungrynation.tv', 'ifood.tv', 'img.ly', + 'imgur.com', 'indenti.ca', 'indymogul.com', 'instagr.am', 'issuu.com', + 'justin.tv', 'kickstarter.com', 'kinomap.com', 'kiva.org', + 'koldcast.tv', 'last.fm', 'liveleak.com', 'livestream.com', + 'logotv.com', 'lonelyplanet.com', 'maps.google.com', 'meadd.com', + 'mediamatters.org', 'meetup.com', 'metacafe.com', 'metacdn.com', + 'mixcloud.com', 'mixergy.com', 'mobypicture.com', 'money.cnn.com', + 'movies.yahoo.com', 'msnbc.com', 'my.opera.com', 'myloc.me', + 'nationalgeographic.com', 'nfb.ca', 'npr.org', 'nzonscreen.com', + 'overstream.net', 'pastebin.com', 'pastie.org', 'phodroid.com', + 'photobucket.com', 'photozou.jp', 'picasaweb.google.com', 'picplz.com', + 'pikchur.com', 'ping.fm', 'plixi.com', 'polldaddy.com', + 'polleverywhere.com', 'posterous.com', 'prezi.com', 'qik.com', + 'quantcast.com', 'questionablecontent.net', 'qwantz.com', 'qwiki.com', + 'radionomy.com', 'radioreddit.com', 'rdio.com', 'redux.com', + 'revision3.com', 'revver.com', 'saynow.com', 'schooltube.com', + 'scrapblog.com', 'screencast.com', 'screenr.com', 'scribd.com', + 'sendables.jibjab.com', 'share.ovi.com', 'shitmydadsays.com', + 'shopstyle.com', 'skitch.com', 'slideshare.net', 'smugmug.com', + 'snotr.com', 'someecards.com', 'soundcloud.com', 'speakerdeck.com', + 'spike.com', 'statsheet.com', 'status.net', 'storify.com', + 'streetfire.net', 'studivz.net', 'tangle.com', 'teachertube.com', + 'techcrunch.tv', 'ted.com', 'thedailyshow.com', 'theonion.com', + 'threadbanger.com', 'timetoast.com', 'tinypic.com', 'tmiweekly.com', + 'traileraddict.com', 'trailerspy.com', 'trooptube.tv', 'trutv.com', + 'tumblr.com', 'tweetphoto.com', 'twitgoo.com', 'twitlonger.com', + 'twitpic.com', 'twitrpix.com', 'twitter.com', 'twitvid.com', + 'ultrakawaii.com', 'uservoice.com', 'ustream.com', 'viddler.com', + 'video.google.com', 'video.pbs.org', 'video.yahoo.com', + 'vids.myspace.com', 'vimeo.com', 'vodcars.com', 'washingtonpost.com', + 'whitehouse.gov', 'wistia.com', 'wordpress.tv', 'worldstarhiphop.com', + 'xiami.com', 'xkcd.com', 'xtranormal.com', 'yfrog.com', 'youku.com', + 'youtube.com', 'zapiks.com', 'zero-inch.com'] + + url_re = re.compile( + 'http:\\/\\/.*youtube\\.com\\/watch.*|' + + 'http:\\/\\/.*\\.youtube\\.com\\/v\\/.*|' + + 'http:\\/\\/youtu\\.be\\/.*|' + + 'http:\\/\\/.*\\.youtube\\.com\\/user\\/.*|' + + 'http:\\/\\/.*\\.youtube\\.com\\/.*\\#.*\\/.*|' + + 'http:\\/\\/m\\.youtube\\.com\\/watch.*|' + + 'http:\\/\\/m\\.youtube\\.com\\/index.*|' + + 'http:\\/\\/.*\\.youtube\\.com\\/profile.*|' + + 'http:\\/\\/.*justin\\.tv\\/.*|' + + 'http:\\/\\/.*justin\\.tv\\/.*\\/b\\/.*|' + + 'http:\\/\\/.*justin\\.tv\\/.*\\/w\\/.*|' + + 'http:\\/\\/www\\.ustream\\.tv\\/recorded\\/.*|' + + 'http:\\/\\/www\\.ustream\\.tv\\/channel\\/.*|' + + 'http:\\/\\/www\\.ustream\\.tv\\/.*|' + + 'http:\\/\\/qik\\.com\\/video\\/.*|' + + 'http:\\/\\/qik\\.com\\/.*|' + + 'http:\\/\\/qik\\.ly\\/.*|' + + 'http:\\/\\/.*revision3\\.com\\/.*|' + + 'http:\\/\\/.*\\.dailymotion\\.com\\/video\\/.*|' + + 
'http:\\/\\/.*\\.dailymotion\\.com\\/.*\\/video\\/.*|' + + 'http:\\/\\/www\\.collegehumor\\.com\\/video:.*|' + + 'http:\\/\\/.*twitvid\\.com\\/.*|' + + 'http:\\/\\/www\\.break\\.com\\/.*\\/.*|' + + 'http:\\/\\/vids\\.myspace\\.com\\/index\\.cfm\\?fuseaction=vids\\.individual&videoid.*|' + + 'http:\\/\\/www\\.myspace\\.com\\/index\\.cfm\\?fuseaction=.*&videoid.*|' + + 'http:\\/\\/www\\.metacafe\\.com\\/watch\\/.*|' + + 'http:\\/\\/www\\.metacafe\\.com\\/w\\/.*|' + + 'http:\\/\\/blip\\.tv\\/file\\/.*|' + + 'http:\\/\\/.*\\.blip\\.tv\\/file\\/.*|' + + 'http:\\/\\/video\\.google\\.com\\/videoplay\\?.*|' + + 'http:\\/\\/.*revver\\.com\\/video\\/.*|' + + 'http:\\/\\/video\\.yahoo\\.com\\/watch\\/.*\\/.*|' + + 'http:\\/\\/video\\.yahoo\\.com\\/network\\/.*|' + + 'http:\\/\\/.*viddler\\.com\\/explore\\/.*\\/videos\\/.*|' + + 'http:\\/\\/liveleak\\.com\\/view\\?.*|' + + 'http:\\/\\/www\\.liveleak\\.com\\/view\\?.*|' + + 'http:\\/\\/animoto\\.com\\/play\\/.*|' + + 'http:\\/\\/dotsub\\.com\\/view\\/.*|' + + 'http:\\/\\/www\\.overstream\\.net\\/view\\.php\\?oid=.*|' + + 'http:\\/\\/www\\.livestream\\.com\\/.*|' + + 'http:\\/\\/www\\.worldstarhiphop\\.com\\/videos\\/video.*\\.php\\?v=.*|' + + 'http:\\/\\/worldstarhiphop\\.com\\/videos\\/video.*\\.php\\?v=.*|' + + 'http:\\/\\/teachertube\\.com\\/viewVideo\\.php.*|' + + 'http:\\/\\/www\\.teachertube\\.com\\/viewVideo\\.php.*|' + + 'http:\\/\\/www1\\.teachertube\\.com\\/viewVideo\\.php.*|' + + 'http:\\/\\/www2\\.teachertube\\.com\\/viewVideo\\.php.*|' + + 'http:\\/\\/bambuser\\.com\\/v\\/.*|' + + 'http:\\/\\/bambuser\\.com\\/channel\\/.*|' + + 'http:\\/\\/bambuser\\.com\\/channel\\/.*\\/broadcast\\/.*|' + + 'http:\\/\\/www\\.schooltube\\.com\\/video\\/.*\\/.*|' + + 'http:\\/\\/bigthink\\.com\\/ideas\\/.*|' + + 'http:\\/\\/bigthink\\.com\\/series\\/.*|' + + 'http:\\/\\/sendables\\.jibjab\\.com\\/view\\/.*|' + + 'http:\\/\\/sendables\\.jibjab\\.com\\/originals\\/.*|' + + 'http:\\/\\/www\\.xtranormal\\.com\\/watch\\/.*|' + + 'http:\\/\\/dipdive\\.com\\/media\\/.*|' + + 'http:\\/\\/dipdive\\.com\\/member\\/.*\\/media\\/.*|' + + 'http:\\/\\/dipdive\\.com\\/v\\/.*|' + + 'http:\\/\\/.*\\.dipdive\\.com\\/media\\/.*|' + + 'http:\\/\\/.*\\.dipdive\\.com\\/v\\/.*|' + + 'http:\\/\\/v\\.youku\\.com\\/v_show\\/.*\\.html|' + + 'http:\\/\\/v\\.youku\\.com\\/v_playlist\\/.*\\.html|' + + 'http:\\/\\/www\\.snotr\\.com\\/video\\/.*|' + + 'http:\\/\\/snotr\\.com\\/video\\/.*|' + + 'http:\\/\\/www\\.whitehouse\\.gov\\/photos-and-video\\/video\\/.*|' + + 'http:\\/\\/www\\.whitehouse\\.gov\\/video\\/.*|' + + 'http:\\/\\/wh\\.gov\\/photos-and-video\\/video\\/.*|' + + 'http:\\/\\/wh\\.gov\\/video\\/.*|' + + 'http:\\/\\/www\\.hulu\\.com\\/watch.*|' + + 'http:\\/\\/www\\.hulu\\.com\\/w\\/.*|' + + 'http:\\/\\/hulu\\.com\\/watch.*|' + + 'http:\\/\\/hulu\\.com\\/w\\/.*|' + + 'http:\\/\\/.*crackle\\.com\\/c\\/.*|' + + 'http:\\/\\/www\\.fancast\\.com\\/.*\\/videos|' + + 'http:\\/\\/www\\.funnyordie\\.com\\/videos\\/.*|' + + 'http:\\/\\/www\\.funnyordie\\.com\\/m\\/.*|' + + 'http:\\/\\/funnyordie\\.com\\/videos\\/.*|' + + 'http:\\/\\/funnyordie\\.com\\/m\\/.*|' + + 'http:\\/\\/www\\.vimeo\\.com\\/groups\\/.*\\/videos\\/.*|' + + 'http:\\/\\/www\\.vimeo\\.com\\/.*|' + + 'http:\\/\\/vimeo\\.com\\/groups\\/.*\\/videos\\/.*|' + + 'http:\\/\\/vimeo\\.com\\/.*|' + + 'http:\\/\\/vimeo\\.com\\/m\\/\\#\\/.*|' + + 'http:\\/\\/www\\.ted\\.com\\/talks\\/.*\\.html.*|' + + 'http:\\/\\/www\\.ted\\.com\\/talks\\/lang\\/.*\\/.*\\.html.*|' + + 
'http:\\/\\/www\\.ted\\.com\\/index\\.php\\/talks\\/.*\\.html.*|' + + 'http:\\/\\/www\\.ted\\.com\\/index\\.php\\/talks\\/lang\\/.*\\/.*\\.html.*|' + + 'http:\\/\\/.*nfb\\.ca\\/film\\/.*|' + + 'http:\\/\\/www\\.thedailyshow\\.com\\/watch\\/.*|' + + 'http:\\/\\/www\\.thedailyshow\\.com\\/full-episodes\\/.*|' + + 'http:\\/\\/www\\.thedailyshow\\.com\\/collection\\/.*\\/.*\\/.*|' + + 'http:\\/\\/movies\\.yahoo\\.com\\/movie\\/.*\\/video\\/.*|' + + 'http:\\/\\/movies\\.yahoo\\.com\\/movie\\/.*\\/trailer|' + + 'http:\\/\\/movies\\.yahoo\\.com\\/movie\\/.*\\/video|' + + 'http:\\/\\/www\\.colbertnation\\.com\\/the-colbert-report-collections\\/.*|' + + 'http:\\/\\/www\\.colbertnation\\.com\\/full-episodes\\/.*|' + + 'http:\\/\\/www\\.colbertnation\\.com\\/the-colbert-report-videos\\/.*|' + + 'http:\\/\\/www\\.comedycentral\\.com\\/videos\\/index\\.jhtml\\?.*|' + + 'http:\\/\\/www\\.theonion\\.com\\/video\\/.*|' + + 'http:\\/\\/theonion\\.com\\/video\\/.*|' + + 'http:\\/\\/wordpress\\.tv\\/.*\\/.*\\/.*\\/.*\\/|' + + 'http:\\/\\/www\\.traileraddict\\.com\\/trailer\\/.*|' + + 'http:\\/\\/www\\.traileraddict\\.com\\/clip\\/.*|' + + 'http:\\/\\/www\\.traileraddict\\.com\\/poster\\/.*|' + + 'http:\\/\\/www\\.escapistmagazine\\.com\\/videos\\/.*|' + + 'http:\\/\\/www\\.trailerspy\\.com\\/trailer\\/.*\\/.*|' + + 'http:\\/\\/www\\.trailerspy\\.com\\/trailer\\/.*|' + + 'http:\\/\\/www\\.trailerspy\\.com\\/view_video\\.php.*|' + + 'http:\\/\\/www\\.atom\\.com\\/.*\\/.*\\/|' + + 'http:\\/\\/fora\\.tv\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.spike\\.com\\/video\\/.*|' + + 'http:\\/\\/www\\.gametrailers\\.com\\/video\\/.*|' + + 'http:\\/\\/gametrailers\\.com\\/video\\/.*|' + + 'http:\\/\\/www\\.koldcast\\.tv\\/video\\/.*|' + + 'http:\\/\\/www\\.koldcast\\.tv\\/\\#video:.*|' + + 'http:\\/\\/techcrunch\\.tv\\/watch.*|' + + 'http:\\/\\/techcrunch\\.tv\\/.*\\/watch.*|' + + 'http:\\/\\/mixergy\\.com\\/.*|' + + 'http:\\/\\/video\\.pbs\\.org\\/video\\/.*|' + + 'http:\\/\\/www\\.zapiks\\.com\\/.*|' + + 'http:\\/\\/tv\\.digg\\.com\\/diggnation\\/.*|' + + 'http:\\/\\/tv\\.digg\\.com\\/diggreel\\/.*|' + + 'http:\\/\\/tv\\.digg\\.com\\/diggdialogg\\/.*|' + + 'http:\\/\\/www\\.trutv\\.com\\/video\\/.*|' + + 'http:\\/\\/www\\.nzonscreen\\.com\\/title\\/.*|' + + 'http:\\/\\/nzonscreen\\.com\\/title\\/.*|' + + 'http:\\/\\/app\\.wistia\\.com\\/embed\\/medias\\/.*|' + + 'https:\\/\\/app\\.wistia\\.com\\/embed\\/medias\\/.*|' + + 'http:\\/\\/hungrynation\\.tv\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.hungrynation\\.tv\\/.*\\/episode\\/.*|' + + 'http:\\/\\/hungrynation\\.tv\\/episode\\/.*|' + + 'http:\\/\\/www\\.hungrynation\\.tv\\/episode\\/.*|' + + 'http:\\/\\/indymogul\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.indymogul\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/indymogul\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.indymogul\\.com\\/episode\\/.*|' + + 'http:\\/\\/channelfrederator\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.channelfrederator\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/channelfrederator\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.channelfrederator\\.com\\/episode\\/.*|' + + 'http:\\/\\/tmiweekly\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.tmiweekly\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/tmiweekly\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.tmiweekly\\.com\\/episode\\/.*|' + + 'http:\\/\\/99dollarmusicvideos\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.99dollarmusicvideos\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/99dollarmusicvideos\\.com\\/episode\\/.*|' + + 
'http:\\/\\/www\\.99dollarmusicvideos\\.com\\/episode\\/.*|' + + 'http:\\/\\/ultrakawaii\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.ultrakawaii\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/ultrakawaii\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.ultrakawaii\\.com\\/episode\\/.*|' + + 'http:\\/\\/barelypolitical\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.barelypolitical\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/barelypolitical\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.barelypolitical\\.com\\/episode\\/.*|' + + 'http:\\/\\/barelydigital\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.barelydigital\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/barelydigital\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.barelydigital\\.com\\/episode\\/.*|' + + 'http:\\/\\/threadbanger\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.threadbanger\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/threadbanger\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.threadbanger\\.com\\/episode\\/.*|' + + 'http:\\/\\/vodcars\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/www\\.vodcars\\.com\\/.*\\/episode\\/.*|' + + 'http:\\/\\/vodcars\\.com\\/episode\\/.*|' + + 'http:\\/\\/www\\.vodcars\\.com\\/episode\\/.*|' + + 'http:\\/\\/confreaks\\.net\\/videos\\/.*|' + + 'http:\\/\\/www\\.confreaks\\.net\\/videos\\/.*|' + + 'http:\\/\\/video\\.allthingsd\\.com\\/video\\/.*|' + + 'http:\\/\\/aniboom\\.com\\/animation-video\\/.*|' + + 'http:\\/\\/www\\.aniboom\\.com\\/animation-video\\/.*|' + + 'http:\\/\\/clipshack\\.com\\/Clip\\.aspx\\?.*|' + + 'http:\\/\\/www\\.clipshack\\.com\\/Clip\\.aspx\\?.*|' + + 'http:\\/\\/grindtv\\.com\\/.*\\/video\\/.*|' + + 'http:\\/\\/www\\.grindtv\\.com\\/.*\\/video\\/.*|' + + 'http:\\/\\/ifood\\.tv\\/recipe\\/.*|' + + 'http:\\/\\/ifood\\.tv\\/video\\/.*|' + + 'http:\\/\\/ifood\\.tv\\/channel\\/user\\/.*|' + + 'http:\\/\\/www\\.ifood\\.tv\\/recipe\\/.*|' + + 'http:\\/\\/www\\.ifood\\.tv\\/video\\/.*|' + + 'http:\\/\\/www\\.ifood\\.tv\\/channel\\/user\\/.*|' + + 'http:\\/\\/logotv\\.com\\/video\\/.*|' + + 'http:\\/\\/www\\.logotv\\.com\\/video\\/.*|' + + 'http:\\/\\/lonelyplanet\\.com\\/Clip\\.aspx\\?.*|' + + 'http:\\/\\/www\\.lonelyplanet\\.com\\/Clip\\.aspx\\?.*|' + + 'http:\\/\\/streetfire\\.net\\/video\\/.*\\.htm.*|' + + 'http:\\/\\/www\\.streetfire\\.net\\/video\\/.*\\.htm.*|' + + 'http:\\/\\/trooptube\\.tv\\/videos\\/.*|' + + 'http:\\/\\/www\\.trooptube\\.tv\\/videos\\/.*|' + + 'http:\\/\\/www\\.godtube\\.com\\/featured\\/video\\/.*|' + + 'http:\\/\\/godtube\\.com\\/featured\\/video\\/.*|' + + 'http:\\/\\/www\\.godtube\\.com\\/watch\\/.*|' + + 'http:\\/\\/godtube\\.com\\/watch\\/.*|' + + 'http:\\/\\/www\\.tangle\\.com\\/view_video.*|' + + 'http:\\/\\/mediamatters\\.org\\/mmtv\\/.*|' + + 'http:\\/\\/www\\.clikthrough\\.com\\/theater\\/video\\/.*|' + + 'http:\\/\\/gist\\.github\\.com\\/.*|' + + 'http:\\/\\/twitter\\.com\\/.*\\/status\\/.*|' + + 'http:\\/\\/twitter\\.com\\/.*\\/statuses\\/.*|' + + 'http:\\/\\/www\\.twitter\\.com\\/.*\\/status\\/.*|' + + 'http:\\/\\/www\\.twitter\\.com\\/.*\\/statuses\\/.*|' + + 'http:\\/\\/mobile\\.twitter\\.com\\/.*\\/status\\/.*|' + + 'http:\\/\\/mobile\\.twitter\\.com\\/.*\\/statuses\\/.*|' + + 'https:\\/\\/twitter\\.com\\/.*\\/status\\/.*|' + + 'https:\\/\\/twitter\\.com\\/.*\\/statuses\\/.*|' + + 'https:\\/\\/www\\.twitter\\.com\\/.*\\/status\\/.*|' + + 'https:\\/\\/www\\.twitter\\.com\\/.*\\/statuses\\/.*|' + + 'https:\\/\\/mobile\\.twitter\\.com\\/.*\\/status\\/.*|' + + 'https:\\/\\/mobile\\.twitter\\.com\\/.*\\/statuses\\/.*|' + + 
'http:\\/\\/www\\.crunchbase\\.com\\/.*\\/.*|' + + 'http:\\/\\/crunchbase\\.com\\/.*\\/.*|' + + 'http:\\/\\/www\\.slideshare\\.net\\/.*\\/.*|' + + 'http:\\/\\/www\\.slideshare\\.net\\/mobile\\/.*\\/.*|' + + 'http:\\/\\/slidesha\\.re\\/.*|' + + 'http:\\/\\/.*\\.scribd\\.com\\/doc\\/.*|' + + 'http:\\/\\/screenr\\.com\\/.*|' + + 'http:\\/\\/polldaddy\\.com\\/community\\/poll\\/.*|' + + 'http:\\/\\/polldaddy\\.com\\/poll\\/.*|' + + 'http:\\/\\/answers\\.polldaddy\\.com\\/poll\\/.*|' + + 'http:\\/\\/www\\.5min\\.com\\/Video\\/.*|' + + 'http:\\/\\/www\\.howcast\\.com\\/videos\\/.*|' + + 'http:\\/\\/www\\.screencast\\.com\\/.*\\/media\\/.*|' + + 'http:\\/\\/screencast\\.com\\/.*\\/media\\/.*|' + + 'http:\\/\\/www\\.screencast\\.com\\/t\\/.*|' + + 'http:\\/\\/screencast\\.com\\/t\\/.*|' + + 'http:\\/\\/issuu\\.com\\/.*\\/docs\\/.*|' + + 'http:\\/\\/www\\.kickstarter\\.com\\/projects\\/.*\\/.*|' + + 'http:\\/\\/www\\.scrapblog\\.com\\/viewer\\/viewer\\.aspx.*|' + + 'http:\\/\\/ping\\.fm\\/p\\/.*|' + + 'http:\\/\\/chart\\.ly\\/symbols\\/.*|' + + 'http:\\/\\/chart\\.ly\\/.*|' + + 'http:\\/\\/maps\\.google\\.com\\/maps\\?.*|' + + 'http:\\/\\/maps\\.google\\.com\\/\\?.*|' + + 'http:\\/\\/maps\\.google\\.com\\/maps\\/ms\\?.*|' + + 'http:\\/\\/.*\\.craigslist\\.org\\/.*\\/.*|' + + 'http:\\/\\/my\\.opera\\.com\\/.*\\/albums\\/show\\.dml\\?id=.*|' + + 'http:\\/\\/my\\.opera\\.com\\/.*\\/albums\\/showpic\\.dml\\?album=.*&picture=.*|' + + 'http:\\/\\/tumblr\\.com\\/.*|' + + 'http:\\/\\/.*\\.tumblr\\.com\\/post\\/.*|' + + 'http:\\/\\/www\\.polleverywhere\\.com\\/polls\\/.*|' + + 'http:\\/\\/www\\.polleverywhere\\.com\\/multiple_choice_polls\\/.*|' + + 'http:\\/\\/www\\.polleverywhere\\.com\\/free_text_polls\\/.*|' + + 'http:\\/\\/www\\.quantcast\\.com\\/wd:.*|' + + 'http:\\/\\/www\\.quantcast\\.com\\/.*|' + + 'http:\\/\\/siteanalytics\\.compete\\.com\\/.*|' + + 'http:\\/\\/statsheet\\.com\\/statplot\\/charts\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/statsheet\\.com\\/statplot\\/charts\\/e\\/.*|' + + 'http:\\/\\/statsheet\\.com\\/.*\\/teams\\/.*\\/.*|' + + 'http:\\/\\/statsheet\\.com\\/tools\\/chartlets\\?chart=.*|' + + 'http:\\/\\/.*\\.status\\.net\\/notice\\/.*|' + + 'http:\\/\\/identi\\.ca\\/notice\\/.*|' + + 'http:\\/\\/brainbird\\.net\\/notice\\/.*|' + + 'http:\\/\\/shitmydadsays\\.com\\/notice\\/.*|' + + 'http:\\/\\/www\\.studivz\\.net\\/Profile\\/.*|' + + 'http:\\/\\/www\\.studivz\\.net\\/l\\/.*|' + + 'http:\\/\\/www\\.studivz\\.net\\/Groups\\/Overview\\/.*|' + + 'http:\\/\\/www\\.studivz\\.net\\/Gadgets\\/Info\\/.*|' + + 'http:\\/\\/www\\.studivz\\.net\\/Gadgets\\/Install\\/.*|' + + 'http:\\/\\/www\\.studivz\\.net\\/.*|' + + 'http:\\/\\/www\\.meinvz\\.net\\/Profile\\/.*|' + + 'http:\\/\\/www\\.meinvz\\.net\\/l\\/.*|' + + 'http:\\/\\/www\\.meinvz\\.net\\/Groups\\/Overview\\/.*|' + + 'http:\\/\\/www\\.meinvz\\.net\\/Gadgets\\/Info\\/.*|' + + 'http:\\/\\/www\\.meinvz\\.net\\/Gadgets\\/Install\\/.*|' + + 'http:\\/\\/www\\.meinvz\\.net\\/.*|' + + 'http:\\/\\/www\\.schuelervz\\.net\\/Profile\\/.*|' + + 'http:\\/\\/www\\.schuelervz\\.net\\/l\\/.*|' + + 'http:\\/\\/www\\.schuelervz\\.net\\/Groups\\/Overview\\/.*|' + + 'http:\\/\\/www\\.schuelervz\\.net\\/Gadgets\\/Info\\/.*|' + + 'http:\\/\\/www\\.schuelervz\\.net\\/Gadgets\\/Install\\/.*|' + + 'http:\\/\\/www\\.schuelervz\\.net\\/.*|' + + 'http:\\/\\/myloc\\.me\\/.*|' + + 'http:\\/\\/pastebin\\.com\\/.*|' + + 'http:\\/\\/pastie\\.org\\/.*|' + + 'http:\\/\\/www\\.pastie\\.org\\/.*|' + + 'http:\\/\\/redux\\.com\\/stream\\/item\\/.*\\/.*|' + + 
'http:\\/\\/redux\\.com\\/f\\/.*\\/.*|' + + 'http:\\/\\/www\\.redux\\.com\\/stream\\/item\\/.*\\/.*|' + + 'http:\\/\\/www\\.redux\\.com\\/f\\/.*\\/.*|' + + 'http:\\/\\/cl\\.ly\\/.*|' + + 'http:\\/\\/cl\\.ly\\/.*\\/content|' + + 'http:\\/\\/speakerdeck\\.com\\/u\\/.*\\/p\\/.*|' + + 'http:\\/\\/www\\.kiva\\.org\\/lend\\/.*|' + + 'http:\\/\\/www\\.timetoast\\.com\\/timelines\\/.*|' + + 'http:\\/\\/storify\\.com\\/.*\\/.*|' + + 'http:\\/\\/.*meetup\\.com\\/.*|' + + 'http:\\/\\/meetu\\.ps\\/.*|' + + 'http:\\/\\/www\\.dailymile\\.com\\/people\\/.*\\/entries\\/.*|' + + 'http:\\/\\/.*\\.kinomap\\.com\\/.*|' + + 'http:\\/\\/www\\.metacdn\\.com\\/api\\/users\\/.*\\/content\\/.*|' + + 'http:\\/\\/www\\.metacdn\\.com\\/api\\/users\\/.*\\/media\\/.*|' + + 'http:\\/\\/prezi\\.com\\/.*\\/.*|' + + 'http:\\/\\/.*\\.uservoice\\.com\\/.*\\/suggestions\\/.*|' + + 'http:\\/\\/formspring\\.me\\/.*|' + + 'http:\\/\\/www\\.formspring\\.me\\/.*|' + + 'http:\\/\\/formspring\\.me\\/.*\\/q\\/.*|' + + 'http:\\/\\/www\\.formspring\\.me\\/.*\\/q\\/.*|' + + 'http:\\/\\/twitlonger\\.com\\/show\\/.*|' + + 'http:\\/\\/www\\.twitlonger\\.com\\/show\\/.*|' + + 'http:\\/\\/tl\\.gd\\/.*|' + + 'http:\\/\\/www\\.qwiki\\.com\\/q\\/.*|' + + 'http:\\/\\/crocodoc\\.com\\/.*|' + + 'http:\\/\\/.*\\.crocodoc\\.com\\/.*|' + + 'https:\\/\\/crocodoc\\.com\\/.*|' + + 'https:\\/\\/.*\\.crocodoc\\.com\\/.*|' + + 'http:\\/\\/.*yfrog\\..*\\/.*|' + + 'http:\\/\\/tweetphoto\\.com\\/.*|' + + 'http:\\/\\/www\\.flickr\\.com\\/photos\\/.*|' + + 'http:\\/\\/flic\\.kr\\/.*|' + + 'http:\\/\\/twitpic\\.com\\/.*|' + + 'http:\\/\\/www\\.twitpic\\.com\\/.*|' + + 'http:\\/\\/twitpic\\.com\\/photos\\/.*|' + + 'http:\\/\\/www\\.twitpic\\.com\\/photos\\/.*|' + + 'http:\\/\\/.*imgur\\.com\\/.*|' + + 'http:\\/\\/.*\\.posterous\\.com\\/.*|' + + 'http:\\/\\/post\\.ly\\/.*|' + + 'http:\\/\\/twitgoo\\.com\\/.*|' + + 'http:\\/\\/i.*\\.photobucket\\.com\\/albums\\/.*|' + + 'http:\\/\\/s.*\\.photobucket\\.com\\/albums\\/.*|' + + 'http:\\/\\/phodroid\\.com\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.mobypicture\\.com\\/user\\/.*\\/view\\/.*|' + + 'http:\\/\\/moby\\.to\\/.*|' + + 'http:\\/\\/xkcd\\.com\\/.*|' + + 'http:\\/\\/www\\.xkcd\\.com\\/.*|' + + 'http:\\/\\/imgs\\.xkcd\\.com\\/.*|' + + 'http:\\/\\/www\\.asofterworld\\.com\\/index\\.php\\?id=.*|' + + 'http:\\/\\/www\\.asofterworld\\.com\\/.*\\.jpg|' + + 'http:\\/\\/asofterworld\\.com\\/.*\\.jpg|' + + 'http:\\/\\/www\\.qwantz\\.com\\/index\\.php\\?comic=.*|' + + 'http:\\/\\/23hq\\.com\\/.*\\/photo\\/.*|' + + 'http:\\/\\/www\\.23hq\\.com\\/.*\\/photo\\/.*|' + + 'http:\\/\\/.*dribbble\\.com\\/shots\\/.*|' + + 'http:\\/\\/drbl\\.in\\/.*|' + + 'http:\\/\\/.*\\.smugmug\\.com\\/.*|' + + 'http:\\/\\/.*\\.smugmug\\.com\\/.*\\#.*|' + + 'http:\\/\\/emberapp\\.com\\/.*\\/images\\/.*|' + + 'http:\\/\\/emberapp\\.com\\/.*\\/images\\/.*\\/sizes\\/.*|' + + 'http:\\/\\/emberapp\\.com\\/.*\\/collections\\/.*\\/.*|' + + 'http:\\/\\/emberapp\\.com\\/.*\\/categories\\/.*\\/.*\\/.*|' + + 'http:\\/\\/embr\\.it\\/.*|' + + 'http:\\/\\/picasaweb\\.google\\.com.*\\/.*\\/.*\\#.*|' + + 'http:\\/\\/picasaweb\\.google\\.com.*\\/lh\\/photo\\/.*|' + + 'http:\\/\\/picasaweb\\.google\\.com.*\\/.*\\/.*|' + + 'http:\\/\\/dailybooth\\.com\\/.*\\/.*|' + + 'http:\\/\\/brizzly\\.com\\/pic\\/.*|' + + 'http:\\/\\/pics\\.brizzly\\.com\\/.*\\.jpg|' + + 'http:\\/\\/img\\.ly\\/.*|' + + 'http:\\/\\/www\\.tinypic\\.com\\/view\\.php.*|' + + 'http:\\/\\/tinypic\\.com\\/view\\.php.*|' + + 'http:\\/\\/www\\.tinypic\\.com\\/player\\.php.*|' + + 
'http:\\/\\/tinypic\\.com\\/player\\.php.*|' + + 'http:\\/\\/www\\.tinypic\\.com\\/r\\/.*\\/.*|' + + 'http:\\/\\/tinypic\\.com\\/r\\/.*\\/.*|' + + 'http:\\/\\/.*\\.tinypic\\.com\\/.*\\.jpg|' + + 'http:\\/\\/.*\\.tinypic\\.com\\/.*\\.png|' + + 'http:\\/\\/meadd\\.com\\/.*\\/.*|' + + 'http:\\/\\/meadd\\.com\\/.*|' + + 'http:\\/\\/.*\\.deviantart\\.com\\/art\\/.*|' + + 'http:\\/\\/.*\\.deviantart\\.com\\/gallery\\/.*|' + + 'http:\\/\\/.*\\.deviantart\\.com\\/\\#\\/.*|' + + 'http:\\/\\/fav\\.me\\/.*|' + + 'http:\\/\\/.*\\.deviantart\\.com|' + + 'http:\\/\\/.*\\.deviantart\\.com\\/gallery|' + + 'http:\\/\\/.*\\.deviantart\\.com\\/.*\\/.*\\.jpg|' + + 'http:\\/\\/.*\\.deviantart\\.com\\/.*\\/.*\\.gif|' + + 'http:\\/\\/.*\\.deviantart\\.net\\/.*\\/.*\\.jpg|' + + 'http:\\/\\/.*\\.deviantart\\.net\\/.*\\/.*\\.gif|' + + 'http:\\/\\/plixi\\.com\\/p\\/.*|' + + 'http:\\/\\/plixi\\.com\\/profile\\/home\\/.*|' + + 'http:\\/\\/plixi\\.com\\/.*|' + + 'http:\\/\\/www\\.fotopedia\\.com\\/.*\\/.*|' + + 'http:\\/\\/fotopedia\\.com\\/.*\\/.*|' + + 'http:\\/\\/photozou\\.jp\\/photo\\/show\\/.*\\/.*|' + + 'http:\\/\\/photozou\\.jp\\/photo\\/photo_only\\/.*\\/.*|' + + 'http:\\/\\/instagr\\.am\\/p\\/.*|' + + 'http:\\/\\/skitch\\.com\\/.*\\/.*\\/.*|' + + 'http:\\/\\/img\\.skitch\\.com\\/.*|' + + 'https:\\/\\/skitch\\.com\\/.*\\/.*\\/.*|' + + 'https:\\/\\/img\\.skitch\\.com\\/.*|' + + 'http:\\/\\/share\\.ovi\\.com\\/media\\/.*\\/.*|' + + 'http:\\/\\/www\\.questionablecontent\\.net\\/|' + + 'http:\\/\\/questionablecontent\\.net\\/|' + + 'http:\\/\\/www\\.questionablecontent\\.net\\/view\\.php.*|' + + 'http:\\/\\/questionablecontent\\.net\\/view\\.php.*|' + + 'http:\\/\\/questionablecontent\\.net\\/comics\\/.*\\.png|' + + 'http:\\/\\/www\\.questionablecontent\\.net\\/comics\\/.*\\.png|' + + 'http:\\/\\/picplz\\.com\\/user\\/.*\\/pic\\/.*\\/|' + + 'http:\\/\\/twitrpix\\.com\\/.*|' + + 'http:\\/\\/.*\\.twitrpix\\.com\\/.*|' + + 'http:\\/\\/www\\.someecards\\.com\\/.*\\/.*|' + + 'http:\\/\\/someecards\\.com\\/.*\\/.*|' + + 'http:\\/\\/some\\.ly\\/.*|' + + 'http:\\/\\/www\\.some\\.ly\\/.*|' + + 'http:\\/\\/pikchur\\.com\\/.*|' + + 'http:\\/\\/achewood\\.com\\/.*|' + + 'http:\\/\\/www\\.achewood\\.com\\/.*|' + + 'http:\\/\\/achewood\\.com\\/index\\.php.*|' + + 'http:\\/\\/www\\.achewood\\.com\\/index\\.php.*|' + + 'http:\\/\\/.*amazon\\..*\\/gp\\/product\\/.*|' + + 'http:\\/\\/.*amazon\\..*\\/.*\\/dp\\/.*|' + + 'http:\\/\\/.*amazon\\..*\\/dp\\/.*|' + + 'http:\\/\\/.*amazon\\..*\\/o\\/ASIN\\/.*|' + + 'http:\\/\\/.*amazon\\..*\\/gp\\/offer-listing\\/.*|' + + 'http:\\/\\/.*amazon\\..*\\/.*\\/ASIN\\/.*|' + + 'http:\\/\\/.*amazon\\..*\\/gp\\/product\\/images\\/.*|' + + 'http:\\/\\/www\\.amzn\\.com\\/.*|' + + 'http:\\/\\/amzn\\.com\\/.*|' + + 'http:\\/\\/www\\.shopstyle\\.com\\/browse.*|' + + 'http:\\/\\/www\\.shopstyle\\.com\\/action\\/apiVisitRetailer.*|' + + 'http:\\/\\/api\\.shopstyle\\.com\\/action\\/apiVisitRetailer.*|' + + 'http:\\/\\/www\\.shopstyle\\.com\\/action\\/viewLook.*|' + + 'http:\\/\\/soundcloud\\.com\\/.*|' + + 'http:\\/\\/soundcloud\\.com\\/.*\\/.*|' + + 'http:\\/\\/soundcloud\\.com\\/.*\\/sets\\/.*|' + + 'http:\\/\\/soundcloud\\.com\\/groups\\/.*|' + + 'http:\\/\\/snd\\.sc\\/.*|' + + 'http:\\/\\/www\\.last\\.fm\\/music\\/.*|' + + 'http:\\/\\/www\\.last\\.fm\\/music\\/+videos\\/.*|' + + 'http:\\/\\/www\\.last\\.fm\\/music\\/+images\\/.*|' + + 'http:\\/\\/www\\.last\\.fm\\/music\\/.*\\/_\\/.*|' + + 'http:\\/\\/www\\.last\\.fm\\/music\\/.*\\/.*|' + + 'http:\\/\\/www\\.mixcloud\\.com\\/.*\\/.*\\/|' + + 
'http:\\/\\/www\\.radionomy\\.com\\/.*\\/radio\\/.*|' + + 'http:\\/\\/radionomy\\.com\\/.*\\/radio\\/.*|' + + 'http:\\/\\/www\\.entertonement\\.com\\/clips\\/.*|' + + 'http:\\/\\/www\\.rdio\\.com\\/\\#\\/artist\\/.*\\/album\\/.*|' + + 'http:\\/\\/www\\.rdio\\.com\\/artist\\/.*\\/album\\/.*|' + + 'http:\\/\\/www\\.zero-inch\\.com\\/.*|' + + 'http:\\/\\/.*\\.bandcamp\\.com\\/|' + + 'http:\\/\\/.*\\.bandcamp\\.com\\/track\\/.*|' + + 'http:\\/\\/.*\\.bandcamp\\.com\\/album\\/.*|' + + 'http:\\/\\/freemusicarchive\\.org\\/music\\/.*|' + + 'http:\\/\\/www\\.freemusicarchive\\.org\\/music\\/.*|' + + 'http:\\/\\/freemusicarchive\\.org\\/curator\\/.*|' + + 'http:\\/\\/www\\.freemusicarchive\\.org\\/curator\\/.*|' + + 'http:\\/\\/www\\.npr\\.org\\/.*\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.npr\\.org\\/.*\\/.*\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.npr\\.org\\/.*\\/.*\\/.*\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.npr\\.org\\/templates\\/story\\/story\\.php.*|' + + 'http:\\/\\/huffduffer\\.com\\/.*\\/.*|' + + 'http:\\/\\/www\\.audioboo\\.fm\\/boos\\/.*|' + + 'http:\\/\\/audioboo\\.fm\\/boos\\/.*|' + + 'http:\\/\\/boo\\.fm\\/b.*|' + + 'http:\\/\\/www\\.xiami\\.com\\/song\\/.*|' + + 'http:\\/\\/xiami\\.com\\/song\\/.*|' + + 'http:\\/\\/www\\.saynow\\.com\\/playMsg\\.html.*|' + + 'http:\\/\\/www\\.saynow\\.com\\/playMsg\\.html.*|' + + 'http:\\/\\/listen\\.grooveshark\\.com\\/s\\/.*|' + + 'http:\\/\\/radioreddit\\.com\\/songs.*|' + + 'http:\\/\\/www\\.radioreddit\\.com\\/songs.*|' + + 'http:\\/\\/radioreddit\\.com\\/\\?q=songs.*|' + + 'http:\\/\\/www\\.radioreddit\\.com\\/\\?q=songs.*|' + + 'http:\\/\\/espn\\.go\\.com\\/video\\/clip.*|' + + 'http:\\/\\/espn\\.go\\.com\\/.*\\/story.*|' + + 'http:\\/\\/abcnews\\.com\\/.*\\/video\\/.*|' + + 'http:\\/\\/abcnews\\.com\\/video\\/playerIndex.*|' + + 'http:\\/\\/washingtonpost\\.com\\/wp-dyn\\/.*\\/video\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.washingtonpost\\.com\\/wp-dyn\\/.*\\/video\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.boston\\.com\\/video.*|' + + 'http:\\/\\/boston\\.com\\/video.*|' + + 'http:\\/\\/www\\.facebook\\.com\\/photo\\.php.*|' + + 'http:\\/\\/www\\.facebook\\.com\\/video\\/video\\.php.*|' + + 'http:\\/\\/www\\.facebook\\.com\\/v\\/.*|' + + 'http:\\/\\/cnbc\\.com\\/id\\/.*\\?.*video.*|' + + 'http:\\/\\/www\\.cnbc\\.com\\/id\\/.*\\?.*video.*|' + + 'http:\\/\\/cnbc\\.com\\/id\\/.*\\/play\\/1\\/video\\/.*|' + + 'http:\\/\\/www\\.cnbc\\.com\\/id\\/.*\\/play\\/1\\/video\\/.*|' + + 'http:\\/\\/cbsnews\\.com\\/video\\/watch\\/.*|' + + 'http:\\/\\/www\\.google\\.com\\/buzz\\/.*\\/.*\\/.*|' + + 'http:\\/\\/www\\.google\\.com\\/buzz\\/.*|' + + 'http:\\/\\/www\\.google\\.com\\/profiles\\/.*|' + + 'http:\\/\\/google\\.com\\/buzz\\/.*\\/.*\\/.*|' + + 'http:\\/\\/google\\.com\\/buzz\\/.*|' + + 'http:\\/\\/google\\.com\\/profiles\\/.*|' + + 'http:\\/\\/www\\.cnn\\.com\\/video\\/.*|' + + 'http:\\/\\/edition\\.cnn\\.com\\/video\\/.*|' + + 'http:\\/\\/money\\.cnn\\.com\\/video\\/.*|' + + 'http:\\/\\/today\\.msnbc\\.msn\\.com\\/id\\/.*\\/vp\\/.*|' + + 'http:\\/\\/www\\.msnbc\\.msn\\.com\\/id\\/.*\\/vp\\/.*|' + + 'http:\\/\\/www\\.msnbc\\.msn\\.com\\/id\\/.*\\/ns\\/.*|' + + 'http:\\/\\/today\\.msnbc\\.msn\\.com\\/id\\/.*\\/ns\\/.*|' + + 'http:\\/\\/multimedia\\.foxsports\\.com\\/m\\/video\\/.*\\/.*|' + + 'http:\\/\\/msn\\.foxsports\\.com\\/video.*|' + + 'http:\\/\\/www\\.globalpost\\.com\\/video\\/.*|' + + 'http:\\/\\/www\\.globalpost\\.com\\/dispatch\\/.*|' + + 'http:\\/\\/guardian\\.co\\.uk\\/.*\\/video\\/.*\\/.*\\/.*\\/.*|' + + 
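            # (The alternation closes just below and is compiled with re.I, so
            # every branch matches case-insensitively. The api_endpoint and
            # api_params defined right after it point at embed.ly's oEmbed
            # service; a lookup for a matched link presumably resembles the
            # following sketch, in which submitted_url and the urllib calls
            # are illustrative rather than part of this patch:
            #
            #     from urllib import urlencode
            #     from urllib2 import urlopen
            #     query = dict(api_params, url=submitted_url)
            #     oembed_json = urlopen(api_endpoint + '?' + urlencode(query)).read()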
'http:\\/\\/www\\.guardian\\.co\\.uk\\/.*\\/video\\/.*\\/.*\\/.*\\/.*|' + + 'http:\\/\\/bravotv\\.com\\/.*\\/.*\\/videos\\/.*|' + + 'http:\\/\\/www\\.bravotv\\.com\\/.*\\/.*\\/videos\\/.*|' + + 'http:\\/\\/video\\.nationalgeographic\\.com\\/.*\\/.*\\/.*\\.html|' + + 'http:\\/\\/dsc\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/animal\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/health\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/investigation\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/military\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/planetgreen\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/science\\.discovery\\.com\\/videos\\/.*|' + + 'http:\\/\\/tlc\\.discovery\\.com\\/videos\\/.*|' + , re.I ) + api_endpoint = 'http://api.embed.ly/v1/api/oembed' api_params = {'format':'json', 'maxwidth':600 } diff --git a/r2/r2/lib/sgm.pyx b/r2/r2/lib/sgm.pyx index 4fe84fc735..ef05201155 100644 --- a/r2/r2/lib/sgm.pyx +++ b/r2/r2/lib/sgm.pyx @@ -1,7 +1,7 @@ # smart get multi: # For any keys not found in the cache, miss_fn() is run and the result is # stored in the cache. Then it returns everything, both the hits and misses. -def sgm(cache, keys, miss_fn, str prefix='', int time=0, _update=False): +def sgm(cache, keys, miss_fn, str prefix='', int time=0, stale=False, found_fn=None, _update=False): cdef dict ret cdef dict s_keys cdef dict cached @@ -21,16 +21,26 @@ def sgm(cache, keys, miss_fn, str prefix='', int time=0, _update=False): if _update: cached = {} else: - cached = cache.get_multi(s_keys.keys(), prefix=prefix) + if stale: + cached = cache.get_multi(s_keys.keys(), prefix=prefix, stale=stale) + else: + cached = cache.get_multi(s_keys.keys(), prefix=prefix) for k, v in cached.iteritems(): ret[s_keys[k]] = v - if miss_fn and len(cached) < len(s_keys): + still_need = set(s_keys.values()) - set(ret.keys()) + + if found_fn is not None: + # give the caller an opportunity to reject some of the cache + # hits if they aren't good enough. 
it's expected to use the + # mutability of the cached dict and still_need set to modify + # them as appropriate + found_fn(ret, still_need) + + if miss_fn and still_need: # if we didn't get all of the keys from the cache, go to the # miss_fn with the keys they asked for minus the ones that we # found - still_need = set(s_keys.values()) - set(ret.keys()) - calculated = miss_fn(still_need) ret.update(calculated) diff --git a/r2/r2/lib/sr_pops.py b/r2/r2/lib/sr_pops.py index a10c35609f..8dc6328615 100644 --- a/r2/r2/lib/sr_pops.py +++ b/r2/r2/lib/sr_pops.py @@ -42,7 +42,6 @@ def set_downs(): if c != sr._downs and c > 0: sr._downs = max(c, 0) sr._commit() - count.clear_sr_counts(names) def cache_lists(): def _chop(srs): @@ -61,7 +60,8 @@ def _chop(srs): # skip special system reddits like promos continue - if sr.type not in ('public', 'restricted'): + type = getattr(sr, 'type', 'private') + if type not in ('public', 'restricted'): # skips reddits that can't appear in the default list # because of permissions continue diff --git a/r2/r2/lib/strings.py b/r2/r2/lib/strings.py index 8babff2f97..0bc6a9b8b6 100644 --- a/r2/r2/lib/strings.py +++ b/r2/r2/lib/strings.py @@ -44,7 +44,7 @@ banned_by = "removed by %s", banned = "removed", reports = "reports: %d", - + # this accomodates asian languages which don't use spaces number_label = _("%(num)d %(thing)s"), @@ -57,7 +57,7 @@ # this accomodates asian languages which don't use spaces float_label = _("%(num)5.3f %(thing)s"), - # this is for Japanese which treats people counds differently + # this is for Japanese which treats people counts differently person_label = _("%(num)s %(persons)s"), firsttext = _("reddit is a source for what's new and popular online. vote on links that you like or dislike and help decide what's popular, or submit your own!"), @@ -72,7 +72,7 @@ cover_disclaim = _("(don't worry, it only takes a few seconds)"), legal = _("I understand and agree that registration on or use of this site constitutes agreement to its %(user_agreement)s and %(privacy_policy)s."), - + friends = _('to view reddit with only submissions from your friends, use [reddit.com/r/friends](%s)'), sr_created = _('your reddit has been created'), @@ -96,14 +96,14 @@ banned = _("you've been banned"), traffic = _("you can view traffic on a promoted link") ), - + sr_messages = dict( empty = _('you have not subscribed to any reddits.'), subscriber = _('below are the reddits you have subscribed to'), contributor = _('below are the reddits that you are an approved submitter on.'), moderator = _('below are the reddits that you have moderator access to.') ), - + sr_subscribe = _('click the `+frontpage` or `-frontpage` buttons to choose which reddits appear on your front page.'), searching_a_reddit = _('you\'re searching within the [%(reddit_name)s](%(reddit_link)s) reddit. '+ @@ -137,10 +137,17 @@ verified_quota_msg = _("You've submitted several links recently that haven't been doing very well. You'll have to wait a while before you can submit again, or [write to the moderators of this reddit](%(link)s) and ask for an exemption."), unverified_quota_msg = _("You haven't [verified your email address](%(link1)s); until you do, your submitting privileges will be severely limited. Please try again in an hour or verify your email address. If you'd like an exemption from this rule, please [write to the moderators of this reddit](%(link2)s)."), read_only_msg = _("reddit is in \"emergency read-only mode\" right now. :( you won't be able to log in. 
we're sorry, and are working frantically to fix the problem."), - lounge_msg = _("please grab a drink and join us in [the lounge](%(link)s)"), + lounge_msg = _("Please grab a drink and join us in [the lounge](%(link)s)."), postcard_msg = _("You sent us a postcard! (Or something similar.) When we run out of room on our refrigerator, we might one day auction off the stuff that people sent in. Is it okay if we include your thing?"), over_comment_limit = _("Sorry, the maximum number of comments is %(max)d. (However, if you subscribe to reddit gold, it goes up to %(goldmax)d.)"), over_comment_limit_gold = _("Sorry, the maximum number of comments is %d."), + youve_got_gold = _("%(sender)s just sent you %(amount)s of reddit gold! Wasn't that nice?"), + giftgold_note = _("Here's a note that was included:\n\n----\n\n"), + gold_summary_autorenew = _("You're about to set up an ongoing, autorenewing subscription to reddit gold for yourself (%(user)s)."), + gold_summary_onetime = _("You're about to make a one-time purchase of %(amount)s of reddit gold for yourself (%(user)s)."), + gold_summary_creddits = _("You're about to purchase %(amount)s of reddit gold creddits. They work like gift certificates: each creddit you have will allow you to give one month of reddit gold to someone else."), + gold_summary_signed_gift = _("You're about to give %(amount)s of reddit gold to %(recipient)s, who will be told that it came from you."), + gold_summary_anonymous_gift = _("You're about to give %(amount)s of reddit gold to %(recipient)s. It will be an anonymous gift."), ) class StringHandler(object): @@ -156,7 +163,7 @@ def __getitem__(self, attr): return self.__getattr__(attr) except AttributeError: raise KeyError - + def __getattr__(self, attr): rval = self.string_dict[attr] if isinstance(rval, (str, unicode)): @@ -202,19 +209,20 @@ def __getattr__(self, attr): plurals = PluralManager([P_("comment", "comments"), P_("point", "points"), - + # things P_("link", "links"), P_("comment", "comments"), P_("message", "messages"), P_("subreddit", "subreddits"), - + P_("creddit", "creddits"), + # people P_("reader", "readers"), P_("subscriber", "subscribers"), P_("approved submitter", "approved submitters"), P_("moderator", "moderators"), - + # time words P_("milliseconds","milliseconds"), P_("second", "seconds"), @@ -239,7 +247,7 @@ def points(x): @staticmethod def safepoints(x): - return strings.points_label % dict(num=max(x,0), + return strings.points_label % dict(num=max(x,0), point=plurals.N_points(x)) @staticmethod @@ -250,11 +258,17 @@ def _people(x, label): @staticmethod def subscribers(x): - return Score._people(x, plurals.N_subscribers) + return Score._people(x, plurals.N_subscribers) @staticmethod def readers(x): - return Score._people(x, plurals.N_readers) + return Score._people(x, plurals.N_readers) + + @staticmethod + def somethings(x, word): + p = plurals.string_dict[word] + f = lambda x: ungettext(p[0], p[1], x) + return strings.number_label % dict(num=x, thing=f(x)) @staticmethod def none(x): @@ -288,7 +302,7 @@ class RandomString(object): def __init__(self, description, num): self.desc = description self.num = num - + def get(self, quantity = 0): """Generates a list of 'quantity' random strings. If quantity < self.num, the entries are guaranteed to be unique.""" @@ -310,7 +324,7 @@ def _trans_string(self, n): def __iter__(self): for i in xrange(self.num): yield self._trans_string(i) - + class RandomStringManager(object): """class for keeping randomized translatable strings organized. 
diff --git a/r2/r2/lib/tracking.py b/r2/r2/lib/tracking.py index fd0b40a6ee..d14b0cd89b 100644 --- a/r2/r2/lib/tracking.py +++ b/r2/r2/lib/tracking.py @@ -136,10 +136,15 @@ class UserInfo(Info): _tracked = ['name', 'site', 'lang', 'cname'] tracker_url = g.tracker_url - def init_defaults(self): - self.name = safe_str(c.user.name if c.user_is_loggedin else '') - self.site = safe_str(c.site.name if c.site else '') - action = "" + @staticmethod + def get_site(): + return safe_str(c.site.name if c.site else '') + + @staticmethod + def get_srpath(): + name = UserInfo.get_site() + + action = None if c.render_style in ("mobile", "compact"): action = c.render_style else: @@ -147,12 +152,20 @@ def init_defaults(self): action = request.environ['pylons.routes_dict'].get('action') except Exception,e: g.log.error(e) - if action: - self.site += "-%s" % action + if not action: + return name + return '-'.join((name, action)) + + def init_defaults(self): + self.name = safe_str(c.user.name if c.user_is_loggedin else '') + self.site = UserInfo.get_srpath() self.lang = safe_str(c.lang if c.lang else '') self.cname = safe_str(c.cname) +class UserInfoNew(UserInfo): + tracker_url = g.newtracker_url + class PromotedLinkInfo(Info): _tracked = [] tracker_url = g.adtracker_url diff --git a/r2/r2/lib/translation.py b/r2/r2/lib/translation.py index c999914d4e..87a2c791e4 100644 --- a/r2/r2/lib/translation.py +++ b/r2/r2/lib/translation.py @@ -72,11 +72,11 @@ def set_lang(lang, graceful_fail = False, **kwargs): registry.replace(pylons.translator, translator) -comment = re.compile(r'^\s*#') -msgid = re.compile(r'^\s*msgid\s+"') -msgid_pl = re.compile(r'^\s*msgid_plural\s+"') -msgstr = re.compile(r'^\s*msgstr(\[\d\])?\s+"') -str_only = re.compile(r'^\s*"') +comment = re.compile(r'\A\s*#') +msgid = re.compile(r'\A\s*msgid\s+"') +msgid_pl = re.compile(r'\A\s*msgid_plural\s+"') +msgstr = re.compile(r'\A\s*msgstr(\[\d\])?\s+"') +str_only = re.compile(r'\A\s*"') substr = re.compile("(%(\([^\)]+\))?([\d\.]+)?[a-zA-Z])") diff --git a/r2/r2/lib/utils/thing_utils.py b/r2/r2/lib/utils/thing_utils.py index dc441d7195..b9bcb7885d 100644 --- a/r2/r2/lib/utils/thing_utils.py +++ b/r2/r2/lib/utils/thing_utils.py @@ -56,6 +56,7 @@ def last_modified_multi(things, action): def set_last_visit(thing): from pylons import g + from r2.lib.cache import CL_ONE key = last_modified_key(thing, "visit") g.permacache.set(key, make_last_modified()) diff --git a/r2/r2/lib/utils/utils.py b/r2/r2/lib/utils/utils.py index 019ef662b4..326e56be46 100644 --- a/r2/r2/lib/utils/utils.py +++ b/r2/r2/lib/utils/utils.py @@ -255,9 +255,9 @@ def get_title(url): except: return None - -valid_schemes = ('http', 'https', 'ftp', 'mailto') -valid_dns = re.compile('^[-a-zA-Z0-9]+$') + +valid_schemes = ('http', 'https', 'ftp', 'mailto') +valid_dns = re.compile('\A[-a-zA-Z0-9]+\Z') def sanitize_url(url, require_scheme = False): """Validates that the url is of the form @@ -758,6 +758,8 @@ def fetch_things2(query, chunk_size = 100, batch_fn = None, chunks = False): if len(items) < chunk_size: done = True + after = items[-1] + if batch_fn: items = batch_fn(items) @@ -766,7 +768,6 @@ def fetch_things2(query, chunk_size = 100, batch_fn = None, chunks = False): else: for i in items: yield i - after = items[-1] if not done: query._rules = deepcopy(orig_rules) @@ -774,12 +775,12 @@ def fetch_things2(query, chunk_size = 100, batch_fn = None, chunks = False): items = list(query) def fix_if_broken(thing, delete = True): - from r2.models import Link, Comment, Subreddit + from r2.models 
import Link, Comment, Subreddit, Message # the minimum set of attributes that are required attrs = dict((cls, cls._essentials) for cls - in (Link, Comment, Subreddit)) + in (Link, Comment, Subreddit, Message)) if thing.__class__ not in attrs: raise TypeError @@ -790,6 +791,11 @@ def fix_if_broken(thing, delete = True): getattr(thing, attr) except AttributeError: # that failed; let's explicitly load it and try again + + # we don't have g + print "You might want to try this:" + print " g.memcache.delete('%s')" % thing._cache_key() + thing._load() try: getattr(thing, attr) @@ -897,12 +903,15 @@ def title_to_url(title, max_length = 50): title = title[:last_word] return title or "_" -def trace(fn): +def dbg(s): import sys + sys.stderr.write('%s\n' % (s,)) + +def trace(fn): def new_fn(*a,**kw): ret = fn(*a,**kw) - sys.stderr.write("Fn: %s; a=%s; kw=%s\nRet: %s\n" - % (fn,a,kw,ret)) + dbg("Fn: %s; a=%s; kw=%s\nRet: %s" + % (fn,a,kw,ret)) return ret return new_fn @@ -1068,7 +1077,7 @@ def in_chunks(it, size=25): if chunk: yield chunk -r_subnet = re.compile("^(\d+\.\d+)\.\d+\.\d+$") +r_subnet = re.compile("\A(\d+\.\d+)\.\d+\.\d+\Z") def ip_and_slash16(req): ip = req.ip @@ -1131,10 +1140,11 @@ def spaceout(items, targetseconds, if sleeptime > 0: sleep(sleeptime) -def progress(it, verbosity=100, key=repr, estimate=None, persec=False): +def progress(it, verbosity=100, key=repr, estimate=None, persec=True): """An iterator that yields everything from `it', but prints progress information along the way, including time-estimates if possible""" + from itertools import islice from datetime import datetime import sys @@ -1148,46 +1158,121 @@ def progress(it, verbosity=100, key=repr, estimate=None, persec=False): except: pass + def timedelta_to_seconds(td): + return td.days * (24*60*60) + td.seconds + (float(td.microseconds) / 1000000) + def format_timedelta(td, sep=''): + ret = [] + s = timedelta_to_seconds(td) + if s < 0: + neg = True + s *= -1 + else: + neg = False + + if s >= (24*60*60): + days = int(s//(24*60*60)) + ret.append('%dd' % days) + s -= days*(24*60*60) + if s >= 60*60: + hours = int(s//(60*60)) + ret.append('%dh' % hours) + s -= hours*(60*60) + if s >= 60: + minutes = int(s//60) + ret.append('%dm' % minutes) + s -= minutes*60 + if s >= 1: + seconds = int(s) + ret.append('%ds' % seconds) + s -= seconds + + if not ret: + return '0s' + + return ('-' if neg else '') + sep.join(ret) + def format_datetime(dt, show_date=False): + if show_date: + return dt.strftime('%Y-%m-%d %H:%M') + else: + return dt.strftime('%H:%M:%S') + def deq(dt1, dt2): + "Indicates whether the two datetimes' dates describe the same (day,month,year)" + d1, d2 = dt1.date(), dt2.date() + return ( d1.day == d2.day + and d1.month == d2.month + and d1.year == d2.year) + sys.stderr.write('Starting at %s\n' % (start,)) + # we're going to islice it so we need to start an iterator + it = iter(it) + seen = 0 - for item in it: - seen += 1 - if seen % verbosity == 0: - now = datetime.now() - elapsed = now - start - elapsed_seconds = elapsed.days * 86400 + elapsed.seconds - - if estimate: - remaining = ((elapsed/seen)*estimate)-elapsed - completion = now + remaining - count_str = ('%d/%d %.2f%%' - % (seen, estimate, float(seen)/estimate*100)) - estimate_str = (' (%s remaining; completion %s)' - % (remaining, completion)) - else: - count_str = '%d' % seen - estimate_str = '' + while True: + this_chunk = 0 + thischunk_started = datetime.now() + + # the simple bit: just iterate and yield + for item in islice(it, verbosity): + this_chunk += 1 + 
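                # (each pass of the enclosing while-loop drains at most
                # `verbosity` items from the source via islice; this_chunk
                # counts the current batch while seen tracks the running
                # total, so a batch that comes back short of `verbosity`
                # means the source iterator is exhausted and the check that
                # follows the loop ends the generator)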
seen += 1 + yield item + + if this_chunk < verbosity: + # we're done, the iterator is empty + break - if key: - key_str = ': %s' % key(item) - else: - key_str = '' + now = datetime.now() + elapsed = now - start + thischunk_seconds = timedelta_to_seconds(now - thischunk_started) + + if estimate: + # the estimate is based on the total number of items that + # we've processed in the total amount of time that's + # passed, so it should smooth over momentary spikes in + # speed (but will take a while to adjust to long-term + # changes in speed) + remaining = ((elapsed/seen)*estimate)-elapsed + completion = now + remaining + count_str = ('%d/%d %.2f%%' + % (seen, estimate, float(seen)/estimate*100)) + completion_str = format_datetime(completion, not deq(completion,now)) + estimate_str = (' (%s remaining; completion %s)' + % (format_timedelta(remaining), + completion_str)) + else: + count_str = '%d' % seen + estimate_str = '' - if persec and elapsed_seconds > 0: - persec_str = ' (%.2f/s)' % (float(seen)/elapsed_seconds,) - else: - persec_str = '' + if key: + key_str = ': %s' % key(item) + else: + key_str = '' - sys.stderr.write('%s%s, %s%s%s\n' - % (count_str, persec_str, - elapsed, estimate_str, key_str)) - this_chunk = 0 - yield item + # unlike the estimate, the persec count is the number per + # second for *this* batch only, without smoothing + if persec and thischunk_seconds > 0: + persec_str = ' (%.1f/s)' % (float(this_chunk)/thischunk_seconds,) + else: + persec_str = '' + + sys.stderr.write('%s%s, %s%s%s\n' + % (count_str, persec_str, + format_timedelta(elapsed), estimate_str, key_str)) now = datetime.now() elapsed = now - start - sys.stderr.write('Processed %d items in %s..%s (%s)\n' % (seen, start, now, elapsed)) + elapsed_seconds = timedelta_to_seconds(elapsed) + if persec and seen > 0 and elapsed_seconds > 0: + persec_str = ' (@%.1f/sec)' % (float(seen)/elapsed_seconds) + else: + persec_str = '' + sys.stderr.write('Processed %d%s items in %s..%s (%s)\n' + % (seen, + persec_str, + format_datetime(start, not deq(start, now)), + format_datetime(now, not deq(start, now)), + format_timedelta(elapsed))) class Hell(object): def __str__(self): diff --git a/r2/r2/lib/wrapped.pyx b/r2/r2/lib/wrapped.pyx index ee065309f5..9092c1377b 100644 --- a/r2/r2/lib/wrapped.pyx +++ b/r2/r2/lib/wrapped.pyx @@ -361,6 +361,10 @@ class Templated(object): return res + def _cache_key(self, key): + return 'render_%s(%s)' % (self.__class__.__name__, + md5(key).hexdigest()) + def _write_cache(self, keys): from pylons import g @@ -369,7 +373,7 @@ class Templated(object): toset = {} for key, val in keys.iteritems(): - toset[md5(key).hexdigest()] = val + toset[self._cache_key(key)] = val g.rendercache.set_multi(toset) @@ -378,7 +382,7 @@ class Templated(object): ekeys = {} for key in keys: - ekeys[md5(key).hexdigest()] = key + ekeys[self._cache_key(key)] = key found = g.rendercache.get_multi(ekeys) ret = {} for fkey, val in found.iteritems(): diff --git a/r2/r2/models/_builder.pyx b/r2/r2/models/_builder.pyx index de406c2f44..bb6332d0f9 100644 --- a/r2/r2/models/_builder.pyx +++ b/r2/r2/models/_builder.pyx @@ -1,6 +1,6 @@ from builder import Builder, MAX_RECURSION, empty_listing from r2.lib.wrapped import Wrapped -from r2.lib.comment_tree import link_comments, link_comments_and_sort, tree_sort_fn +from r2.lib.comment_tree import link_comments, link_comments_and_sort, tree_sort_fn, MAX_ITERATIONS from r2.models.link import * from r2.lib.db import operators from r2.lib import utils @@ -122,7 +122,7 @@ class 
_CommentBuilder(Builder): # items is a list of things we actually care about so load them - items = Comment._byID(items, data = True, return_dict = False) + items = Comment._byID(items, data = True, return_dict = False, stale=self.stale) cdef list wrapped = self.wrap_items(items) @@ -168,16 +168,27 @@ class _CommentBuilder(Builder): #put the remaining comments into the tree (the show more comments link) cdef dict more_comments = {} + cdef int iteration_count = 0 + cdef int parentfinder_iteration_count while candidates: + if iteration_count > MAX_ITERATIONS: + raise Exception("bad comment tree for link %s" % + self.link._id36) + to_add = candidates.pop(0) direct_child = True #ignore top-level comments for now p_id = parents[to_add] #find the parent actually being displayed #direct_child is whether the comment is 'top-level' + parentfinder_iteration_count = 0 while p_id and not cids.has_key(p_id): + if parentfinder_iteration_count > MAX_ITERATIONS: + raise Exception("bad comment tree in link %s" % + self.link._id36) p_id = parents[p_id] direct_child = False + parentfinder_iteration_count += 1 mc2 = more_comments.get(p_id) if not mc2: @@ -203,6 +214,7 @@ class _CommentBuilder(Builder): mc2.children.append(to_add) mc2.count += 1 + iteration_count += 1 return final @@ -229,6 +241,20 @@ class _MessageBuilder(Builder): def _tree_filter(self, x): return tree_sort_fn(x) < self.after._id + def _viewable_message(self, m): + if (c.user_is_admin + or getattr(m, "author_id", 0) == c.user._id + or getattr(m, "to_id", 0) == c.user._id): + return True + + # m is wrapped at this time, so it should have an SR + subreddit = getattr(m, "subreddit", None) + if subreddit and subreddit.is_moderator(c.user): + return True + + return False + + def get_items(self): tree = self.get_tree() @@ -264,6 +290,9 @@ class _MessageBuilder(Builder): messages = Message._byID(message_ids, data = True, return_dict = False) wrapped = {} for m in self.wrap_items(messages): + if not self._viewable_message(m): + raise ValueError("%r is not viewable by %s; path is %s" % + (m, c.user.name, request.fullpath)) wrapped[m._id] = m if prev: @@ -304,6 +333,7 @@ class _MessageBuilder(Builder): child.focal = True else: child.collapsed = child.is_collapsed + parent.child.things.append(child) parent.is_parent = True # the parent might be the focal message on a permalink page diff --git a/r2/r2/models/account.py b/r2/r2/models/account.py index 4792afb4cf..b02dd77229 100644 --- a/r2/r2/models/account.py +++ b/r2/r2/models/account.py @@ -39,7 +39,7 @@ class Account(Thing): _data_int_props = Thing._data_int_props + ('link_karma', 'comment_karma', 'report_made', 'report_correct', 'report_ignored', 'spammer', - 'reported') + 'reported', 'gold_creddits') _int_prop_suffix = '_karma' _essentials = ('name', ) _defaults = dict(pref_numsites = 25, @@ -67,7 +67,8 @@ class Account(Thing): pref_collapse_read_messages = False, pref_private_feeds = True, pref_show_adbox = True, - pref_show_sponsors = True, + pref_show_sponsors = True, # sponsored links + pref_show_sponsorships = True, pref_highlight_new_comments = True, mobile_compress = False, mobile_thumbnail = True, @@ -88,7 +89,8 @@ class Account(Thing): pref_show_promote = None, gold = False, gold_charter = False, - creddits = 0, + gold_creddits = 0, + gold_creddit_escrow = 0, ) def karma(self, kind, sr = None): diff --git a/r2/r2/models/admintools.py b/r2/r2/models/admintools.py index ebfe6a87f3..1acf70357e 100644 --- a/r2/r2/models/admintools.py +++ b/r2/r2/models/admintools.py @@ -212,6 +212,9 @@ def 
degolden(self, account, severe=False): sr = Subreddit._by_name(g.lounge_reddit) sr.remove_contributor(account) + def admin_list(self): + return list(g.admins) + admintools = AdminTools() def cancel_subscription(subscr_id): @@ -264,7 +267,7 @@ def update_gold_users(verbose=False): print "%s just expired" % account.name admintools.degolden(account) send_system_message(account, "Your reddit gold subscription has expired. :(", - "Your subscription to reddit gold has expired. [Click here for details on how to renew, or to set up an automatically-renewing subscription.](http://www.reddit.com/help/gold) Or, if you don't want to, please write to us and tell us where we let you down, so we can work on fixing the problem.\n\nThis is a system account whose mail we don't read very often, so please address all feedback to 912@reddit.com.") + "Your subscription to reddit gold has expired. [Click here for details on how to renew, or to set up an automatically-renewing subscription.](http://www.reddit.com/gold) Or, if you don't want to, please write to us at 912@reddit.com and tell us where we let you down, so we can work on fixing the problem.") continue count += 1 @@ -290,7 +293,7 @@ def update_gold_users(verbose=False): print "Sending notice to %s" % account.name g.hardcache.set(hc_key, True, 86400 * 10) send_system_message(account, "Your reddit gold subscription is about to expire!", - "Your subscription to reddit gold will be expiring soon. [Click here for details on how to renew, or to set up an automatically-renewing subscription.](http://www.reddit.com/help/gold) Or, if you think we suck, just let your subscription lapse and go back to being a regular user.\n\nBy the way, this is a system account whose mail we don't read very often, so if you need to reply, please write to 912@reddit.com.") + "Your subscription to reddit gold will be expiring soon. 
[Click here for details on how to renew, or to set up an automatically-renewing subscription.](http://www.reddit.com/gold) Or, if you think we suck, just let your subscription lapse and go back to being a regular user.\n\nIf you have any questions, please write to 912@reddit.com.") if verbose: for exp_date in sorted(expiration_dates.keys()): @@ -306,9 +309,12 @@ def update_gold_users(verbose=False): def is_banned_IP(ip): return False -def is_banned_domain(dom): +def is_banned_domain(dom, ip): return None +def valid_vote(thing): + return True + def valid_thing(v, karma, *a, **kw): return not v._thing1._spam diff --git a/r2/r2/models/builder.py b/r2/r2/models/builder.py index 211476d5dc..238168c709 100644 --- a/r2/r2/models/builder.py +++ b/r2/r2/models/builder.py @@ -45,7 +45,8 @@ MAX_RECURSION = 10 class Builder(object): - def __init__(self, wrap = Wrapped, keep_fn = None): + def __init__(self, wrap = Wrapped, keep_fn = None, stale = True): + self.stale = stale self.wrap = wrap self.keep_fn = keep_fn @@ -72,14 +73,14 @@ def wrap_items(self, items): email_attrses = {} friend_rels = None if aids: - authors = Account._byID(aids, True) if aids else {} + authors = Account._byID(aids, data=True, stale=self.stale) if aids else {} cup_infos = Account.cup_info_multi(aids) if c.user_is_admin: email_attrses = admintools.email_attrs(aids, return_dict=True) if user and user.gold: friend_rels = user.friend_rels() - subreddits = Subreddit.load_subreddits(items) + subreddits = Subreddit.load_subreddits(items, stale=self.stale) if not user: can_ban_set = set() @@ -448,7 +449,7 @@ def fetch_more(self, last_item, num_have): done = True self.names, new_names = names[slice_size:], names[:slice_size] - new_items = Thing._by_fullname(new_names, data = True, return_dict=False) + new_items = Thing._by_fullname(new_names, data = True, return_dict=False, stale=self.stale) return done, new_items class SearchBuilder(IDBuilder): @@ -471,6 +472,7 @@ def keep_item(self,item): # doesn't use the default keep_item because we want to keep # things that were voted on, even if they've chosen to hide # them in normal listings + # TODO: Consider a flag to disable this (and see listingcontroller.py) if item._spam or item._deleted: return False else: diff --git a/r2/r2/models/gold.py b/r2/r2/models/gold.py index c067e8aa2e..0a1503f156 100644 --- a/r2/r2/models/gold.py +++ b/r2/r2/models/gold.py @@ -31,6 +31,7 @@ from r2.lib.utils import tup, randstr from httplib import HTTPSConnection from urlparse import urlparse +from time import time import socket, base64 from BeautifulSoup import BeautifulStoneSoup @@ -47,7 +48,7 @@ # status can be: invalid, unclaimed, claimed sa.Column('status', sa.String, nullable = False), sa.Column('date', sa.DateTime(timezone=True), - nullable = False, + nullable = False, default = sa.func.now()), sa.Column('payer_email', sa.String, nullable = False), sa.Column('paying_id', sa.String, nullable = False), @@ -68,7 +69,7 @@ def create_unclaimed_gold (trans_id, payer_email, paying_id, pennies, days, secret, date, subscr_id = None): - + try: gold_table.insert().execute(trans_id=str(trans_id), subscr_id=subscr_id, @@ -102,7 +103,17 @@ def notify_unclaimed_gold(txn_id, gold_secret, payer_email, source): # No point in i18n, since we don't have access to the user's # language info (or name) at this point - body = """ + if gold_secret.startswith("cr_"): + body = """ +Thanks for buying reddit gold gift creddits! We have received your %s +transaction, number %s. + +Your secret claim code is %s. 
To associate the +creddits with your reddit account, just visit +%s +""" % (source, txn_id, gold_secret, url) + else: + body = """ Thanks for subscribing to reddit gold! We have received your %s transaction, number %s. @@ -116,10 +127,10 @@ def notify_unclaimed_gold(txn_id, gold_secret, payer_email, source): def create_claimed_gold (trans_id, payer_email, paying_id, pennies, days, secret, account_id, date, - subscr_id = None): + subscr_id = None, status="claimed"): gold_table.insert().execute(trans_id=trans_id, subscr_id=subscr_id, - status="claimed", + status=status, payer_email=payer_email, paying_id=paying_id, pennies=pennies, @@ -128,6 +139,18 @@ def create_claimed_gold (trans_id, payer_email, paying_id, account_id=account_id, date=date) +def create_gift_gold (giver_id, recipient_id, days, date, signed): + trans_id = "X%d%s-%s" % (int(time()), randstr(2), 'S' if signed else 'A') + + gold_table.insert().execute(trans_id=trans_id, + status="gift", + paying_id=giver_id, + payer_email='', + pennies=0, + days=days, + account_id=recipient_id, + date=date) + # returns None if the ID was never valid # returns "already claimed" if it's already been claimed # Otherwise, it's valid and the function claims it, returning a tuple with: @@ -249,6 +272,15 @@ def process_google_transaction(trans_id): # get the financial details auth = trans.find("authorization-amount-notification") + + # creddits? + is_creddits = False + cart = trans.find("shopping-cart") + if cart: + for item in cart.findAll("item-name"): + if "creddit" in item.contents[0]: + is_creddits = True + break if not auth: # see if the payment was declinded @@ -268,7 +300,13 @@ def process_google_transaction(trans_id): days = None try: pennies = int(float(auth.find("order-total").contents[0])*100) - if pennies == 2999: + if is_creddits: + secret = "cr_" + if pennies >= 2999: + days = 12 * 31 * int(pennies / 2999) + else: + days = 31 * int(pennies / 399) + elif pennies == 2999: secret = "ys_" days = 366 elif pennies == 399: @@ -280,9 +318,9 @@ def process_google_transaction(trans_id): sa.and_(gold_table.c.status == 'uncharged', gold_table.c.trans_id == 'g' + str(trans_id)), values = { gold_table.c.status : "strange", - gold_table.c.pennies : pennies, + gold_table.c.pennies : pennies, gold_table.c.payer_email : email, - gold_table.c.paying_id : payer_id + gold_table.c.paying_id : payer_id }).execute() return except ValueError: diff --git a/r2/r2/models/link.py b/r2/r2/models/link.py index 2f781706a0..a9a7c300cb 100644 --- a/r2/r2/models/link.py +++ b/r2/r2/models/link.py @@ -67,53 +67,49 @@ class Link(Thing, Printable): def __init__(self, *a, **kw): Thing.__init__(self, *a, **kw) - @classmethod - def by_url_key(cls, url): - maxlen = 250 - template = 'byurl(%s,%s)' - keyurl = _force_utf8(UrlParser.base_url(url.lower())) - hexdigest = md5(keyurl).hexdigest() - usable_len = maxlen-len(template)-len(hexdigest) - return template % (hexdigest, keyurl[:usable_len]) - @classmethod def _by_url(cls, url, sr): from subreddit import FakeSubreddit if isinstance(sr, FakeSubreddit): sr = None - url = cls.by_url_key(url) - link_ids = g.urlcache.get(url) - if link_ids: - links = Link._byID(link_ids, data = True, return_dict = False) - links = [l for l in links if not l._deleted] + try: + lbu = LinksByUrl._byID(LinksByUrl._key_from_url(url)) + except tdb_cassandra.NotFound: + # translate the tdb_cassandra.NotFound into the NotFound + # the caller is expecting + raise NotFound('Link "%s"' % url) - if links and sr: - for link in links: - if sr._id == link.sr_id: - 
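        # (the removed lines here are the old memcache-backed index: by_url_key
        # hashed the normalized URL into a g.urlcache key that held a list of
        # link ids. The added code above replaces that with the LinksByUrl
        # Cassandra view, keyed by the lowercased base URL, while keeping the
        # same behaviour of preferring a link from the requested subreddit
        # when several links share one URL)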
return link - elif links: - return links + link_id36s = lbu._values() - raise NotFound, 'Link "%s"' % url + links = Link._byID36(link_id36s, data = True, return_dict = False) + links = [l for l in links if not l._deleted] + + if links and sr: + for link in links: + if sr._id == link.sr_id: + # n.b. returns the first one if there are multiple + return link + elif links: + return links + + raise NotFound('Link "%s"' % url) def set_url_cache(self): if self.url != 'self': - key = self.by_url_key(self.url) - link_ids = g.urlcache.get(key) or [] - if self._id not in link_ids: - link_ids.append(self._id) - g.urlcache.set(key, link_ids) + LinksByUrl._set_values(LinksByUrl._key_from_url(self.url), + {self._id36: self._id36}) def update_url_cache(self, old_url): """Remove the old url from the by_url cache then update the cache with the new url.""" if old_url != 'self': - key = self.by_url_key(old_url) - link_ids = g.urlcache.get(key) or [] - while self._id in link_ids: - link_ids.remove(self._id) - g.urlcache.set(key, link_ids) + try: + lbu = LinksByUrl._key_from_url(old_url) + del lbu[self._id36] + lbu._commit() + except tdb_cassandra.NotFound: + pass self.set_url_cache() @property @@ -126,13 +122,13 @@ def resubmit_link(self, sr_url = False): return submit_url @classmethod - def _submit(cls, title, url, author, sr, ip): + def _submit(cls, title, url, author, sr, ip, spam=False): from r2.models import admintools l = cls(_ups = 1, title = title, url = url, - _spam = author._spam, + _spam = spam, author_id = author._id, sr_id = sr._id, lang = sr.lang, @@ -169,9 +165,13 @@ def _saved(cls, user, link): return cls._somethinged(SaveHide, user, link, 'save') def _save(self, user): + # dual-write CassandraSaves + CassandraSave._save(user, self) return self._something(SaveHide, user, self._saved, 'save') def _unsave(self, user): + # dual-write CassandraSaves + CassandraSave._unsave(user, self) return self._unsomething(user, self._saved, 'save') @classmethod @@ -186,9 +186,11 @@ def _hidden(cls, user, link): return cls._somethinged(SaveHide, user, link, 'hide') def _hide(self, user): + CassandraHide._hide(user, self) return self._something(SaveHide, user, self._hidden, 'hide') def _unhide(self, user): + CassandraHide._unhide(user, self) return self._unsomething(user, self._hidden, 'hide') def link_domain(self): @@ -306,12 +308,20 @@ def add_props(cls, user, wrapped): cname = c.cname site = c.site - saved = Link._saved(user, wrapped) if user_is_loggedin else {} - hidden = Link._hidden(user, wrapped) if user_is_loggedin else {} - trials = trial_info(wrapped) + if user_is_loggedin: + saved_lu = [] + for item in wrapped: + if not SaveHide._can_skip_lookup(user, item): + saved_lu.append(item._id36) + + saved = CassandraSave._fast_query(user._id36, saved_lu) + hidden = CassandraHide._fast_query(user._id36, saved_lu) + + clicked = {} + else: + saved = hidden = clicked = {} - #clicked = Link._clicked(user, wrapped) if user else {} - clicked = {} + trials = trial_info(wrapped) for item in wrapped: show_media = False @@ -344,7 +354,7 @@ def add_props(cls, user, wrapped): item.thumbnail = thumbnail_url(item) elif user.pref_no_profanity and item.over_18 and not c.site.over_18: if show_media: - item.thumbnail = "/static/nsfw.png" + item.thumbnail = "/static/nsfw2.png" else: item.thumbnail = "" elif not show_media: @@ -364,15 +374,24 @@ def add_props(cls, user, wrapped): item.domain = (domain(item.url) if not item.is_self else 'self.' 
+ item.subreddit.name) item.urlprefix = '' - item.saved = bool(saved.get((user, item, 'save'))) - item.hidden = bool(hidden.get((user, item, 'hide'))) - item.clicked = bool(clicked.get((user, item, 'click'))) + + if user_is_loggedin: + item.saved = (user._id36, item._id36) in saved + item.hidden = (user._id36, item._id36) in hidden + + item.clicked = bool(clicked.get((user, item, 'click'))) + else: + item.saved = item.hidden = item.clicked = False + item.num = None item.permalink = item.make_permalink(item.subreddit) if item.is_self: - item.url = item.make_permalink(item.subreddit, + item.url = item.make_permalink(item.subreddit, force_domain = True) + if g.shortdomain: + item.shortlink = g.shortdomain + '/' + item._id36 + # do we hide the score? if user_is_admin: item.hide_score = False @@ -489,6 +508,14 @@ def subreddit_slow(self): when possible. """ return Subreddit._byID(self.sr_id, True, return_dict = False) +class LinksByUrl(tdb_cassandra.View): + _use_db = True + + @classmethod + def _key_from_url(cls, url): + keyurl = _force_utf8(UrlParser.base_url(url.lower())) + return keyurl + # Note that there are no instances of PromotedLink or LinkCompressed, # so overriding their methods here will not change their behaviour # (except for add_props). These classes are used to override the @@ -619,7 +646,7 @@ def add_props(cls, user, wrapped): #fetch parent links links = Link._byID(set(l.link_id for l in wrapped), data = True, - return_dict = True) + return_dict = True, stale=True) #get srs for comments that don't have them (old comments) for cm in wrapped: @@ -627,14 +654,14 @@ def add_props(cls, user, wrapped): cm.sr_id = links[cm.link_id].sr_id subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped), - data=True,return_dict=False) + data=True, return_dict=False, stale=True) cids = dict((w._id, w) for w in wrapped) parent_ids = set(cm.parent_id for cm in wrapped if getattr(cm, 'parent_id', None) and cm.parent_id not in cids) parents = {} if parent_ids: - parents = Comment._byID(parent_ids, data=True) + parents = Comment._byID(parent_ids, data=True, stale=True) can_reply_srs = set(s._id for s in subreddits if s.can_comment(user)) \ if c.user_is_loggedin else set() @@ -836,6 +863,7 @@ class Message(Thing, Printable): new = False, first_message = None, to_id = None, sr_id = None, to_collapse = None, author_collapse = None) _data_int_props = Thing._data_int_props + ('reported', ) + _essentials = ('author_id',) cache_ignore = set(["to", "subreddit"]).union(Printable.cache_ignore) @classmethod @@ -891,6 +919,7 @@ def _new(cls, author, to, subject, body, ip, parent = None, sr = None): elif sr.is_moderator(author): m.distinguished = 'yes' m._commit() + # if there is a "to" we may have to create an inbox relation as well # also, only global admins can be message spammed. 
if to and (not m._spam or to.name in g.admins): @@ -1001,6 +1030,7 @@ def add_props(cls, user, wrapped): item.to_collapse = False item.author_collapse = False item.link_title = link.title + item.permalink = item.lookups[0].make_permalink(link, sr=sr) item.link_permalink = link.make_permalink(sr) if item.parent_id: item.subject = _('comment reply') @@ -1047,6 +1077,76 @@ def keep_item(self, wrapped): class SaveHide(Relation(Account, Link)): pass class Click(Relation(Account, Link)): pass +class SimpleRelation(tdb_cassandra.Relation): + _use_db = False + + @classmethod + def _create(cls, user, link, write_consistency_level = None): + n = cls(thing1_id = user._id36, + thing2_id = link._id36) + n._commit(write_consistency_level=write_consistency_level) + return n + + @classmethod + def _uncreate(cls, user, link): + try: + cls._fast_query(user._id36, link._id36)._destroy() + except tdb_cassandra.NotFound: + pass + +class CassandraSave(SimpleRelation): + _use_db = True + _cf_name = 'Save' + + # thing1_cls = Account + # thing2_cls = Link + + @classmethod + def _save(cls, *a, **kw): + return cls._create(*a, **kw) + + @classmethod + def _unsave(cls, *a, **kw): + return cls._uncreate(*a, **kw) + + def _on_create(self): + # it's okay if these indices get lost + wcl = tdb_cassandra.CL.ONE + + SavesByAccount._set_values(self.thing1_id, + {self._id: self._id}, + write_consistency_level=wcl) + + return SimpleRelation._on_create(self) + + def _on_destroy(self): + sba = SavesByAccount._byID(self.thing1_id) + del sba[self._id] + sba._commit() + + return SimpleRelation._on_destroy(self) + +class CassandraHide(SimpleRelation): + _use_db = True + _cf_name = 'Hide' + _ttl = 7*24*60*60 + + @classmethod + def _hide(cls, *a, **kw): + return cls._create(*a, **kw) + + @classmethod + def _unhide(cls, *a, **kw): + return cls._uncreate(*a, **kw) + +class CassandraClick(SimpleRelation): + _use_db = True + _cf_name = 'Click' + +class SavesByAccount(tdb_cassandra.View): + _use_db = True + _cf_name = 'SavesByAccount' + class Inbox(MultiRelation('inbox', Relation(Account, Comment), Relation(Account, Message))): diff --git a/r2/r2/models/subreddit.py b/r2/r2/models/subreddit.py index 719500a191..56a8352b1f 100644 --- a/r2/r2/models/subreddit.py +++ b/r2/r2/models/subreddit.py @@ -101,9 +101,9 @@ def _new(cls, name, title, author_id, ip, lang = g.lang, type = 'public', _specials = {} - + @classmethod - def _by_name(cls, names, _update = False): + def _by_name(cls, names, stale=False, _update = False): #lower name here so there is only one cache names, single = tup(names, True) @@ -134,9 +134,9 @@ def _fetch(lnames): for sr in srs) srs = {} - srids = sgm(g.cache, to_fetch.keys(), _fetch, prefix='subreddit.byname') + srids = sgm(g.cache, to_fetch.keys(), _fetch, prefix='subreddit.byname', stale=stale) if srids: - srs = cls._byID(srids.values(), data=True, return_dict=False) + srs = cls._byID(srids.values(), data=True, return_dict=False, stale=stale) for sr in srs: ret[to_fetch[sr.name.lower()]] = sr @@ -182,6 +182,9 @@ def banned(self): def subscribers(self): return self.subscriber_ids() + def spammy(self): + return self._spam + def can_comment(self, user): if c.user_is_admin: return True @@ -264,14 +267,14 @@ def can_demod(self, bully, victim): return bully_rel._date <= victim_rel._date @classmethod - def load_subreddits(cls, links, return_dict = True): + def load_subreddits(cls, links, return_dict = True, stale=False): """returns the subreddits for a list of links. 
it also preloads the permissions for the current user.""" srids = set(l.sr_id for l in links if getattr(l, "sr_id", None) is not None) subreddits = {} if srids: - subreddits = cls._byID(srids, True) + subreddits = cls._byID(srids, data=True, stale=stale) if subreddits and c.user_is_loggedin: # dict( {Subreddit,Account,name} -> Relationship ) @@ -330,7 +333,14 @@ def add_props(cls, user, wrapped): item.contributor = bool(item.type != 'public' and (item.moderator or rels.get((item, user, 'contributor')))) + + # Don't reveal revenue information via /r/lounge's subscribers + if (g.lounge_reddit and item.name == g.lounge_reddit + and not c.user_is_admin): + item._ups = 0 + item.score = item._ups + # override "voting" score behavior (it will override the use of # item.score in builder.py to be ups-downs) item.likes = item.subscriber or None @@ -354,7 +364,7 @@ def wrapped_cache_key(wrapped, style): @classmethod def top_lang_srs(cls, lang, limit, filter_allow_top = False, over18 = True, - over18_only = False, ids=False): + over18_only = False, ids=False, stale=False): from r2.lib import sr_pops lang = tup(lang) @@ -362,10 +372,11 @@ def top_lang_srs(cls, lang, limit, filter_allow_top = False, over18 = True, sr_ids = sr_ids[:limit] return (sr_ids if ids - else Subreddit._byID(sr_ids, data=True, return_dict=False)) + else Subreddit._byID(sr_ids, data=True, return_dict=False, stale=stale)) @classmethod - def default_subreddits(cls, ids = True, over18 = False, limit = g.num_default_reddits): + def default_subreddits(cls, ids = True, over18 = False, limit = g.num_default_reddits, + stale=True): """ Generates a list of the subreddits any user with the current set of language preferences and no subscriptions would see. @@ -377,11 +388,12 @@ def default_subreddits(cls, ids = True, over18 = False, limit = g.num_default_re auto_srs = [] if g.automatic_reddits: auto_srs = map(lambda sr: sr._id, - Subreddit._by_name(g.automatic_reddits).values()) + Subreddit._by_name(g.automatic_reddits, stale=stale).values()) srs = cls.top_lang_srs(c.content_langs, limit + len(auto_srs), filter_allow_top = True, - over18 = over18, ids = True) + over18 = over18, ids = True, + stale=stale) rv = [] for sr in srs: @@ -393,7 +405,7 @@ def default_subreddits(cls, ids = True, over18 = False, limit = g.num_default_re rv = auto_srs + rv - return rv if ids else Subreddit._byID(rv, data=True,return_dict=False) + return rv if ids else Subreddit._byID(rv, data=True, return_dict=False, stale=stale) @classmethod @memoize('random_reddits', time = 1800) @@ -414,7 +426,7 @@ def random_reddit(cls, limit = 1000, over18 = False): if srs else Subreddit._by_name(g.default_sr)) @classmethod - def user_subreddits(cls, user, ids = True, over18=False, limit = sr_limit): + def user_subreddits(cls, user, ids = True, over18=False, limit = sr_limit, stale=False): """ subreddits that appear in a user's listings. If the user has subscribed, returns the stored set of subscriptions. 
@@ -430,10 +442,12 @@ def user_subreddits(cls, user, ids = True, over18=False, limit = sr_limit):
             sr_ids = cls.random_reddits(user.name, sr_ids, limit)
         return sr_ids if ids else Subreddit._byID(sr_ids,
                                                   data=True,
-                                                  return_dict=False)
+                                                  return_dict=False,
+                                                  stale=stale)
     else:
-        limit = g.num_default_reddits if limit is None else limit
-        return cls.default_subreddits(ids = ids, over18=over18, limit = limit)
+        return cls.default_subreddits(ids = ids, over18=over18,
+                                      limit=g.num_default_reddits,
+                                      stale=stale)
 
     @classmethod
     @memoize('subreddit.special_reddits')
@@ -598,6 +612,9 @@ def get_all_comments(self):
         from r2.lib.db import queries
         return queries.get_all_comments()
 
+    def spammy(self):
+        return False
+
 class FriendsSR(FakeSubreddit):
     name = 'friends'
     title = 'friends'
@@ -798,6 +815,10 @@ def __init__(self, sr_ids, path):
         self.real_path = path
         self.sr_ids = sr_ids
 
+    def spammy(self):
+        srs = Subreddit._byID(self.sr_ids, return_dict=False)
+        return any(sr._spam for sr in srs)
+
     @property
     def path(self):
         return '/r/' + self.real_path
diff --git a/r2/r2/models/vote.py b/r2/r2/models/vote.py
index 98dcdbfd12..d4b190958d 100644
--- a/r2/r2/models/vote.py
+++ b/r2/r2/models/vote.py
@@ -21,12 +21,15 @@
 ################################################################################
 from r2.lib.db.thing import MultiRelation, Relation
 from r2.lib.db import tdb_cassandra
+from r2.lib.db.tdb_cassandra import TdbException
 from account import Account
 from link import Link, Comment
 from pylons import g
+from datetime import datetime, timedelta
 
+__all__ = ['Vote', 'CassandraLinkVote', 'CassandraCommentVote', 'score_changes']
 
 def score_changes(amount, old_amount):
     uc = dc = 0
@@ -41,10 +44,14 @@ class CassandraVote(tdb_cassandra.Relation):
     _use_db = False
 
+    _bool_props = ('valid_user', 'valid_thing', 'organic')
     _str_props = ('name', # one of '-1', '0', '1'
                   'notes', 'ip')
 
+    _defaults = {'organic': False}
+    _default_ttls = {'ip': 30*24*60*60}
+
     @classmethod
     def _rel(cls, thing1_cls, thing2_cls):
         if (thing1_cls, thing2_cls) == (Account, Link):
@@ -52,16 +59,51 @@ def _rel(cls, thing1_cls, thing2_cls):
         elif (thing1_cls, thing2_cls) == (Account, Comment):
             return CassandraCommentVote
 
+        raise TdbException("Can't find relation for %r(%r,%r)"
+                           % (cls, thing1_cls, thing2_cls))
+
+
 class VotesByLink(tdb_cassandra.View):
     _use_db = True
     _type_prefix = 'VotesByLink'
 
-    # _view_of = LinkVote
+    # _view_of = CassandraLinkVote
 
     @classmethod
     def get_all(cls, link_id):
         vbl = cls._byID(link_id)
-        return LinkVote._byID(vbl._t.values()).values()
+        return CassandraLinkVote._byID(vbl._values()).values()
+
+class VotesByDay(tdb_cassandra.View):
+    _use_db = True
+    _type_prefix = 'VotesByDay'
+
+    # _view_of = CassandraLinkVote
+
+    @staticmethod
+    def _id_for_day(dt):
+        return dt.strftime('%Y-%j')
+
+    @classmethod
+    def _votes_for_period(cls, start_date, length):
+        """An iterator yielding every vote that occurred in the given
+           period in no particular order
+
+           start_date =:= datetime()
+           length =:= timedelta()
+        """
+
+        # n.b. because of the volume of data involved this has to do
+        # multiple requests and can be quite slow
+
+        thisdate = start_date
+        while thisdate <= start_date + length:
+            for voteid_chunk in in_chunks(cls._byID(cls._id_for_day(thisdate)),
+                                          chunk_size=1000):
+                for vote in CassandraLinkVote._byID(voteid_chunk).values():
+                    yield vote
+
+            thisdate += timedelta(days=1)
 
 class CassandraLinkVote(CassandraVote):
     _use_db = True
@@ -70,20 +112,22 @@ class CassandraLinkVote(CassandraVote):
 
     # these parameters aren't actually meaningful, they just help
     # keep track
-    # _views = [VotesByLink]
+    # _views = [VotesByLink, VotesByDay]
 
     _thing1_cls = Account
     _thing2_cls = Link
 
     def _on_create(self):
-        try:
-            vbl = VotesByLink._byID(self.thing1_id)
-        except tdb_cassandra.NotFound:
-            vbl = VotesByLink(_id=self.thing1_id)
+        # it's okay if these indices get lost
+        wcl = tdb_cassandra.CL.ONE
 
-        vbl[self._id] = self._id
-        vbl._commit()
+        v_id = {self._id: self._id}
 
-        CassandraVote._on_create(self)
+        VotesByLink._set_values(self.thing1_id, v_id,
+                                write_consistency_level=wcl)
+        VotesByDay._set_values(VotesByDay._id_for_day(self.date), v_id,
+                               write_consistency_level=wcl)
+
+        return CassandraVote._on_create(self)
 
 class CassandraCommentVote(CassandraVote):
     _use_db = True
@@ -98,12 +142,12 @@ class CassandraCommentVote(CassandraVote):
 
 class Vote(MultiRelation('vote',
                          Relation(Account, Link),
                          Relation(Account, Comment))):
-
+    _defaults = {'organic': False}
 
     @classmethod
     def vote(cls, sub, obj, dir, ip, organic = False, cheater = False):
         from admintools import valid_user, valid_thing, update_score
-        from r2.lib.count import incr_counts
+        from r2.lib.count import incr_sr_count
         from r2.lib.db import queries
 
         sr = obj.subreddit_slow
@@ -111,8 +155,7 @@ def vote(cls, sub, obj, dir, ip, organic = False, cheater = False):
         karma = sub.karma(kind, sr)
 
         is_self_link = (kind == 'link'
-                        and hasattr(obj,'is_self')
-                        and obj.is_self)
+                        and getattr(obj,'is_self',False))
 
         #check for old vote
         rel = cls.rel(sub, obj)
@@ -129,9 +172,9 @@ def vote(cls, sub, obj, dir, ip, organic = False, cheater = False):
             v._name = str(amount)
 
             #these still need to be recalculated
-            old_valid_thing = v.valid_thing
+            old_valid_thing = getattr(v, 'valid_thing', False)
             v.valid_thing = (valid_thing(v, karma, cheater = cheater)
-                             and v.valid_thing)
+                             and getattr(v,'valid_thing', False))
             v.valid_user = (v.valid_user and v.valid_thing
                             and valid_user(v, sr, karma))
@@ -140,11 +183,8 @@ def vote(cls, sub, obj, dir, ip, organic = False, cheater = False):
            is_new = True
             oldamount = 0
             v = rel(sub, obj, str(amount))
-            v.author_id = obj.author_id
-            v.sr_id = sr._id
             v.ip = ip
-            old_valid_thing = v.valid_thing = \
-                              valid_thing(v, karma, cheater = cheater)
+            old_valid_thing = v.valid_thing = valid_thing(v, karma, cheater = cheater)
             v.valid_user = (v.valid_thing and valid_user(v, sr, karma)
                             and not is_self_link)
             if organic:
@@ -169,7 +209,7 @@ def vote(cls, sub, obj, dir, ip, organic = False, cheater = False):
         #update the sr's valid vote count
         if is_new and v.valid_thing and kind == 'link':
             if sub._id != obj.author_id:
-                incr_counts([sr])
+                incr_sr_count(sr)
 
         # now write it out to Cassandra. We'll write it out to both
         # this way for a while
@@ -191,13 +231,102 @@ def vote(cls, sub, obj, dir, ip, organic = False, cheater = False):
 
         return v
 
-    #TODO make this generic and put on multirelation?
@classmethod - def likes(cls, sub, obj): - votes = cls._fast_query(sub, obj, ('1', '-1'), - data=False, eager_load=False, - timestamp_optimize=True) - votes = dict((tuple(k[:2]), v) for k, v in votes.iteritems() if v) - return votes + def likes(cls, sub, objs): + # generalise and put on all abstract relations? + + if not sub or not objs: + return {} + + from r2.models import Account + + assert isinstance(sub, Account) + + rels = {} + for obj in objs: + try: + types = CassandraVote._rel(sub.__class__, obj.__class__) + except TdbException: + # for types for which we don't have a vote rel, we'll + # skip them + continue + + rels.setdefault(types, []).append(obj) + + + ret = {} + + for relcls, items in rels.iteritems(): + ids = dict((item._id36, item) + for item in items) + votes = relcls._fast_query(sub._id36, ids, + properties=['name']) + for (thing1_id36, thing2_id36), rel in votes.iteritems(): + ret[(sub, ids[thing2_id36])] = (True if rel.name == '1' + else False if rel.name == '-1' + else None) + return ret + +def test(): + from r2.models import Link, Account, Comment + from r2.lib.db.tdb_cassandra import thing_cache + + assert CassandraVote._rel(Account, Link) == CassandraLinkVote + assert CassandraVote._rel(Account, Comment) == CassandraCommentVote + + v1 = CassandraLinkVote('abc', 'def', valid_thing=True, valid_user=False) + v1.testing = 'lala' + v1._commit() + print 'v1', v1, v1._id, v1._t + + v2 = CassandraLinkVote._byID('abc_def') + print 'v2', v2, v2._id, v2._t + + if v1 != v2: + # this can happen after running the test more than once, it's + # not a big deal + print "Expected %r to be the same as %r" % (v1, v2) + + v2.testing = 'lala' + v2._commit() + v1 = None # invalidated this + + assert CassandraLinkVote._byID('abc_def') == v2 + + CassandraLinkVote('abc', 'ghi', name='1')._commit() + + try: + print v2.falsy + raise Exception("Got an attribute that doesn't exist?") + except AttributeError: + pass + + try: + assert Vote('1', '2') is None + raise Exception("I shouldn't be able to create _use_db==False instances") + except TdbException: + print "You can safely ignore the warning about discarding the uncommitted '1_2'" + except CassandraException: + print "Seriously?" 
+ except Exception, e: + print id(e.__class__), id(TdbException.__class__) + print isinstance(e, TdbException) + print 'Huh?', repr(e) + + try: + CassandraLinkVote._byID('bacon') + raise Exception("I shouldn't be able to look up items that don't exist") + except NotFound: + pass + + print 'fast_query', CassandraLinkVote._fast_query('abc', ['def']) + + assert CassandraLinkVote._fast_query('abc', 'def') == v2 + assert CassandraLinkVote._byID('abc_def') == CassandraLinkVote._by_fullname('r6_abc_def') + + print 'all', list(CassandraLinkVote._all()), list(VotesByLink._all()) + + print 'all_by_link', VotesByLink.get_all('abc') + print 'Localcache:', dict(thing_cache.caches[0]) diff --git a/r2/r2/public/static/bestof_award.png b/r2/r2/public/static/bestof_award.png new file mode 100644 index 0000000000..b7c228aa15 Binary files /dev/null and b/r2/r2/public/static/bestof_award.png differ diff --git a/r2/r2/public/static/css/mobile.css b/r2/r2/public/static/css/mobile.css index 80cabe4653..47b08e7172 100644 --- a/r2/r2/public/static/css/mobile.css +++ b/r2/r2/public/static/css/mobile.css @@ -2,7 +2,6 @@ body { font-family: verdana,arial,helvetica,sans-serif; margin: 0; padding: 0; - font-size: x-small; color: #888; } @@ -23,17 +22,21 @@ ul { } .link { + padding: 5px 5px 5px 5px; + /*padding-top: 5px; + padding-bottom: 5px; margin-left: 2px; + display: inline;*/ } .title { - color: blue; - font-size: small; - margin-right: .5em; + color: #369; } .byline { margin: 0px 0px .5em 2px; + display: inline; + font-size: small; } .description { @@ -45,22 +48,26 @@ ul { } .buttons { - font-weight: bold; } +.comment .child { + border-left: 1px dotted #DDF; +} + .child { - margin-left: 2em; + padding-left: 1em; + margin-left: 1em; + border-left: 1px dotted #DDF; } .headerbar { - background:lightgray none repeat scroll 0%; + background-color: lightgrey; margin: 5px 0px 5px 2px; } .headerbar span { background-color: white; color: gray; - font-size: x-small; font-weight: bold; margin-left: 15px; padding: 0px 3px; @@ -81,6 +88,7 @@ ul { .tabmenu { list-style-type: none; + display: inline; } .tabmenu li { diff --git a/r2/r2/public/static/css/reddit.css b/r2/r2/public/static/css/reddit.css index 5e8086afb5..645fae954a 100644 --- a/r2/r2/public/static/css/reddit.css +++ b/r2/r2/public/static/css/reddit.css @@ -77,6 +77,14 @@ input[type=checkbox], input[type=radio] { margin-top: .4em; } label.disabled { color: gray; } .wrong {color: red; font-weight: normal} +.attention { + font-weight: bold; + border: solid 1px #ff6600; + padding: 3px; + -moz-border-radius: 7px; + -webkit-border-radius: 7px; + border-radius: 7px; +} .subform input.text { width: 25em } .subform textarea.text { width: 25em } @@ -371,19 +379,27 @@ ul.flat-vert {text-align: left;} .raisedbox li {margin-bottom: 2px;} .sidebox .spacer { + position: relative; margin-top: 10px; padding: 5px 0 0 44px; min-height: 41px; background: white none no-repeat scroll top left; } +.sidebox .spacer a { + position: absolute; + top: 0; left: 0px; + display: block; + height: 40px; + width: 40px; +} + .sidebox.create .spacer { background-image: url(/static/create-a-reddit.png); /* SPRITE */ background-repeat:no-repeat; } .sidebox.gold .spacer { - /* TODO: replace the following with the new logo */ background-image: url(/static/reddit_gold-40.png); /* SPRITE */ background-repeat:no-repeat; } @@ -1366,7 +1382,7 @@ textarea.gray { color: gray; } .status { margin-left: 5px; color: red; font-size: small;} .error { color: red; font-size: small; margin: 5px; } .red { color:red } 
-.buygold { color: #c59534; font-weight: bold; } +.buygold { color: #9A7D2E; font-weight: bold; } .line-through { text-decoration: line-through } #noresults { margin-right: 310px; } @@ -1395,6 +1411,56 @@ textarea.gray { color: gray; } width: 305px; } +#searchexpando { + display: none; + margin: 5px 0 0 0; + -moz-border-radius:3px; + -webkit-border-radius:3px; + border-radius:3px; +} + +#searchexpando input, #searchexpando p { + margin-bottom: 10px; +} + +#searchexpando { + padding-top: 10px; +} + +#searchexpando dl { + margin: 10px 0; +} + +#searchexpando dt { + margin: 0; +} + +#previoussearch p { + margin: 5px 0; +} + +#moresearchinfo { + display: none; + padding-top: 5px; + max-width: 300px; + border: 0 solid orange; + margin-top: -5px; +} + +label + #moresearchinfo { + border-width: 1px 0 0 0; + margin-top: 0px; +} + +#previoussearch #moresearchinfo { + border-color: gray; + margin: 5px 0; +} + +#search_hidemore { + float: right; + margin-left: 5px; +} .searchparams { margin: 5px 20px 5px 20px } @@ -1617,6 +1683,9 @@ textarea.gray { color: gray; } .error-log .exception-name { margin-right: 5px; + display: inline-block; + max-height: 50px; + overflow: hidden; } .error-log .nickname { @@ -2257,6 +2326,9 @@ textarea.gray { color: gray; } /* default form styles */ +form .blurb { + margin-bottom: 5px; +} form .spacer + .spacer { margin: 15px 0; @@ -2268,8 +2340,8 @@ form input[type=radio] {margin: 2px .5em 0 0; } .pretty-form { font-size: larger; - vertical-align: top; - } + vertical-align: top; +} .pretty-form p {margin: 3px ;} .pretty-form input[type=checkbox], @@ -3714,6 +3786,10 @@ dd { margin-left: 20px; } font-size: 19px; } +.titlebox h1 a { + color: black; +} + .titlebox .karma { font-size: 18px; font-weight: bold; @@ -3848,6 +3924,8 @@ dd { margin-left: 20px; } .linkinfo .upvotes {font-size: 80%; color: orangered;} .linkinfo .downvotes {font-size: 80%; color: #5f99cf; } +.linkinfo .shortlink {font-size: 80%; margin-top: 3px; } +.linkinfo .shortlink a {font-size: 120%; font-family: monospace} .linkinfo table {margin-top: 5px;} @@ -4105,7 +4183,7 @@ tr.gold-accent + tr > td { .friend-note.edited input[type=text] { width: 250px; margin-right: 0px; } -.gold-accent h1, +.gold-accent h1, .gold-accent th { color: #6a4d00; font-family: "Hoefler Text","Palatino Linotype","Book Antiqua", @@ -4143,6 +4221,16 @@ tr.gold-accent + tr > td { color: #583800; } +.giftgold { + margin-bottom: 5px; +} +.giftgold a { + padding-left: 20px; + color: #9a7d2e; + font-weight: bold; + background-image: url(/static/giftgold.png); + background-repeat:no-repeat; +} .tiny { font-size: xx-small; @@ -4155,6 +4243,81 @@ tr.gold-accent + tr > td { max-width: 550px; } +.gold-form { + font-family: "Hoefler Text","Palatino Linotype","Book Antiqua", Palatino,georgia,garamond,FreeSerif,serif; + min-height: 600px; + line-height: 22px; +} + +.gold-form .roundfield { + border: 1px solid #E1B000; + background-color: #fffdcc; + color: #6a4d00; +} + +.gold-form .roundfield-content .gray a { + margin-left: 0.8em; + font-size: small; +} + +.gold-form .note { + font-size: 11px; + font-style: italic; + color: #222; + margin-top: 7px; +} + +.gold-form .spacer { + margin-top: 20px !important; +} + +.gold-subsection { + display: none; + position: absolute; +} + +.gold-form textarea, .gold-form input[type=text] { + margin-top: 3px; +} + +.gold-logo { + float: left; + margin-right: 15px; +} + +.giftmessage { + border: solid #888 1px; + color: black; + background-color: white; + padding: 0 10px; + margin-bottom: 15px; + display: 
inline-block; + -moz-border-radius: 3px; + -webkit-border-radius: 3px; + border-radius: 3px; +} + +.gold-button { + padding-top: 2px; + margin-top: 8px; + margin-right: 8px; + color: #482800; + background-color: #efcc4c; + border-color: #fff088 #fff088 #6a3d00 #6a3d00; + font-size: 16px; + font-family: Palatino,georgia,garamond,FreeSerif,serif; + -webkit-appearance: none; + text-shadow: 0px 1px 0px hsla(0,0%,100%,.7); +} + +.gold-dropdown { + color: #482800; + background-color: #fff088; + font-size: 16px; + font-family: "Hoefler Text","Palatino Linotype","Book Antiqua", + Palatino,georgia,garamond,FreeSerif,serif; +} + .comment-visits-box .title { font-weight: bold; } @@ -4168,3 +4331,7 @@ tr.gold-accent + tr > td { border: solid 1px #cddaf3; margin: -1px 0; } + +.role { + width: 800px; +} diff --git a/r2/r2/public/static/giftgold.png b/r2/r2/public/static/giftgold.png new file mode 100644 index 0000000000..27d976939f Binary files /dev/null and b/r2/r2/public/static/giftgold.png differ diff --git a/r2/r2/public/static/gpgkey b/r2/r2/public/static/gpgkey new file mode 100644 index 0000000000..77869cc072 --- /dev/null +++ b/r2/r2/public/static/gpgkey @@ -0,0 +1,52 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1.4.11 (Darwin) + +mQINBEzdwrgBEADHVMaxISyA6BLq+bRB7wiBaNS4letyraylJ6txZbSZIqC7FhdF +2JEE264PBMUZXra5lnsmm3EfNcCc8e/drJxGbOjGAa+TnxG7uFIkZOd1R8DKaQF5 +Tl8IrBBjCrxNsKLAFHyRtw/UJ5FKYFbwmG4u+N179gy6I2i5KHQKdVDbi4k28gW3 +Rz+ohjTHsZKj6fFHFjRS7XVR+liW81nxj83W7cGyUXzFdfaqgZAR1Ej8usQgjSBF +kM0LgMtDuTG/BLVILVtKFSPtg3Sthq/MQIyw84llmAhY94i9wMD94Vg8suRicIaX +odtLAWhTbdXIhrOsDFwoGov/9JckTxxRnHEQS8mU3gO+rP6yjqR2sxu8C3DdRWeH +fH0GlL3kkCzlJK8JnXc7JhKrvNnPbUT9NTZGoogn8LodCDgImD7Ontc00CiL6nxb +h9bSVG/ymN863lrO69jnVtMM7PJnxzCXXx6g7DmvIVF+Fk3/WanC45teJXjahc0z +xv/ORdit2FAryfl0X0LM6eVYqGmDg0LT3pTcmk88T69Jpp/FlUb4cxsZ+Tb7TaY0 +2IkaU3JlyottxSIxZ3EoV1RAbmElIgTQltrpcS1rsy/7ty7DD8ZUE3V3/s2VnZSw +yKaokJ0py4qFBclqWnmMqzOgFPRP868K1zFqhWxVVcIpBAdRwuhLlpKCnwARAQAB +tDtSZWRkaXQgQWRtaW5zIChJdCdzIGEgc2VjcmV0IHRvIGV2ZXJ5Ym9keSkgPGdw +Z0ByZWRkaXQuY29tPokCOAQTAQIAIgUCTN3CuAIbAwYLCQgHAwIGFQgCCQoLBBYC +AwECHgECF4AACgkQftm/uSHgK4zpEg//cJoOMlATrDORh6fkDngC9L0AJkYdnEYB +iAbEVJUu/Vo9UcMvQ7WZGtOBJsXsm42NrSd+U/L5HCMYgEYPoRtT3pLXNiCb5sRV +BC/2XmqKgyIa/7z35fAS/hzkpxU3acsPm+OlaqAU9OK9vaqu2jfeG/X1Ye2sWh9r +/hr4Yu5uqpZzWhog6hqGPrtpYdEmdmlmGJ+FdEYeGQg6DjNd+d4nKWYbQpn5xZ54 +GCocMntfpWc0iWbssbr8Nw/itigIHKJXNEG6/tcmqr4aXyF0P20lJWJPR71nEGOL +ZvAdLOzJYsg/ek12EXiqlikVi79e/2yaX873QFBBKBRf1wfEloPvsHC4b4cSXkGg +P6Jycw3ErBC2SOxI4gobGrlf4f19P1Vn1q9kyT7kNOfh3EOAKzh7IMmsEB9OF4ax +YHAH/1F3BitUwJ6SO1Fwb231d+vhF2YXalh3NZwFTtvOD4mXa2RBqCuo0EBm42jJ +b6R1cVqgEvCKrIBFbrsdSgzgSxtn14MS+4n3ic/Cc/2alUlnf/uo0ieE7VM5HP+M +UgZrtWelVee6+b6bJhqG0hfZgEx8C3CbIsaKsNid1qkBHLBEk/MI3zDiNZmtYcfy +z8Od71szLJIh58RJYkbUiBB3/ZYhlYAlq6qU4zCEyz/miboB5pwfr1dWyir+1fK1 +n4vqY48WGV25Ag0ETN3CuAEQAOarHXrgWpLKfq6+qLtOVgNYnyvrGqInfxeUkATm +E6tv9tgQCLrgyXLz2c30teGm+PUK6LA68JdhyKBEOFY2H1Nr7BFmrKNaVhGz6HMd +7PTHk5S8Rq4tc18uU6YOJEvAb/phDFvFyXJYfJA/XLKtya8xEc6b20UrwAA2JYv0 +CFpljTTqYLZZjrKDWjhU6pB9wT4PJqXJ/yVUwrGTqirnUvX0xzOim4JPRqsgG+sU +zs7cyvzWFuDNd7vsx5kLj1VIDMJm98Ca0PkXnBY6csxuoUbVa6waaOYDUyvaa1NQ +uz1CjUd2GRMY6JNar0G4Fgwdn4bfn42fUhbJy3QvPcQyqufp4EC6kDeP/KZ0PyXt +5zJ9PKR3Q+gXDTteszmKTVkQ8KmUTMnX4IpzhiSVIuVhZBEmtZwHgOHrHXYmZtOQ +jDjVCuCFZ2J9S9AvPZjZnMHoQUvZ6zQp9fVyvmiUJhtWSwFjhw/LvTNQVglZcDBW +Q2X0CuNkzKxlskh02uiL3VJytbtWhdGWFfnrX+zxNDNdqsg7kBTtrTG+MxjlVqr2 +LjKOJtkL9TeRJj699kTc8iCUVjUVOOn8Asow7Mq7cie5Ub5CnnTzIvn5d33n1Cgt 
+xCcP6q9r85S5mWOR5bp/tQQXITlpmsh5V3ZKJKph1nRIH3AMgtEn3XYIF32SR+5V +56JDABEBAAGJAh8EGAECAAkFAkzdwrgCGwwACgkQftm/uSHgK4yPjBAAhKWbhwCq +hy550Bulz5klwPNAqY/HuT5m/+G5shFH0Kz6aya8/2F7SDdN3GUIoF7Wquum7QIV +ivvERCh6kMzLaXXlWvHQg/F6afwhpp8wiSH15mUXpDk3Ybj+BWR7H4z/ibbQE/AW +MBQ/b5GEJPCqR+gHyJvJjfD5RSZeDuBnGqS5gTE9cEQSfOYOhVC/4kNH2y3TZMry +5Apd3wufTOaG2PJK+F7FomxaLCYzQj2ti3uJPdLTYakCmORMp9lTeJWE5BG8wG2q +zjA0OWqObmur+IEkiWUET1YHFWjS32Q2+5cM1rpJv39m9SQEJeJlWPAuibVmRzZJ +t3+UPAbB/Dfx1xuOjB30d7jSaB5wG4FVraheurDSqBoD9V3PAy2zJo7ZYS5rRLN5 +1waSVdtNy7rQFa/tr4rT2LBq7+Wg/wZbGCFU7rK3xKyeWpxl5rrFRiI/hktRxRRd +pyguG+UibmOE8+n4sYhpi6iTyqkKQ2C+7QFwWQBkrz2ncPs0pJvorpwq+6Th7DUM +gkOSmX07RHycGbeSTRMEv0/6B9YARstRmfeVWlbXLfYfmrLQp1kHOahEtmLphGOJ +j0Rde31KVV5frKT65qQOhOgYZIQ5fSHaSzWMI3K9PRW5hqPDA++F7dYD2SrgE4vN +AR46Yv+J0Du6HjeMr03EKJ7on29sdPxb7cM= +=zTuA +-----END PGP PUBLIC KEY BLOCK----- diff --git a/r2/r2/public/static/iphone/index.html b/r2/r2/public/static/iphone/index.html index 4c0aa869ad..ac6850baf0 100644 --- a/r2/r2/public/static/iphone/index.html +++ b/r2/r2/public/static/iphone/index.html @@ -145,7 +145,7 @@ - +
    -
    -

    -Feeling hesitant? Try the free version of iReddit. You just get one sound effect, though it is cool, and you can aggregate only 100 of your reddit subscriptions. -

    - -
    -

    not convinced? watch the commercial

    @@ -228,11 +221,7 @@

    the feature list

  • display thumbnails for most efficient exploration
  • alien loading animation improves load time by 500% (that is, makes the waiting suck 5x less)
  • pro-tip: enable sounds in preferences and savor the awe of your peers
  • - -full-version only: -
    • 7 different awesome sounds to choose from
    • -
    • aggregates up to 500 of your subscribed reddits (free version maxes out at 100)
    @@ -318,7 +307,7 @@

    the buzz

    - +
    diff --git a/r2/r2/public/static/js/reddit.js b/r2/r2/public/static/js/reddit.js index 7ec81bd5a2..c81cb4bc50 100644 --- a/r2/r2/public/static/js/reddit.js +++ b/r2/r2/public/static/js/reddit.js @@ -9,10 +9,23 @@ function open_menu(menu) { .addClass("active inuse"); }; -function close_menus() { +function close_menus(event) { $(".drop-choices.inuse").not(".active") .removeClass("inuse"); $(".drop-choices.active").removeClass("active"); + + /* hide the search expando if the user clicks elsewhere on the page */ + if ($(event.target).closest("#search").length == 0) { + $("#moresearchinfo").slideUp(); + + if ($("#searchexpando").length == 1) { + $("#searchexpando").slideUp(function() { + $("#search_showmore").parent().show(); + }); + } else { + $("#search_showmore").parent().show(); + } + } }; function hover_open_menu(menu) { }; @@ -599,10 +612,28 @@ function updateEventHandlers(thing) { } var tracker = reddit.trackers[id]; if($.defined(tracker)) { + var title = $(this).find("a.title"); + var text; + if ($.browser.msie) { + /* bugfix for IE7-8; links with text that look like + * a url of some sort (including the @ character) + * have their text changed when href is set. + * see http://jsfiddle.net/JU2Vj/1/ for a distilled + * reproduction of the bug */ + text = title.html(); + } + $(this).find("a.title").attr("href", tracker.click).end() .find("a.thumbnail").attr("href", tracker.click).end() .find("img.promote-pixel") .attr("src", tracker.show); + + if ($.browser.msie) { + if (text != title.html()) { + title.html(text); + } + } + delete reddit.trackers[id]; } }) @@ -1255,10 +1286,27 @@ function juryvote(elem, dir) { /* The ready method */ $(function() { + $("body").click(close_menus); + /* set function to be called on thing creation/replacement, * and call it on all things currently rendered in the * page. 
*/ $("body").set_thing_init(updateEventHandlers); + + /* Fall back to the old ".gray" system if placeholder isn't supported + * by this browser */ + if (!('placeholder' in document.createElement('input'))) { + $("textarea[placeholder], input[placeholder]") + .addClass("gray") + .each(function() { + var element = $(this); + var placeholder_text = element.attr('placeholder'); + if (element.val() == "") { + element.val(placeholder_text); + } + }); + } + /* Set up gray inputs and textareas to clear on focus */ $("textarea.gray, input.gray") .focus( function() { @@ -1282,6 +1330,26 @@ $(function() { /* visually mark the last-clicked entry */ last_click(); + /* search form help expando */ + /* TODO: use focusin and focusout in jQuery 1.4 */ + $("#search input[name=q]").focus(function () { + $("#searchexpando").slideDown(); + }); + + $("#search_showmore").click(function(event) { + $("#search_showmore").parent().hide(); + $("#moresearchinfo").slideDown(); + event.preventDefault(); + }); + + $("#moresearchinfo") + .prepend('[-]') + + $("#search_hidemore").click(function(event) { + $("#search_showmore").parent().show(); + $("#moresearchinfo").slideUp(); + event.preventDefault(); + }); }); function show_friend(account_fullname) { @@ -1336,3 +1404,11 @@ function highlight_new_comments(period) { } } } + +function grab_tracking_pixel(url) { + var random_value = Math.round(Math.random() * 2147483647); + var cachebusted_url = url + "&r=" + random_value; + var img = new Image(); + img.src = cachebusted_url; + document.getElementById("oldpixel").parentNode.appendChild(img); +} diff --git a/r2/r2/public/static/js/sponsored.js b/r2/r2/public/static/js/sponsored.js index c607067f00..c2c8353057 100644 --- a/r2/r2/public/static/js/sponsored.js +++ b/r2/r2/public/static/js/sponsored.js @@ -7,7 +7,8 @@ function update_bid(elem) { var bid = parseFloat(form.find("*[name=bid]").val()); var ndays = ((Date.parse(form.find("*[name=enddate]").val()) - Date.parse(form.find("*[name=startdate]").val())) / (86400*1000)); - $(".bid-info").html("  &rarr" + + ndays = Math.round(ndays); + $(".bid-info").html("  →" + "$" + (bid/ndays).toFixed(2) + " per day for " + ndays + " day(s)"); $("#duration span.gray") @@ -324,4 +325,4 @@ function free_campaign(elem) { function pay_campaign(elem) { $.redirect($(elem).find("input[name=pay_url]").val()); -} \ No newline at end of file +} diff --git a/r2/r2/public/static/nsfw2.png b/r2/r2/public/static/nsfw2.png new file mode 100644 index 0000000000..f2cab82180 Binary files /dev/null and b/r2/r2/public/static/nsfw2.png differ diff --git a/r2/r2/public/static/reddit404d.png b/r2/r2/public/static/reddit404d.png index b9b43acd5b..e7fac219fd 100644 Binary files a/r2/r2/public/static/reddit404d.png and b/r2/r2/public/static/reddit404d.png differ diff --git a/r2/r2/public/static/reddit_gold-70.png b/r2/r2/public/static/reddit_gold-70.png new file mode 100644 index 0000000000..2504db5454 Binary files /dev/null and b/r2/r2/public/static/reddit_gold-70.png differ diff --git a/r2/r2/templates/adminerrorlog.html b/r2/r2/templates/adminerrorlog.html index bfe35ee59c..b956df56f5 100644 --- a/r2/r2/templates/adminerrorlog.html +++ b/r2/r2/templates/adminerrorlog.html @@ -39,11 +39,11 @@
    - %for g in groupings: - %if g[0] > 0: - ${exception(date, *g)} + %for gr in groupings: + %if gr[0] > 0: + ${exception(date, *gr)} %else: - ${text(date, *g)} + ${text(date, *gr)} %endif %endfor
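The adminerrorlog.html hunk above renames the template loop variable from `g` to `gr`. A likely motivation (an assumption, not stated in the patch) is that these Mako templates also see the pylons application-globals object `g`, which other templates in this patch use as `g.googleanalytics` and `g.admins`, so the old loop variable shadowed it. A minimal Python 2 sketch of the same hazard, with made-up data:

    g = "app globals"           # stands in for the pylons `g` the templates rely on

    def render(groupings):
        for g in groupings:     # shadows `g` for the rest of this function body
            print g
        # any later use of `g` here refers to the last grouping, not the globals

    def render_fixed(groupings):
        for gr in groupings:    # the patch's choice: a name that cannot collide
            print gr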
    diff --git a/r2/r2/templates/base.html b/r2/r2/templates/base.html index b8c955bc1c..fd11987829 100644 --- a/r2/r2/templates/base.html +++ b/r2/r2/templates/base.html @@ -23,6 +23,7 @@ from r2.lib.template_helpers import static from r2.models import Link, Comment, Subreddit + from r2.lib.tracking import UserInfo %> <%namespace file="utils.html" import="js_preamble"/> @@ -89,6 +90,23 @@ <%def name="head()"> +%if g.googleanalytics and thing.site_tracking: + +%endif diff --git a/r2/r2/templates/comment.mobile b/r2/r2/templates/comment.mobile index fdb47d4d07..7ca95c18ed 100644 --- a/r2/r2/templates/comment.mobile +++ b/r2/r2/templates/comment.mobile @@ -38,22 +38,23 @@ <%def name="entry()"> -%if thing.deleted: - -

    - ${_("[deleted]")} ${thing.timesince} ${_("ago")} -

    +
    +%if thing.deleted: +

    + ${_("[deleted]")} ${thing.timesince} ${_("ago")} +

    %else:

    - - ${thing.author.name} - |${thing.score} ${ungettext("point", "points", thing.score)} -${_("written")} ${thing.timesince} ${_("ago")} + + ${thing.author.name} +  |${thing.score} ${ungettext("point", "points", thing.score)} + ${_("written")} ${thing.timesince} ${_("ago")}

    ${unsafe(safemarkdown(thing.body, nofollow=thing.nofollow))} %endif +
    diff --git a/r2/r2/templates/createsubreddit.html b/r2/r2/templates/createsubreddit.html index 476d778c92..bf21db6b11 100644 --- a/r2/r2/templates/createsubreddit.html +++ b/r2/r2/templates/createsubreddit.html @@ -270,7 +270,7 @@

    ${_("create a community")}

    %if thing.site.sponsorship_name: value="${thing.site.sponsorship_name}" %else: - value="${'name this campaign (for traffic)'}" class="gray" + placeholder="${_('name this campaign (for traffic)')}" %endif /> @@ -279,7 +279,7 @@

    ${_("create a community")}

    %if thing.site.sponsorship_url: value="${thing.site.sponsorship_url}" %else: - value="${'enter link url here'}" class="gray" + placeholder="${_('enter link url here')}" %endif /> diff --git a/r2/r2/templates/dart_ad.html b/r2/r2/templates/dart_ad.html index aa9983c3d2..43cb5528bb 100644 --- a/r2/r2/templates/dart_ad.html +++ b/r2/r2/templates/dart_ad.html @@ -57,10 +57,10 @@ - + diff --git a/r2/r2/templates/usertext.html b/r2/r2/templates/usertext.html index b0e0c49cb4..7be9265f05 100644 --- a/r2/r2/templates/usertext.html +++ b/r2/r2/templates/usertext.html @@ -139,6 +139,14 @@ world!" + + ~~strikethrough~~ + strikethrough + + + super^script + superscript + %endif diff --git a/r2/r2/templates/usertext.mobile b/r2/r2/templates/usertext.mobile index 7423da0f68..67d87dd14e 100644 --- a/r2/r2/templates/usertext.mobile +++ b/r2/r2/templates/usertext.mobile @@ -28,4 +28,4 @@ ${unsafe(safemarkdown(thing.text, nofollow = thing.nofollow, target = thing.target))} -%endif +%endif \ No newline at end of file diff --git a/r2/r2/tests/functional/cassamodels.py b/r2/r2/tests/functional/cassamodels.py new file mode 100644 index 0000000000..442e7b2616 --- /dev/null +++ b/r2/r2/tests/functional/cassamodels.py @@ -0,0 +1,35 @@ +def test_cassasavehide(): + from r2.models import Account, Link, CassandraSave, SavesByAccount + from r2.lib.db import tdb_cassandra + + a = list(Account._query(sort=desc('_date'), + limit=1))[0] + l = list(Link._query(sort=desc('_date'), + limit=1))[0] + + try: + csh = CassandraSave._fast_query(a._id36, l._id36) + print "Warning! Deleting!", csh + CassandraSave._fast_query(a._id36, l._id36)._destroy() + except tdb_cassandra.NotFound: + pass + + csh = CassandraSave._save(a, l) + csh._commit() + assert CassandraSave._fast_query(a._id36, l._id36) == csh + + # check for the SavesByAccount object too + assert SavesByAccount._byID(a._id36)[csh._id] == csh._id + + csh._destroy() + + try: + CassandraSave._fast_query(a._id36, l._id36) == csh + raise Exception("shouldn't exist after destroying") + except tdb_cassandra.NotFound: + pass + + try: + assert csh._id not in SavesByAccount._byID(a._id36, properties = csh._id)._values() + except tdb_cassandra.NotFound: + pass diff --git a/r2/setup.py b/r2/setup.py index e12123689b..be2785a85d 100644 --- a/r2/setup.py +++ b/r2/setup.py @@ -70,11 +70,11 @@ class null(): pass try: import paste vers = getattr(paste, "__version__", "(undefined)") - assert vers == '1.7.2-reddit-0.1', \ + assert vers == '1.7.2-reddit-0.2', \ ("reddit is only compatible with its own magical version of paste, not '%s'" % vers) except (ImportError, AssertionError): print "Installing reddit's magical version of paste" - easy_install(["http://addons.reddit.com/paste/Paste-1.7.2-reddit-0.1.tar.gz"]) + easy_install(["http://addons.reddit.com/paste/Paste-1.7.2-reddit-0.2.tar.gz"]) #install the devel version of py-amqplib until the cheeseshop version is updated try: @@ -99,7 +99,7 @@ class null(): pass discount_path = "r2/lib/contrib/discount" discountmod = Extension('reddit-discount', include_dirs = [discount_path], - define_macros = [("VERSION", '"1.6.4"')], + define_macros = [("VERSION", '"1.6.8"')], sources = ([ "r2/lib/c/reddit-discount-wrapper.c" ] + map(lambda x: os.path.join(discount_path, x), ["Csio.c", @@ -111,6 +111,8 @@ class null(): pass "markdown.c", "mkdio.c", "resource.c", + "html5.c", + "tags.c", "toc.c", "version.c", "emmatch.c", @@ -134,17 +136,16 @@ class null(): pass "pycrypto", "Babel>=0.9.1", "flup", - "cython==0.13", + "cython==0.14", "simplejson", 
"SQLAlchemy==0.5.3", - "BeautifulSoup == 3.0.8.1", # last version to use the good parser + "BeautifulSoup", "cssutils==0.9.5.1", "chardet", "psycopg2", "py_interface", "pycountry", - "python-cassandra", - "thrift" # required by Cassandra + "thrift05", "pycassa==1.0.5", ], packages=find_packages(), include_package_data=True, @@ -169,11 +170,6 @@ class null(): pass """, ) - -# the cassandra stuff we'll need. down here because it needs to be -# done *after* thrift is installed -easy_install(["http://github.com/downloads/pycassa/pycassa/pycassa-0.3.0.tar.gz"]) - # running setup.py always fucks up the build directory, which we don't # need anyway. import shutil diff --git a/r2/updateini.py b/r2/updateini.py index 951844849b..57519ff342 100755 --- a/r2/updateini.py +++ b/r2/updateini.py @@ -2,7 +2,7 @@ import re, sys -line_rx = re.compile('^([-_a-zA-Z0-9 ]*[-_a-zA-Z0-9]+)\s*=\s*(.*)') +line_rx = re.compile('\A([-_a-zA-Z0-9 ]*[-_a-zA-Z0-9]+)\s*=\s*(.*)') def parse_line(line): m = line_rx.match(line)