Archive for the ‘Development’ Category

Quick (and dirty) Patch for Ruby Enterprise Edition 2011.03 to Prevent Hash Collision Attacks

Posted by Oleksiy Kovyrin under Admin-tips, Development

As you may have heard, this week on December 28, 2011, a group of security experts released information about a nasty problem in almost all languages and platforms related to hash function collisions and the possibility of using them for DoS attacks on web applications.

The Ruby core team released a new 1.8.7-p357 version with the problem fixed. The JRuby development team came out with the new 1.6.5.1 release. Unfortunately, 2 days after the release there is still no information from the Ruby Enterprise Edition team on what to do with all the REE deployments.

So, since there is no patch for Ruby Enterprise Edition 2011.03 to prevent an attack, I’ve quickly ported the Ruby core patch to be used with the latest REE release. Here it is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
From 4f69a748ab820c6a8bd204f94d13d970847f575c Mon Sep 17 00:00:00 2001
From: RPM Builder <rpmbuild@livingsocial.com>
Date: Thu, 29 Dec 2011 19:48:52 +0000
Subject: [PATCH] Backport randomized hash patch from upstream

---
 source/inits.c                  |    4 ++
 source/random.c                 |   74 ++++++++++++++++++++++++++++++---------
 source/st.c                     |   14 +++++++-
 source/string.c                 |    7 +++-
 source/test/ruby/test_string.rb |   13 +++++++
 source/version.c                |    2 +-
 6 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/source/inits.c b/source/inits.c
index 947bbbe..a0e061f 100644
--- a/source/inits.c
+++ b/source/inits.c
@@ -38,6 +38,7 @@ void Init_Precision _((void));
 void Init_sym _((void));
 void Init_process _((void));
 void Init_Random _((void));
+void Init_RandomSeed _((void));
 void Init_Range _((void));
 void Init_Regexp _((void));
 void Init_signal _((void));
@@ -46,10 +47,13 @@ void Init_Struct _((void));
 void Init_Time _((void));
 void Init_var_tables _((void));
 void Init_version _((void));
+void Init_st _((void));
 
 void
 rb_call_inits()
 {
+    Init_RandomSeed();
+    Init_st();
     Init_sym();
     Init_var_tables();
     Init_Object();
diff --git a/source/random.c b/source/random.c
index 258b0b2..790eda8 100644
--- a/source/random.c
+++ b/source/random.c
@@ -189,6 +189,7 @@ rb_genrand_real(void)
 #include <fcntl.h>
 #endif
 
+static int seed_initialized = 0;
 static VALUE saved_seed = INT2FIX(0);
 
 static VALUE
@@ -250,27 +251,22 @@ rand_init(vseed)
     return old;
 }
 
-static VALUE
-random_seed()
+#define DEFAULT_SEED_LEN (4 * sizeof(long))
+
+static void
+fill_random_seed(ptr)
+    char *ptr;
 {
     static int n = 0;
+    unsigned long *seed;
     struct timeval tv;
     int fd;
     struct stat statbuf;
+    char *buf = (char*)ptr;
 
-    int seed_len;
-    BDIGIT *digits;
-    unsigned long *seed;
-    NEWOBJ(big, struct RBignum);
-    OBJSETUP(big, rb_cBignum, T_BIGNUM);
-
-    seed_len = 4 * sizeof(long);
-    big->sign = 1;
-    big->len = seed_len / SIZEOF_BDIGITS + 1;
-    digits = big->digits = ALLOC_N(BDIGIT, big->len);
-    seed = (unsigned long *)big->digits;
+    seed = (unsigned long *)buf;
 
-    memset(digits, 0, big->len * SIZEOF_BDIGITS);
+    memset(buf, 0, DEFAULT_SEED_LEN);
 
 #ifdef S_ISCHR
     if ((fd = open("/dev/urandom", O_RDONLY
@@ -285,7 +281,7 @@ random_seed()
 #endif
             )) >= 0) {
         if (fstat(fd, &statbuf) == 0 && S_ISCHR(statbuf.st_mode)) {
-            read(fd, seed, seed_len);
+            read(fd, seed, DEFAULT_SEED_LEN);
         }
         close(fd);
     }
@@ -296,13 +292,37 @@ random_seed()
     seed[1] ^= tv.tv_sec;
     seed[2] ^= getpid() ^ (n++ << 16);
     seed[3] ^= (unsigned long)&seed;
+}
+
+static VALUE
+make_seed_value(char *ptr)
+{
+    BDIGIT *digits;
+    NEWOBJ(big, struct RBignum);
+    OBJSETUP(big, rb_cBignum, T_BIGNUM);
+
+    RBIGNUM_SET_SIGN(big, 1);
+
+    digits = ALLOC_N(char, DEFAULT_SEED_LEN);
+    RBIGNUM(big)->digits = digits;
+    RBIGNUM(big)->len = DEFAULT_SEED_LEN / SIZEOF_BDIGITS;
+
+    MEMCPY(digits, ptr, char, DEFAULT_SEED_LEN);
 
     /* set leading-zero-guard if need. */
-    digits[big->len-1] = digits[big->len-2] <= 1 ? 1 : 0;
+    digits[RBIGNUM_LEN(big)-1] = digits[RBIGNUM_LEN(big)-2] <= 1 ? 1 : 0;
 
     return rb_big_norm((VALUE)big);
 }
 
+static VALUE
+random_seed(void)
+{
+    char buf[DEFAULT_SEED_LEN];
+    fill_random_seed(buf);
+    return make_seed_value(buf);
+}
+
 /*
  *  call-seq:
  *     srand(number=0)    => old_seed
@@ -443,6 +463,9 @@ rb_f_rand(argc, argv, obj)
     long val, max;
 
     rb_scan_args(argc, argv, "01", &vmax);
+    if (!seed_initialized) {
+       rand_init(random_seed());
+    }
     switch (TYPE(vmax)) {
       case T_FLOAT:
    if (RFLOAT(vmax)->value <= LONG_MAX && RFLOAT(vmax)->value >= LONG_MIN) {
@@ -490,10 +513,27 @@ rb_f_rand(argc, argv, obj)
     return LONG2NUM(val);
 }
 
+static char initial_seed[DEFAULT_SEED_LEN];
+
+void
+Init_RandomSeed(void)
+{
+    fill_random_seed(initial_seed);
+    init_by_array((unsigned long*)initial_seed, DEFAULT_SEED_LEN/sizeof(unsigned long));
+    seed_initialized = 1;
+}
+
+static void
+Init_RandomSeed2(void)
+{
+    saved_seed = make_seed_value(initial_seed);
+    memset(initial_seed, 0, DEFAULT_SEED_LEN);
+}
+
 void
 Init_Random()
 {
-    rand_init(random_seed());
+    Init_RandomSeed2();
     rb_define_global_function("srand", rb_f_srand, -1);
     rb_define_global_function("rand", rb_f_rand, -1);
     rb_global_variable(&saved_seed);
diff --git a/source/st.c b/source/st.c
index c16c310..21e157a 100644
--- a/source/st.c
+++ b/source/st.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #endif
 #include <string.h>
+#include <limits.h>
 #include "st.h"
 
 typedef struct st_table_entry st_table_entry;
@@ -521,6 +522,8 @@ st_foreach(table, func, arg)
     return 0;
 }
 
+static unsigned long hash_seed = 0;
+
 static int
 strhash(string)
     register const char *string;
@@ -550,10 +553,11 @@ strhash(string)
 
     return val + (val << 15);
 #else
-    register int val = 0;
+    register unsigned long val = hash_seed;
 
     while ((c = *string++) != '\0') {
    val = val*997 + c;
+   val = (val << 13) | (val >> (sizeof(st_data_t) * CHAR_BIT - 13));
     }
 
     return val + (val>>5);
@@ -573,3 +577,11 @@ numhash(n)
 {
     return n;
 }
+
+extern unsigned long rb_genrand_int32(void);
+
+void
+Init_st(void)
+{
+    hash_seed = rb_genrand_int32();
+}
diff --git a/source/string.c b/source/string.c
index c6b2301..94a0281 100644
--- a/source/string.c
+++ b/source/string.c
@@ -875,13 +875,15 @@ rb_str_concat(str1, str2)
     return str1;
 }
 
+static unsigned long hash_seed;
+
 int
 rb_str_hash(str)
     VALUE str;
 {
     register long len = RSTRING(str)->len;
     register char *p = RSTRING(str)->ptr;
-    register int key = 0;
+    register unsigned long key = hash_seed;
 
 #if defined(HASH_ELFHASH)
     register unsigned int g;
@@ -905,6 +907,7 @@ rb_str_hash(str)
     while (len--) {
    key = key*65599 + *p;
    p++;
+   key = (key << 13) | (key >> ((sizeof(unsigned long) * CHAR_BIT) - 13));
     }
     key = key + (key>>5);
 #endif
@@ -5062,4 +5065,6 @@ Init_String()
     rb_fs = Qnil;
     rb_define_variable("$;", &rb_fs);
     rb_define_variable("$-F", &rb_fs);
+
+    hash_seed = rb_genrand_int32();
 }
diff --git a/source/test/ruby/test_string.rb b/source/test/ruby/test_string.rb
index 5f2c54f..4d97182 100644
--- a/source/test/ruby/test_string.rb
+++ b/source/test/ruby/test_string.rb
@@ -1,4 +1,5 @@
 require 'test/unit'
+require File.expand_path('envutil', File.dirname(__FILE__))
 
 class TestString < Test::Unit::TestCase
   def check_sum(str, bits=16)
@@ -29,4 +30,16 @@ class TestString < Test::Unit::TestCase
   ensure
     $KCODE = original_kcode
   end
+
+  def test_hash_random
+    str = 'abc'
+    a = [str.hash.to_s]
+    cmd = sprintf("%s -e 'print %s.hash'", EnvUtil.rubybin, str.dump)
+    3.times {
+      IO.popen(cmd, "rb") {|o|
+        a << o.read
+      }
+    }
+    assert_not_equal([str.hash.to_s], a.uniq)
+  end
 end
diff --git a/source/version.c b/source/version.c
index 8b41cc9..5781cab 100644
--- a/source/version.c
+++ b/source/version.c
@@ -46,7 +46,7 @@ Init_version()
     rb_define_global_const("RUBY_PATCHLEVEL", INT2FIX(RUBY_PATCHLEVEL));
 
     snprintf(description, sizeof(description),
-             "ruby %s (%s %s %d) [%s], MBARI 0x%x, Ruby Enterprise Edition %s",
+             "ruby %s (%s %s %d) [%s], MBARI 0x%x, Ruby Enterprise Edition %s (with hash random)",
              RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_RELEASE_STR,
              RUBY_RELEASE_NUM, RUBY_PLATFORM,
              STACK_WIPE_SITES, REE_VERSION);
--
1.7.6.4

You can view it or download it from github. Disclaimer: This is provided as is, no guarantees are provided, etc.

Read the rest of this entry »

DbCharmer 1.7.0 Release: Rails 3.0 Support and Forced Slave Reads

Posted by Oleksiy Kovyrin under Databases, Development, My Projects

This week, after 3 months in the works, we’ve finally released version 1.7.0 of DbCharmer ruby gem – Rails plugin that significantly extends ActiveRecord’s ability to work with multiple databases and/or database servers by adding features like multiple databases support, master/slave topologies support, sharding, etc.

New features in this release:

  • Rails 3.0 support. We’ve worked really hard to bring all the features we supported in Rails 2.X to the new version of Rails and now I’m proud that we’ve implemented them all and the implementation looks much cleaner and more universal (all kinds of relations in rails 3 work in exactly the same way and we do not need to implement connection switching for all kinds of weird corner-cases in ActiveRecord).
  • Forced Slave Reads functionality. Now we can have models with slaves that are not used by default, but can be turned on globally (per-controller, per-action or in a block). This is a new feature that brings our master/slave routing capabilities to a whole new level – we can now use it for really mission-critical models on demand and not be afraid of breaking major functionality of our applications by switching them to slave reads.
  • Lots of changes were made in the structure of our code and tests to make sure it would be much easier for new developers to understand DbCharmer internals and make changes in its code.

Along with the new release we’ve got a brand new web site. You can find much better, cleaner and, most importantly, correct documentation for the library on the web site. We’ll be adding more examples, will try to add more in-depth explanation of our core functions, etc.

If you have any questions about the release, feel free to ask them in our new mailing list: DbCharmer Users Group.

For more updates on our releases, you can follow @DbCharmer on Twitter.

Scribd is Hiring (I’m Looking for an Operations Engineer to Join My Team)

Posted by Oleksiy Kovyrin under Blog, Databases, Development, Links, Networks

Scribd is a top 100 site on the web and one of the largest sites built using Ruby on Rails. As one of the first rails sites to reach scale, we’ve built a lot of infrastructure and solved a lot of challenges to get Scribd to where it is today. We actively try to push the envelope and have contributed substantial work back to the open source community.

Scribd has an agile, startup culture and an unusually close working relationship between engineering and ops. You’ll regularly find cross-over work at Scribd, with ops people writing application-layer code and engineers figuring out operations-level problems. We think we’re able to make that work because of the uniquely talented people we have on the team.

To allow us to keep scaling, we’re now looking to add a strong, experienced operations guru to the team. As a member of Scribd operations, you’ll have tremendous ownership and responsibility for one of the web’s most popular applications. Because Scribd is a startup, you will wear many hats and have broader responsibility than you would at a larger company.

If you read this blog, you should already have a good sense of the kind of work you’ll be doing on this position.

The Ideal Profile

You are an experienced operations professional and have run ops at at least one large-scale website. You have comprehensive knowledge of a broad variety of system tools, from MySQL and Nginx to Squid and Memcached. You should also have strong software development skills and be well-versed in major programming languages. You should be strongly motivated, a creative solution finder, and ready to jump into the thorniest technical problems whenever necessary.

Responsibilities

  • Develop and maintain all aspects of Scribd’s operations infrastructure, including system monitoring, backups, server configuration, databases, and caching systems
  • Collaborate with engineering to create next generation infrastructure to support changing requirements
  • Predict scaling problems before they occur and work with engineering to prevent them
  • Write and debug application level ruby code
  • Participate in an on-call rotation
  • Quickly diagnose server problems and employ preventive measures to maintain high availability servers

Qualifications

  • Bachelors degree in CS or equivalent experience
  • 3-5 years of professional experience in site operations
  • Strong software engineering skills, including knowledge of major programming languages
  • Strong database skills, preferably with MySQL, and overall linux knowledge
  • Experience with most of the following technologies: MySQL, Nginx, Ruby, Memcached, Squid, git, Solr, HBase, Postfix
  • Proven ability to quickly learn and implement unfamiliar technologies
  • Strong desire to work hard at a rapidly growing company

Location: You are preferably located near San Francisco, CA. Relocation assistance is decided on a case-by-case basis. In short, we’ll be creative to get you here.

Contact: Please send your email cover letter and resume with the subject “Your name – Senior Site Operations Engineer – via Kovyrin.net” to jobs@scribd.com or contact me directly using any of my contacts. All communication and correspondence is held in the strictest confidence to ensure that you can connect and learn more without exposure.

Nginx-Fu: X-Accel-Redirect From Remote Servers

Posted by Oleksiy Kovyrin under Admin-tips, Development, Networks

We use nginx and its features a lot at Scribd. Many times in the last year we needed a pretty interesting but unsupported feature – we wanted nginx X-Accel-Redirect functionality to work with remote URLs. Out of the box, nginx supports this functionality for local URIs only. In this short post I want to explain how we made nginx serve remote content via X-Accel-Redirect.

Read the rest of this entry »

Advanced Squid Caching in Scribd: Cache Invalidation Techniques

Posted by Oleksiy Kovyrin under Admin-tips, Development, My Projects, Networks

Having a reverse-proxy web cache as one of the major infrastructure elements brings many benefits for large web applications: it reduces your application servers’ load, reduces average response times on your site, etc. But there is one problem every developer experiences when working with such a cache – cached content invalidation.

It is a complex problem that usually consists of two smaller ones: individual cache elements invalidation (you need to keep an eye on your data changes and invalidate cached pages when related data changes) and full cache purges (sometimes your site layout or page templates change and you need to purge all the cached pages to make sure users will get new visual elements of layout changes). In this post I’d like to look at a few techniques we use at Scribd to solve cache invalidation problems.

Read the rest of this entry »

DbCharmer – Rails Can Scale!

Posted by Oleksiy Kovyrin under Databases, Development, My Projects

Back in November 2009 I was working on a project to port the Scribd.com code base to Rails 2.2 and noticed that some old plugins we were using in 2.1 had been abandoned by their authors. Some of them were just removed from the code base, but one needed a replacement – that was an old plugin called acts_as_readonlyable that helped us to distribute our queries among a cluster of MySQL slaves. There were some alternatives, but we didn’t like them for one reason or another, so we decided to create our own ActiveRecord plugin that would help us scale our databases out. That’s the story behind the first release of DbCharmer.

Today, six months after the first release of the gem, we’ve moved it to gemcutter (which is now the official gems hosting) and we’re already at version 1.6.11. The gem has been downloaded more than 2000 times. There are (at least) 10+ large users that rely on this gem to scale their products out. And (this is the most exciting part) we’ve added tons of new features to the product.

Here are the main features added since the first release:

  • Much better multi-database migrations support including default migrations connection changing.
  • We’ve added ActiveRecord associations preload support that makes it possible to move eager loading queries to the same connection where your finder queries go to.
  • We’ve improved ActiveRecord’s query logging feature and now you can see what connections your queries executed on (and yes, all those improvements are colorized :-) ).
  • We’ve added the ability to temporarily remap any ActiveRecord connections to any other connections for a block of code (really useful when you need to make sure all your queries go to some non-default slave and you do not want to mess with all your models).
  • The most interesting change: we’ve implemented some basic sharding functionality in ActiveRecord which currently is being used in production in our application.

As you can see now DbCharmer helps you to do three major scalability tasks in your Rails projects:

  1. Master-Slave clusters to scale out your Rails models reads.
  2. Vertical sharding by moving some of your models to separate (maybe even dedicated) servers while still using AR associations.
  3. Horizontal sharding by slicing your models data to pieces and placing those pieces into different databases and/or servers.

So, if you haven’t checked DbCharmer out yet and you’re working on some large Rails project that is (or is going to be) facing scalability problems, go read the docs, download/install the gem and prove to them that Rails CAN scale!

DB Charmer – ActiveRecord Connection Magic Plugin

Posted by Oleksiy Kovyrin under Databases, Development, My Projects

Today I’m proud to announce the first public release of our ActiveRecord database connection magic plugin: DbCharmer.


DB Charmer – ActiveRecord Connection Magic Plugin

DbCharmer is a simple yet powerful plugin for ActiveRecord that does a few things:

  1. Allows you to easily manage AR models’ connections (switch_connection_to method)
  2. Allows you to switch AR models’ default connections to separate servers/databases
  3. Allows you to easily choose where your query should go (on_* methods family)
  4. Allows you to automatically send read queries to your slaves while masters would handle all the updates.
  5. Adds multiple databases migrations to ActiveRecord

Read the rest of this entry »

Advanced Squid Caching in Scribd: Logged In Users and Complex URLs Handling

Posted by Oleksiy Kovyrin under Admin-tips, Development, My Projects

It’s been a while since I’ve posted my first post about the way we do document pages caching in Scribd and this approach has definitely proven to be really effective since then. In the second post of this series I’d like to explain how we handle our complex document URLs and logged in users in the caching architecture.

First of all, let’s take a look at a typical Scribd’s document URL: http://www.scribd.com/doc/1/Improved-Statistical-Test.

As we can see, it consists of a document-specific part (/doc/1) and a non-unique human-readable slug part (/Improved-Statistical-Test). When a user comes to the site with a wrong slug in the document URL, we need to make sure we send the user to the correct URL with a permanent HTTP 301 redirect. So, obviously we can’t simply send our requests to the squid because it’d cause a few problems:

  • When we change document’s title, we’d create a new cached item and would not be able to redirect users from the old URL to the new one
  • When we change a title, we’d pollute cache with additional document page copies.

One more problem that makes the situation even worse – we have 3 different kinds of users on the site:

  1. Logged in users – active web site users that are logged in and should see their name at the top of the page, should see all kinds of customized parts of the page, etc (especially when a page is their own document).
  2. Anonymous users – all users that are not logged in and visit the site with a flash-enabled browser
  3. Bots – all kinds of crawlers that can’t read flash content and need to see a plain text document version

All three kinds of users should see their own document page versions whether the page is cached or not.

Read the rest of this entry »

Loops plugin for rails and merb released

Posted by Oleksiy Kovyrin under Development, Links, My Projects

loops is a small and lightweight framework for Ruby on Rails and Merb created to support simple background loops in your application which are usually used to do some background data processing on your servers (queue workers, batch tasks processors, etc).

Originally loops plugin was created to make our (Scribd.com) own loops code more organized. We used to have tens of different modules with methods that were called with script/runner and then used with nohup and other not so convenient backgrounding techniques. When you have such a number of loops/workers to run in background it becomes a nightmare to manage them on a regular basis (restarts, code upgrades, status/health checking, etc).

After a short time of writing our loops in a more organized way, we were able to generalize most of the loops code, so now our loops look like classes with a single mandatory public method called run. Everything else (spawning many workers, managing them, logging, backgrounding, pid-files management, etc) is handled by the plugin itself.

The major idea behind this small project was to create a dead simple and yet robust framework for running tasks in the background without having to think about spawning many workers, restarting them when they die, etc. So, if you need to be able to run either one or many copies of your worker, or you do not want to think about re-spawning dead workers and do not want to spend megabytes of RAM on separate copies of the Ruby interpreter (when you run each copy of your loop as a separate process controlled by monit/god/etc), then I’d recommend you try this framework — you’ll like it.

For more information, visit the project site and, of course, read the sources :-)