Skip to content

Commit

Permalink
Implement function that obfuscates a ydoc and scrambles its content
Browse files Browse the repository at this point in the history
  • Loading branch information
dmonad committed Apr 22, 2023
1 parent 5a8519d commit 39167e6
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 18 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,30 @@ currentState1 = Y.mergeUpdates([currentState1, diff2])
currentState1 = Y.mergeUpdates([currentState1, diff1])
```

#### Obfuscating Updates

If one of your users runs into a weird bug (e.g. the rich-text editor throws
error messages), then you don't have to request the full document from your
user. Instead, they can obfuscate the document (i.e. replace the content with
meaningless generated content) before sending it to you. Note that someone might
still deduce the type of content by looking at the general structure of the
document. But this is much better than requesting the original document.

Obfuscated updates contain all the CRDT-related data that is required for
merging. So it is safe to merge obfuscated updates.

```javascript
const ydoc = new Y.Doc()
// perform some changes..
ydoc.getText().insert(0, 'hello world')
const update = Y.encodeStateAsUpdate(ydoc)
// the below update contains scrambled data
const obfuscatedUpdate = Y.obfuscateUpdate(update)
const ydoc2 = new Y.Doc()
Y.applyUpdate(ydoc2, obfuscatedUpdate)
ydoc2.getText().toString() // => "00000000000"
```

#### Using V2 update format

Yjs implements two update formats. By default you are using the V1 update format.
Expand Down
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
},
"homepage": "https://docs.yjs.dev",
"dependencies": {
"lib0": "^0.2.72"
"lib0": "^0.2.74"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^24.0.1",
Expand Down
2 changes: 2 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ export {
diffUpdateV2,
convertUpdateFormatV1ToV2,
convertUpdateFormatV2ToV1,
obfuscateUpdate,
obfuscateUpdateV2,
UpdateEncoderV1
} from './internals.js'

Expand Down
166 changes: 154 additions & 12 deletions src/utils/updates.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,40 @@
import * as binary from 'lib0/binary'
import * as decoding from 'lib0/decoding'
import * as encoding from 'lib0/encoding'
import * as error from 'lib0/error'
import * as f from 'lib0/function'
import * as logging from 'lib0/logging'
import * as map from 'lib0/map'
import * as math from 'lib0/math'
import * as string from 'lib0/string'

import {
ContentAny,
ContentBinary,
ContentDeleted,
ContentDoc,
ContentEmbed,
ContentFormat,
ContentJSON,
ContentString,
ContentType,
createID,
readItemContent,
readDeleteSet,
writeDeleteSet,
Skip,
mergeDeleteSets,
decodeStateVector,
DSEncoderV1,
DSEncoderV2,
decodeStateVector,
Item, GC, UpdateDecoderV1, UpdateDecoderV2, UpdateEncoderV1, UpdateEncoderV2 // eslint-disable-line
GC,
Item,
mergeDeleteSets,
readDeleteSet,
readItemContent,
Skip,
UpdateDecoderV1,
UpdateDecoderV2,
UpdateEncoderV1,
UpdateEncoderV2,
writeDeleteSet,
YXmlElement,
YXmlHook
} from '../internals.js'

/**
Expand Down Expand Up @@ -552,30 +573,151 @@ const finishLazyStructWriting = (lazyWriter) => {

/**
* @param {Uint8Array} update
* @param {function(Item|GC|Skip):Item|GC|Skip} blockTransformer
* @param {typeof UpdateDecoderV2 | typeof UpdateDecoderV1} YDecoder
* @param {typeof UpdateEncoderV2 | typeof UpdateEncoderV1 } YEncoder
*/
export const convertUpdateFormat = (update, YDecoder, YEncoder) => {
export const convertUpdateFormat = (update, blockTransformer, YDecoder, YEncoder) => {
const updateDecoder = new YDecoder(decoding.createDecoder(update))
const lazyDecoder = new LazyStructReader(updateDecoder, false)
const updateEncoder = new YEncoder()
const lazyWriter = new LazyStructWriter(updateEncoder)

for (let curr = lazyDecoder.curr; curr !== null; curr = lazyDecoder.next()) {
writeStructToLazyStructWriter(lazyWriter, curr, 0)
writeStructToLazyStructWriter(lazyWriter, blockTransformer(curr), 0)
}
finishLazyStructWriting(lazyWriter)
const ds = readDeleteSet(updateDecoder)
writeDeleteSet(updateEncoder, ds)
return updateEncoder.toUint8Array()
}

/**
* @typedef {Object} ObfuscatorOptions
* @property {boolean} [ObfuscatorOptions.formatting=true]
* @property {boolean} [ObfuscatorOptions.subdocs=true]
* @property {boolean} [ObfuscatorOptions.yxml=true] Whether to obfuscate nodeName / hookName
*/

/**
* @param {ObfuscatorOptions} obfuscator
*/
const createObfuscator = ({ formatting = true, subdocs = true, yxml = true } = {}) => {
let i = 0
const mapKeyCache = map.create()
const nodeNameCache = map.create()
const formattingKeyCache = map.create()
const formattingValueCache = map.create()
formattingValueCache.set(null, null) // end of a formatting range should always be the end of a formatting range
/**
* @param {Item|GC|Skip} block
* @return {Item|GC|Skip}
*/
return block => {
switch (block.constructor) {
case GC:
case Skip:
return block
case Item: {
const item = /** @type {Item} */ (block)
const content = item.content
switch (content.constructor) {
case ContentDeleted:
break
case ContentType: {
if (yxml) {
const type = /** @type {ContentType} */ (content).type
if (type instanceof YXmlElement) {
type.nodeName = map.setIfUndefined(nodeNameCache, type.nodeName, () => 'node-' + i)
}
if (type instanceof YXmlHook) {
type.hookName = map.setIfUndefined(nodeNameCache, type.hookName, () => 'hook-' + i)
}
}
break
}
case ContentAny: {
const c = /** @type {ContentAny} */ (content)
c.arr = c.arr.map(() => i)
break
}
case ContentBinary: {
const c = /** @type {ContentBinary} */ (content)
c.content = new Uint8Array([i])
break
}
case ContentDoc: {
const c = /** @type {ContentDoc} */ (content)
if (subdocs) {
c.opts = {}
c.doc.guid = i + ''
}
break
}
case ContentEmbed: {
const c = /** @type {ContentEmbed} */ (content)
c.embed = {}
break
}
case ContentFormat: {
const c = /** @type {ContentFormat} */ (content)
if (formatting) {
c.key = map.setIfUndefined(formattingKeyCache, c.key, () => i + '')
c.value = map.setIfUndefined(formattingValueCache, c.value, () => ({ i }))
}
break
}
case ContentJSON: {
const c = /** @type {ContentJSON} */ (content)
c.arr = c.arr.map(() => i)
break
}
case ContentString: {
const c = /** @type {ContentString} */ (content)
c.str = string.repeat((i % 10) + '', c.str.length)
break
}
default:
// unknown content type
error.unexpectedCase()
}
if (item.parentSub) {
item.parentSub = map.setIfUndefined(mapKeyCache, item.parentSub, () => i + '')
}
i++
return block
}
default:
// unknown block-type
error.unexpectedCase()
}
}
}

/**
* This function obfuscates the content of a Yjs update. This is useful to share
* buggy Yjs documents while significantly limiting the possibility that a
* developer can on the user. Note that it might still be possible to deduce
* some information by analyzing the "structure" of the document or by analyzing
* the typing behavior using the CRDT-related metadata that is still kept fully
* intact.
*
* @param {Uint8Array} update
* @param {ObfuscatorOptions} [opts]
*/
export const obfuscateUpdate = (update, opts) => convertUpdateFormat(update, createObfuscator(opts), UpdateDecoderV1, UpdateEncoderV1)

/**
* @param {Uint8Array} update
* @param {ObfuscatorOptions} [opts]
*/
export const obfuscateUpdateV2 = (update, opts) => convertUpdateFormat(update, createObfuscator(opts), UpdateDecoderV2, UpdateEncoderV2)

/**
* @param {Uint8Array} update
*/
export const convertUpdateFormatV1ToV2 = update => convertUpdateFormat(update, UpdateDecoderV1, UpdateEncoderV2)
export const convertUpdateFormatV1ToV2 = update => convertUpdateFormat(update, f.id, UpdateDecoderV1, UpdateEncoderV2)

/**
* @param {Uint8Array} update
*/
export const convertUpdateFormatV2ToV1 = update => convertUpdateFormat(update, UpdateDecoderV2, UpdateEncoderV1)
export const convertUpdateFormatV2ToV1 = update => convertUpdateFormat(update, f.id, UpdateDecoderV2, UpdateEncoderV1)
53 changes: 52 additions & 1 deletion tests/updates.tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import * as Y from '../src/index.js'
import { readClientsStructRefs, readDeleteSet, UpdateDecoderV2, UpdateEncoderV2, writeDeleteSet } from '../src/internals.js'
import * as encoding from 'lib0/encoding'
import * as decoding from 'lib0/decoding'
import * as object from 'lib0/object'

/**
* @typedef {Object} Enc
Expand Down Expand Up @@ -138,7 +139,6 @@ export const testKeyEncoding = tc => {
*/
const checkUpdateCases = (ydoc, updates, enc, hasDeletes) => {
const cases = []

// Case 1: Simple case, simply merge everything
cases.push(enc.mergeUpdates(updates))

Expand Down Expand Up @@ -304,3 +304,54 @@ export const testMergePendingUpdates = tc => {
const yText5 = yDoc5.getText('textBlock')
t.compareStrings(yText5.toString(), 'nenor')
}

/**
* @param {t.TestCase} tc
*/
export const testObfuscateUpdates = tc => {
const ydoc = new Y.Doc()
const ytext = ydoc.getText('text')
const ymap = ydoc.getMap('map')
const yarray = ydoc.getArray('array')
// test ytext
ytext.applyDelta([{ insert: 'text', attributes: { bold: true } }, { insert: { href: 'supersecreturl' } }])
// test ymap
ymap.set('key', 'secret1')
ymap.set('key', 'secret2')
// test yarray with subtype & subdoc
const subtype = new Y.XmlElement('secretnodename')
const subdoc = new Y.Doc({ guid: 'secret' })
subtype.setAttribute('attr', 'val')
yarray.insert(0, ['teststring', 42, subtype, subdoc])
// obfuscate the content and put it into a new document
const obfuscatedUpdate = Y.obfuscateUpdate(Y.encodeStateAsUpdate(ydoc))
const odoc = new Y.Doc()
Y.applyUpdate(odoc, obfuscatedUpdate)
const otext = odoc.getText('text')
const omap = odoc.getMap('map')
const oarray = odoc.getArray('array')
// test ytext
const delta = otext.toDelta()
t.assert(delta.length === 2)
t.assert(delta[0].insert !== 'text' && delta[0].insert.length === 4)
t.assert(object.length(delta[0].attributes) === 1)
t.assert(!object.hasProperty(delta[0].attributes, 'bold'))
t.assert(object.length(delta[1]) === 1)
t.assert(object.hasProperty(delta[1], 'insert'))
// test ymap
t.assert(omap.size === 1)
t.assert(!omap.has('key'))
// test yarray with subtype & subdoc
const result = oarray.toArray()
t.assert(result.length === 4)
t.assert(result[0] !== 'teststring')
t.assert(result[1] !== 42)
const osubtype = /** @type {Y.XmlElement} */ (result[2])
const osubdoc = result[3]
// test subtype
t.assert(osubtype.nodeName !== subtype.nodeName)
t.assert(object.length(osubtype.getAttributes()) === 1)
t.assert(osubtype.getAttribute('attr') === undefined)
// test subdoc
t.assert(osubdoc.guid !== subdoc.guid)
}

0 comments on commit 39167e6

Please sign in to comment.