initial code repo
diff --git a/src/ceph/qa/tasks/s3a_hadoop.py b/src/ceph/qa/tasks/s3a_hadoop.py
new file mode 100644 (file)
index 0000000..c01fe1d
--- /dev/null
@@ -0,0 +1,343 @@
+import contextlib
+import logging
+import time
+from teuthology import misc
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Run Hadoop S3A tests using Ceph.
+    usage:
+     tasks:
+     - ceph-ansible:
+     - s3a-hadoop:
+         maven-major: 'maven-3' (default)
+         maven-version: '3.3.9' (default)
+         hadoop-version: '2.7.3' (default)
+         bucket-name: 's3atest' (default)
+         access-key: 'anykey' (uses a default value)
+         secret-key: 'secretkey' (uses a default value)
+    """
+    if config is None:
+        config = {}
+
+    assert isinstance(config, dict), \
+        "task only supports a dictionary for configuration"
+
+    overrides = ctx.config.get('overrides', {})
+    misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
+    testdir = misc.get_testdir(ctx)
+    rgws = ctx.cluster.only(misc.is_type('rgw'))
+    # use the first rgw node to test s3a
+    rgw_node = next(iter(rgws.remotes.keys()))
+    # get versions
+    maven_major = config.get('maven-major', 'maven-3')
+    maven_version = config.get('maven-version', '3.3.9')
+    hadoop_ver = config.get('hadoop-version', '2.7.3')
+    bucket_name = config.get('bucket-name', 's3atest')
+    access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
+    secret_key = config.get(
+        'secret-key',
+        'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')
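+    # the default keys are arbitrary values, not real AWS credentials;
+    # they are only used to create the throwaway RGW test user below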
+
+    # set versions for cloning the repo
+    apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
+        maven_version=maven_version)
+    maven_link = 'http://mirror.jax.hugeserver.com/apache/maven/' + \
+        '{maven_major}/{maven_version}/binaries/'.format(
+            maven_major=maven_major,
+            maven_version=maven_version) + apache_maven
+    hadoop_git = 'https://github.com/apache/hadoop'
+    hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
+    install_prereq(rgw_node)
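+    # fetch maven, clone hadoop and check out a local branch from the
+    # matching release tag (git checkout -b hadoop-<ver> rel/release-<ver>)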
+    rgw_node.run(
+        args=[
+            'cd',
+            testdir,
+            run.Raw('&&'),
+            'wget',
+            maven_link,
+            run.Raw('&&'),
+            'tar',
+            '-xvf',
+            apache_maven,
+            run.Raw('&&'),
+            'git',
+            'clone',
+            run.Raw(hadoop_git),
+            run.Raw('&&'),
+            'cd',
+            'hadoop',
+            run.Raw('&&'),
+            'git',
+            'checkout',
+            '-b',
+            run.Raw(hadoop_rel)
+        ]
+    )
+    dnsmasq_name = 's3.ceph.com'
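+    # all s3a traffic is addressed to this virtual hostname, which the
+    # local dnsmasq instance set up below resolves to the RGW node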
+    configure_s3a(rgw_node, dnsmasq_name, access_key, secret_key, bucket_name, testdir)
+    setup_dnsmasq(rgw_node, dnsmasq_name)
+    fix_rgw_config(rgw_node, dnsmasq_name)
+    setup_user_bucket(rgw_node, dnsmasq_name, access_key, secret_key, bucket_name, testdir)
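+    # hadoop 2.8+ ships the ITestS3A* integration suites; older releases
+    # only have the plain JUnit S3A tests, hence the two option sets below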
+    if hadoop_ver.startswith('2.8'):
+        # run all the ITestS3A* suites but skip the AWS credentials
+        # provider tests, which use the public landsat-pds bucket that
+        # is not reachable from within this test environment
+        test_options = '-Dit.test=ITestS3A* -Dit.test=\!ITestS3AAWSCredentialsProvider* -Dparallel-tests -Dscale -Dfs.s3a.scale.test.huge.filesize=128M verify'
+    else:
+        test_options = 'test -Dtest=S3a*,TestS3A*'
+    try:
+        run_s3atest(rgw_node, maven_version, testdir, test_options)
+        yield
+    finally:
+        log.info("Done s3a testing, cleaning up")
+        for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
+            rgw_node.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))])
+        # stop dnsmasq and restart networking so NetworkManager can
+        # restore the original resolv.conf
+        rgw_node.run(args=['sudo', 'systemctl', 'stop', 'dnsmasq'])
+        rgw_node.run(args=['sudo', 'systemctl', 'restart', 'network.service'], check_status=False)
+        rgw_node.run(args=['sudo', 'systemctl', 'status', 'network.service'], check_status=False)
+
+
+def install_prereq(client):
+    """
+    Install prerequisites on RHEL and CentOS.
+    TBD: Ubuntu
+    """
+    if client.os.name in ('rhel', 'centos'):
+        client.run(
+            args=[
+                'sudo',
+                'yum',
+                'install',
+                '-y',
+                'protobuf-c.x86_64',
+                'java',
+                'java-1.8.0-openjdk-devel',
+                'dnsmasq',
+            ]
+        )
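+    # TBD above: a hypothetical Ubuntu branch (untested; package names
+    # are assumptions) might look like:
+    #   elif client.os.name == 'ubuntu':
+    #       client.run(args=['sudo', 'apt-get', 'install', '-y',
+    #                        'openjdk-8-jdk', 'dnsmasq'])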
+
+
+def setup_dnsmasq(client, name):
+    """
+    Set up a simple dnsmasq entry, e.g. s3.ceph.com, so that the local
+    RGW host can be reached under that name.
+    """
+    resolv_conf = "nameserver 127.0.0.1\n"
+    dnsmasq_template = """address=/{name}/{ip_address}
+server=8.8.8.8
+server=8.8.4.4
+""".format(name=name, ip_address=client.ip_address)
+    dnsmasq_config_path = '/etc/dnsmasq.d/ceph'
+    # point resolv.conf to local dnsmasq
+    misc.sudo_write_file(
+        remote=client,
+        path='/etc/resolv.conf',
+        data=resolv_conf,
+    )
+    misc.sudo_write_file(
+        remote=client,
+        path=dnsmasq_config_path,
+        data=dnsmasq_template,
+    )
+    client.run(args=['cat', dnsmasq_config_path])
+    # restart dnsmasq
+    client.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
+    client.run(args=['sudo', 'systemctl', 'status', 'dnsmasq'])
+    time.sleep(5)
+    # verify dns name is set
+    client.run(args=['ping', '-c', '4', name])
+
+
+def fix_rgw_config(client, name):
+    """
+    Fix the RGW config in ceph.conf: add the 'rgw dns name' entry and
+    change the port to :80, both of which the s3a tests require.
+    """
+    rgw_dns_name = 'rgw dns name = {name}'.format(name=name)
+    ceph_conf_path = '/etc/ceph/ceph.conf'
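+    # after the append below, the rgw section should read, e.g.:
+    #   [client.rgw.<host>]
+    #   rgw dns name = s3.ceph.com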
+    # append rgw_dns_name
+    client.run(
+        args=[
+            'sudo',
+            'sed',
+            run.Raw('-i'),
+            run.Raw("'/client.rgw*/a {rgw_name}'".format(rgw_name=rgw_dns_name)),
+            ceph_conf_path
+
+        ]
+    )
+    # listen on port 80
+    client.run(
+        args=[
+            'sudo',
+            'sed',
+            run.Raw('-i'),
+            run.Raw('s/:8080/:80/'),
+            ceph_conf_path
+        ]
+    )
+    client.run(args=['cat', ceph_conf_path])
+    client.run(args=['sudo', 'systemctl', 'restart', 'ceph-radosgw.target'])
+    client.run(args=['sudo', 'systemctl', 'status', 'ceph-radosgw.target'])
+
+
+def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir):
+    """
+    Create a user with the given access_key and secret_key; it will be
+    used for the s3a tests.
+    """
+    client.run(
+        args=[
+            'sudo',
+            'radosgw-admin',
+            'user',
+            'create',
+            '--uid',
+            's3a',
+            '--display-name=s3a cephtests',
+            '--access-key={access_key}'.format(access_key=access_key),
+            '--secret-key={secret_key}'.format(secret_key=secret_key),
+            '--email=s3a@ceph.com',
+        ]
+    )
+    client.run(
+        args=[
+            'virtualenv',
+            '{testdir}/venv'.format(testdir=testdir),
+            run.Raw('&&'),
+            run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)),
+            'install',
+            'boto'
+        ]
+    )
+    create_bucket = """
+#!/usr/bin/env python
+import boto
+import boto.s3.connection
+access_key = '{access_key}'
+secret_key = '{secret_key}'
+
+conn = boto.connect_s3(
+    aws_access_key_id=access_key,
+    aws_secret_access_key=secret_key,
+    host='{dns_name}',
+    is_secure=False,
+    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+)
+conn.create_bucket('{bucket_name}')
+for bucket in conn.get_all_buckets():
+    print bucket.name + "\t" + bucket.creation_date
+""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name)
+    py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir)
+    misc.sudo_write_file(
+        remote=client,
+        path=py_bucket_file,
+        data=create_bucket,
+        perms='0744',
+        )
+    client.run(
+        args=[
+            'cat',
+            py_bucket_file,
+        ]
+    )
+    client.run(
+        args=[
+            '{testdir}/venv/bin/python'.format(testdir=testdir),
+            py_bucket_file,
+        ]
+    )
+
+
+def run_s3atest(client, maven_version, testdir, test_options):
+    """
+    Finally, run the s3a tests.
+    """
+    aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir)
+    run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version)
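+    # effectively runs, e.g.:
+    #   cd <testdir>/hadoop/hadoop-tools/hadoop-aws && \
+    #     <testdir>/apache-maven-<ver>/bin/mvn <test_options>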
+    client.run(
+        args=[
+            'cd',
+            run.Raw(aws_testdir),
+            run.Raw('&&'),
+            run.Raw(run_test),
+            run.Raw(test_options)
+        ]
+    )
+
+
+def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir):
+    """
+    Use the template to configure the s3a tests: fill in access_key,
+    secret_key and the other details required for the test.
+    """
+    config_template = """<configuration>
+<property>
+<name>fs.s3a.endpoint</name>
+<value>{name}</value>
+</property>
+
+<property>
+<name>fs.s3a.connection.ssl.enabled</name>
+<value>false</value>
+</property>
+
+<property>
+<name>test.fs.s3n.name</name>
+<value>s3n://{bucket_name}/</value>
+</property>
+
+<property>
+<name>test.fs.s3a.name</name>
+<value>s3a://{bucket_name}/</value>
+</property>
+
+<property>
+<name>test.fs.s3.name</name>
+<value>s3://{bucket_name}/</value>
+</property>
+
+<property>
+<name>fs.s3.awsAccessKeyId</name>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3.awsSecretAccessKey</name>
+<value>{secret_key}</value>
+</property>
+
+<property>
+<name>fs.s3n.awsAccessKeyId</name>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3n.awsSecretAccessKey</name>
+<value>{secret_key}</value>
+</property>
+
+<property>
+<name>fs.s3a.access.key</name>
+<description>AWS access key ID. Omit for Role-based authentication.</description>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3a.secret.key</name>
+<description>AWS secret key. Omit for Role-based authentication.</description>
+<value>{secret_key}</value>
+</property>
+</configuration>
+""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key)
+    config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml'
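+    # the hadoop-aws suite skips its S3 tests unless
+    # src/test/resources/auth-keys.xml exists, so writing this file is
+    # what enables them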
+    misc.write_file(
+        remote=client,
+        path=config_path,
+        data=config_template,
+    )
+    # output for debug
+    client.run(args=['cat', config_path])