Research Scientist, Fiksu Inc.
About Me
I am particularly interested in the application of Big Data to solve difficult real-world problems. My current research focuses on the design of large-scale storage systems, including distributed file systems and databases, to support big data applications. Previously I have explored low-level operating system support for contributory applications, these are applications such as Folding@Home and the Great Internet Mersenne Prime Search, that allow a user to contribute computing resources to projects from which they do not directly benefit.
Academic Background
I'm a sixth year PhD student in the Computer Science department at Carnegie Mellon University, advised by Greg Ganger.
I hold a Masters degree from the University of Massachusetts at Amherst, where I was co-advised by Mark Corner and Emery Berger. My Bachelor of Science degree is also from UMass, a dual degree in Computer Science and Mathematics.
Awards and Fellowships
- APC Fellowship and Award in Data Center Efficiency Research, 2009
- Best Paper Award, TFS: A Transparent File System for Contributory Storage, FAST, 2007
- Best Student Project, VMware, Cambridge, MA, Presented at VMWorld, 2007
- Best Undergraduate Research in Computer Science, University of Massachusetts, 2005
Research
My current research is focused on exploiting staleness tolerance in system design. To that end, I am working closely with a number of Machine Learning researchers to demonstrate how the error-tolerance of ML algorithms -- staleness tolerance in particular -- can be used in large scale parallel machine learning.
Additionally, I have worked with HP Labs on LazyBase, a distributed database designed for high-throughput updates and inserts, while allowing low latency analytical queries. It does this by exploiting a tradeoff between query result freshness and query latency.
Along with Alexey Tumanov, I am working on AlSched, a system that allows cluster frameworks to specify resource requests as composable algebraic utility functions. The scheduler then optimizes over these functions to create a globally optimal resource assignment.
Papers
@inproceedings{lazytables-hotos2013,
  author    = {Cipar, James and Ho, Qirong and Kim, Jin Kyu and Lee, Seunghak and Ganger, Gregory R. and Gibson, Garth and Keeton, Kimberly and Xing, Eric},
  title     = {Solving the straggler problem with bounded staleness},
  booktitle = {Proceedings of the 14th {USENIX} Workshop on Hot Topics in Operating Systems},
  series    = {HotOS '13},
  year      = {2013},
  location  = {Santa Ana Pueblo, NM},
  publisher = {USENIX Association},
}
in heterogeneous clouds
@inproceedings{alsched-socc12,
  author    = {Tumanov, Alexey and Cipar, James and Kozuch, Michael A. and Ganger, Gregory R.},
  title     = {{alsched}: algebraic scheduling of mixed workloads in heterogeneous clouds},
  booktitle = {Proceedings of the 3rd {ACM} Symposium on Cloud Computing},
  series    = {SoCC '12},
  year      = {2012},
  location  = {San Jose, CA},
  publisher = {ACM},
}
@inproceedings{Cipar:2012:LTF:2168836.2168854,
  author    = {Cipar, James and Ganger, Gregory R. and Keeton, Kimberly and Morrey, III, Charles B. and Soules, Craig A. N. and Veitch, Alistair},
  title     = {{LazyBase}: trading freshness for performance in a scalable database},
  booktitle = {Proceedings of the 7th {ACM} {European} Conference on Computer Systems},
  series    = {EuroSys '12},
  year      = {2012},
  isbn      = {978-1-4503-1223-3},
  location  = {Bern, Switzerland},
  pages     = {169--182},
  numpages  = {14},
  url       = {http://doi.acm.org/10.1145/2168836.2168854},
  doi       = {10.1145/2168836.2168854},
  acmid     = {2168854},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {consistency, freshness, pipeline},
}
@inproceedings{Amur:2010:RFP:1807128.1807164,
  author    = {Amur, Hrishikesh and Cipar, James and Gupta, Varun and Ganger, Gregory R. and Kozuch, Michael A. and Schwan, Karsten},
  title     = {Robust and flexible power-proportional storage},
  booktitle = {Proceedings of the 1st {ACM} Symposium on Cloud Computing},
  series    = {SoCC '10},
  year      = {2010},
  isbn      = {978-1-4503-0036-0},
  location  = {Indianapolis, Indiana, USA},
  pages     = {217--228},
  numpages  = {12},
  url       = {http://doi.acm.org/10.1145/1807128.1807164},
  doi       = {10.1145/1807128.1807164},
  acmid     = {1807164},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {cluster computing, data-layout, power-proportionality},
}
@inproceedings{Kozuch:2009:TLC:1555271.1555282,
  author    = {Kozuch, Michael A. and Ryan, Michael P. and Gass, Richard and Schlosser, Steven W. and O'Hallaron, David and Cipar, James and Krevat, Elie and L{\'o}pez, Julio and Stroucken, Michael and Ganger, Gregory R.},
  title     = {{Tashi}: location-aware cluster management},
  booktitle = {Proceedings of the 1st Workshop on Automated Control for Datacenters and Clouds},
  series    = {ACDC '09},
  year      = {2009},
  isbn      = {978-1-60558-585-7},
  location  = {Barcelona, Spain},
  pages     = {43--48},
  numpages  = {6},
  url       = {http://doi.acm.org/10.1145/1555271.1555282},
  doi       = {10.1145/1555271.1555282},
  acmid     = {1555282},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {cloud computing, cluster management, virtualization},
}
Best paper award
@inproceedings{Cipar:2007:TTF:1267903.1267931,
  author    = {Cipar, James and Corner, Mark D. and Berger, Emery D.},
  title     = {{TFS}: a transparent file system for contributory storage},
  booktitle = {Proceedings of the 5th {USENIX} Conference on File and Storage Technologies},
  series    = {FAST '07},
  year      = {2007},
  location  = {San Jose, CA},
  pages     = {28},
  numpages  = {1},
  url       = {http://dl.acm.org/citation.cfm?id=1267903.1267931},
  acmid     = {1267931},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}
@inproceedings{Cipar:2006:TCM:1267359.1267370,
  author    = {Cipar, James and Corner, Mark D. and Berger, Emery D.},
  title     = {Transparent contribution of memory},
  booktitle = {Proceedings of the 2006 {USENIX} Annual Technical Conference},
  series    = {ATEC '06},
  year      = {2006},
  location  = {Boston, MA},
  pages     = {11},
  numpages  = {1},
  url       = {http://dl.acm.org/citation.cfm?id=1267359.1267370},
  acmid     = {1267370},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}